xref: /openbmc/linux/fs/smb/client/file.c (revision cdd7870a)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request
217 		   can cause unnecessary access denied on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that there is not a direct match between disposition
374  *	FILE_SUPERSEDE (ie create whether or not file exists although
375  *	O_CREAT | O_TRUNC is similar but truncates the existing
376  *	file rather than creating a new file as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on open call)
378  *?
379  *?  O_SYNC is a reasonable match to CIFS writethrough flag
380  *?  and the read write flags match reasonably.  O_LARGEFILE
381  *?  is irrelevant because largefile support is always used
382  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
383  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info but with passing in fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->invalidHandle = false;
505 	cfile->deferred_close_scheduled = false;
506 	cfile->tlink = cifs_get_tlink(tlink);
507 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 	INIT_WORK(&cfile->serverclose, serverclose_work);
510 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 	mutex_init(&cfile->fh_mutex);
512 	spin_lock_init(&cfile->file_info_lock);
513 
514 	cifs_sb_active(inode->i_sb);
515 
516 	/*
517 	 * If the server returned a read oplock and we have mandatory brlocks,
518 	 * set oplock level to None.
519 	 */
520 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 		oplock = 0;
523 	}
524 
525 	cifs_down_write(&cinode->lock_sem);
526 	list_add(&fdlocks->llist, &cinode->llist);
527 	up_write(&cinode->lock_sem);
528 
529 	spin_lock(&tcon->open_file_lock);
530 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 		oplock = fid->pending_open->oplock;
532 	list_del(&fid->pending_open->olist);
533 
534 	fid->purge_cache = false;
535 	server->ops->set_fid(cfile, fid, oplock);
536 
537 	list_add(&cfile->tlist, &tcon->openFileList);
538 	atomic_inc(&tcon->num_local_opens);
539 
540 	/* if readable file instance put first in list*/
541 	spin_lock(&cinode->open_file_lock);
542 	if (file->f_mode & FMODE_READ)
543 		list_add(&cfile->flist, &cinode->openFileList);
544 	else
545 		list_add_tail(&cfile->flist, &cinode->openFileList);
546 	spin_unlock(&cinode->open_file_lock);
547 	spin_unlock(&tcon->open_file_lock);
548 
549 	if (fid->purge_cache)
550 		cifs_zap_mapping(inode);
551 
552 	file->private_data = cfile;
553 	return cfile;
554 }
555 
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559 	spin_lock(&cifs_file->file_info_lock);
560 	cifsFileInfo_get_locked(cifs_file);
561 	spin_unlock(&cifs_file->file_info_lock);
562 	return cifs_file;
563 }
564 
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567 	struct inode *inode = d_inode(cifs_file->dentry);
568 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 	struct cifsLockInfo *li, *tmp;
570 	struct super_block *sb = inode->i_sb;
571 
572 	/*
573 	 * Delete any outstanding lock records. We'll lose them when the file
574 	 * is closed anyway.
575 	 */
576 	cifs_down_write(&cifsi->lock_sem);
577 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 		list_del(&li->llist);
579 		cifs_del_lock_waiters(li);
580 		kfree(li);
581 	}
582 	list_del(&cifs_file->llist->llist);
583 	kfree(cifs_file->llist);
584 	up_write(&cifsi->lock_sem);
585 
586 	cifs_put_tlink(cifs_file->tlink);
587 	dput(cifs_file->dentry);
588 	cifs_sb_deactive(sb);
589 	kfree(cifs_file->symlink_target);
590 	kfree(cifs_file);
591 }
592 
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595 	struct cifsFileInfo *cifs_file = container_of(work,
596 			struct cifsFileInfo, put);
597 
598 	cifsFileInfo_put_final(cifs_file);
599 }
600 
601 void serverclose_work(struct work_struct *work)
602 {
603 	struct cifsFileInfo *cifs_file = container_of(work,
604 			struct cifsFileInfo, serverclose);
605 
606 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607 
608 	struct TCP_Server_Info *server = tcon->ses->server;
609 	int rc = 0;
610 	int retries = 0;
611 	int MAX_RETRIES = 4;
612 
613 	do {
614 		if (server->ops->close_getattr)
615 			rc = server->ops->close_getattr(0, tcon, cifs_file);
616 		else if (server->ops->close)
617 			rc = server->ops->close(0, tcon, &cifs_file->fid);
618 
619 		if (rc == -EBUSY || rc == -EAGAIN) {
620 			retries++;
621 			msleep(250);
622 		}
623 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 	);
625 
626 	if (retries == MAX_RETRIES)
627 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628 
629 	if (cifs_file->offload)
630 		queue_work(fileinfo_put_wq, &cifs_file->put);
631 	else
632 		cifsFileInfo_put_final(cifs_file);
633 }
634 
635 /**
636  * cifsFileInfo_put - release a reference of file priv data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644 	_cifsFileInfo_put(cifs_file, true, true);
645 }
646 
647 /**
648  * _cifsFileInfo_put - release a reference of file priv data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_for_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:	not offloaded on close and oplock breaks
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 		       bool wait_oplock_handler, bool offload)
665 {
666 	struct inode *inode = d_inode(cifs_file->dentry);
667 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 	struct TCP_Server_Info *server = tcon->ses->server;
669 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 	struct super_block *sb = inode->i_sb;
671 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 	struct cifs_fid fid = {};
673 	struct cifs_pending_open open;
674 	bool oplock_break_cancelled;
675 	bool serverclose_offloaded = false;
676 
677 	spin_lock(&tcon->open_file_lock);
678 	spin_lock(&cifsi->open_file_lock);
679 	spin_lock(&cifs_file->file_info_lock);
680 
681 	cifs_file->offload = offload;
682 	if (--cifs_file->count > 0) {
683 		spin_unlock(&cifs_file->file_info_lock);
684 		spin_unlock(&cifsi->open_file_lock);
685 		spin_unlock(&tcon->open_file_lock);
686 		return;
687 	}
688 	spin_unlock(&cifs_file->file_info_lock);
689 
690 	if (server->ops->get_lease_key)
691 		server->ops->get_lease_key(inode, &fid);
692 
693 	/* store open in pending opens to make sure we don't miss lease break */
694 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695 
696 	/* remove it from the lists */
697 	list_del(&cifs_file->flist);
698 	list_del(&cifs_file->tlist);
699 	atomic_dec(&tcon->num_local_opens);
700 
701 	if (list_empty(&cifsi->openFileList)) {
702 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 			 d_inode(cifs_file->dentry));
704 		/*
705 		 * In strict cache mode we need invalidate mapping on the last
706 		 * close  because it may cause a error when we open this file
707 		 * again and get at least level II oplock.
708 		 */
709 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 		cifs_set_oplock_level(cifsi, 0);
712 	}
713 
714 	spin_unlock(&cifsi->open_file_lock);
715 	spin_unlock(&tcon->open_file_lock);
716 
717 	oplock_break_cancelled = wait_oplock_handler ?
718 		cancel_work_sync(&cifs_file->oplock_break) : false;
719 
720 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 		struct TCP_Server_Info *server = tcon->ses->server;
722 		unsigned int xid;
723 		int rc = 0;
724 
725 		xid = get_xid();
726 		if (server->ops->close_getattr)
727 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 		else if (server->ops->close)
729 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 		_free_xid(xid);
731 
732 		if (rc == -EBUSY || rc == -EAGAIN) {
733 			// Server close failed, hence offloading it as an async op
734 			queue_work(serverclose_wq, &cifs_file->serverclose);
735 			serverclose_offloaded = true;
736 		}
737 	}
738 
739 	if (oplock_break_cancelled)
740 		cifs_done_oplock_break(cifsi);
741 
742 	cifs_del_pending_open(&open);
743 
744 	// if serverclose has been offloaded to wq (on failure), it will
745 	// handle offloading put as well. If serverclose not offloaded,
746 	// we need to handle offloading put here.
747 	if (!serverclose_offloaded) {
748 		if (offload)
749 			queue_work(fileinfo_put_wq, &cifs_file->put);
750 		else
751 			cifsFileInfo_put_final(cifs_file);
752 	}
753 }
754 
755 int cifs_open(struct inode *inode, struct file *file)
756 
757 {
758 	int rc = -EACCES;
759 	unsigned int xid;
760 	__u32 oplock;
761 	struct cifs_sb_info *cifs_sb;
762 	struct TCP_Server_Info *server;
763 	struct cifs_tcon *tcon;
764 	struct tcon_link *tlink;
765 	struct cifsFileInfo *cfile = NULL;
766 	void *page;
767 	const char *full_path;
768 	bool posix_open_ok = false;
769 	struct cifs_fid fid = {};
770 	struct cifs_pending_open open;
771 	struct cifs_open_info_data data = {};
772 
773 	xid = get_xid();
774 
775 	cifs_sb = CIFS_SB(inode->i_sb);
776 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 		free_xid(xid);
778 		return -EIO;
779 	}
780 
781 	tlink = cifs_sb_tlink(cifs_sb);
782 	if (IS_ERR(tlink)) {
783 		free_xid(xid);
784 		return PTR_ERR(tlink);
785 	}
786 	tcon = tlink_tcon(tlink);
787 	server = tcon->ses->server;
788 
789 	page = alloc_dentry_path();
790 	full_path = build_path_from_dentry(file_dentry(file), page);
791 	if (IS_ERR(full_path)) {
792 		rc = PTR_ERR(full_path);
793 		goto out;
794 	}
795 
796 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 		 inode, file->f_flags, full_path);
798 
799 	if (file->f_flags & O_DIRECT &&
800 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 			file->f_op = &cifs_file_direct_nobrl_ops;
803 		else
804 			file->f_op = &cifs_file_direct_ops;
805 	}
806 
807 	/* Get the cached handle as SMB2 close is deferred */
808 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 	if (rc == 0) {
810 		if (file->f_flags == cfile->f_flags) {
811 			file->private_data = cfile;
812 			spin_lock(&CIFS_I(inode)->deferred_lock);
813 			cifs_del_deferred_close(cfile);
814 			spin_unlock(&CIFS_I(inode)->deferred_lock);
815 			goto use_cache;
816 		} else {
817 			_cifsFileInfo_put(cfile, true, false);
818 		}
819 	}
820 
821 	if (server->oplocks)
822 		oplock = REQ_OPLOCK;
823 	else
824 		oplock = 0;
825 
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 	if (!tcon->broken_posix_open && tcon->unix_ext &&
828 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 		/* can not refresh inode info since size could be stale */
831 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 				cifs_sb->ctx->file_mode /* ignored */,
833 				file->f_flags, &oplock, &fid.netfid, xid);
834 		if (rc == 0) {
835 			cifs_dbg(FYI, "posix open succeeded\n");
836 			posix_open_ok = true;
837 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 			if (tcon->ses->serverNOS)
839 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 					 tcon->ses->ip_addr,
841 					 tcon->ses->serverNOS);
842 			tcon->broken_posix_open = true;
843 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 			goto out;
846 		/*
847 		 * Else fallthrough to retry open the old way on network i/o
848 		 * or DFS errors.
849 		 */
850 	}
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852 
853 	if (server->ops->get_lease_key)
854 		server->ops->get_lease_key(inode, &fid);
855 
856 	cifs_add_pending_open(&fid, tlink, &open);
857 
858 	if (!posix_open_ok) {
859 		if (server->ops->get_lease_key)
860 			server->ops->get_lease_key(inode, &fid);
861 
862 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 				  xid, &data);
864 		if (rc) {
865 			cifs_del_pending_open(&open);
866 			goto out;
867 		}
868 	}
869 
870 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 	if (cfile == NULL) {
872 		if (server->ops->close)
873 			server->ops->close(xid, tcon, &fid);
874 		cifs_del_pending_open(&open);
875 		rc = -ENOMEM;
876 		goto out;
877 	}
878 
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 		/*
882 		 * Time to set mode which we can not set earlier due to
883 		 * problems creating new read-only files.
884 		 */
885 		struct cifs_unix_set_info_args args = {
886 			.mode	= inode->i_mode,
887 			.uid	= INVALID_UID, /* no change */
888 			.gid	= INVALID_GID, /* no change */
889 			.ctime	= NO_CHANGE_64,
890 			.atime	= NO_CHANGE_64,
891 			.mtime	= NO_CHANGE_64,
892 			.device	= 0,
893 		};
894 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 				       cfile->pid);
896 	}
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898 
899 use_cache:
900 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 			   file->f_mode & FMODE_WRITE);
902 	if (!(file->f_flags & O_DIRECT))
903 		goto out;
904 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 		goto out;
906 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907 
908 out:
909 	free_dentry_path(page);
910 	free_xid(xid);
911 	cifs_put_tlink(tlink);
912 	cifs_free_open_info(&data);
913 	return rc;
914 }
915 
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919 
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 	int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933 
934 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 	if (cinode->can_cache_brlcks) {
936 		/* can cache locks - no need to relock */
937 		up_read(&cinode->lock_sem);
938 		return rc;
939 	}
940 
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 	if (cap_unix(tcon->ses) &&
943 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 		rc = cifs_push_posix_locks(cfile);
946 	else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949 
950 	up_read(&cinode->lock_sem);
951 	return rc;
952 }
953 
954 static int
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957 	int rc = -EACCES;
958 	unsigned int xid;
959 	__u32 oplock;
960 	struct cifs_sb_info *cifs_sb;
961 	struct cifs_tcon *tcon;
962 	struct TCP_Server_Info *server;
963 	struct cifsInodeInfo *cinode;
964 	struct inode *inode;
965 	void *page;
966 	const char *full_path;
967 	int desired_access;
968 	int disposition = FILE_OPEN;
969 	int create_options = CREATE_NOT_DIR;
970 	struct cifs_open_parms oparms;
971 	int rdwr_for_fscache = 0;
972 
973 	xid = get_xid();
974 	mutex_lock(&cfile->fh_mutex);
975 	if (!cfile->invalidHandle) {
976 		mutex_unlock(&cfile->fh_mutex);
977 		free_xid(xid);
978 		return 0;
979 	}
980 
981 	inode = d_inode(cfile->dentry);
982 	cifs_sb = CIFS_SB(inode->i_sb);
983 	tcon = tlink_tcon(cfile->tlink);
984 	server = tcon->ses->server;
985 
986 	/*
987 	 * Can not grab rename sem here because various ops, including those
988 	 * that already have the rename sem can end up causing writepage to get
989 	 * called and if the server was down that means we end up here, and we
990 	 * can never tell if the caller already has the rename_sem.
991 	 */
992 	page = alloc_dentry_path();
993 	full_path = build_path_from_dentry(cfile->dentry, page);
994 	if (IS_ERR(full_path)) {
995 		mutex_unlock(&cfile->fh_mutex);
996 		free_dentry_path(page);
997 		free_xid(xid);
998 		return PTR_ERR(full_path);
999 	}
1000 
1001 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 		 inode, cfile->f_flags, full_path);
1003 
1004 	if (tcon->ses->server->oplocks)
1005 		oplock = REQ_OPLOCK;
1006 	else
1007 		oplock = 0;
1008 
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 		/*
1014 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 		 * original open. Must mask them off for a reopen.
1016 		 */
1017 		unsigned int oflags = cfile->f_flags &
1018 						~(O_CREAT | O_EXCL | O_TRUNC);
1019 
1020 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 				     cifs_sb->ctx->file_mode /* ignored */,
1022 				     oflags, &oplock, &cfile->fid.netfid, xid);
1023 		if (rc == 0) {
1024 			cifs_dbg(FYI, "posix reopen succeeded\n");
1025 			oparms.reconnect = true;
1026 			goto reopen_success;
1027 		}
1028 		/*
1029 		 * fallthrough to retry open the old way on errors, especially
1030 		 * in the reconnect path it is important to retry hard
1031 		 */
1032 	}
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034 
1035 	/* If we're caching, we need to be able to fill in around partial writes. */
1036 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 		rdwr_for_fscache = 1;
1038 
1039 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040 
1041 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 	if (cfile->f_flags & O_SYNC)
1043 		create_options |= CREATE_WRITE_THROUGH;
1044 
1045 	if (cfile->f_flags & O_DIRECT)
1046 		create_options |= CREATE_NO_BUFFER;
1047 
1048 	if (server->ops->get_lease_key)
1049 		server->ops->get_lease_key(inode, &cfile->fid);
1050 
1051 retry_open:
1052 	oparms = (struct cifs_open_parms) {
1053 		.tcon = tcon,
1054 		.cifs_sb = cifs_sb,
1055 		.desired_access = desired_access,
1056 		.create_options = cifs_create_options(cifs_sb, create_options),
1057 		.disposition = disposition,
1058 		.path = full_path,
1059 		.fid = &cfile->fid,
1060 		.reconnect = true,
1061 	};
1062 
1063 	/*
1064 	 * Can not refresh inode by passing in file_info buf to be returned by
1065 	 * ops->open and then calling get_inode_info with returned buf since
1066 	 * file might have write behind data that needs to be flushed and server
1067 	 * version of file size can be stale. If we knew for sure that inode was
1068 	 * not dirty locally we could do this.
1069 	 */
1070 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 	if (rc == -ENOENT && oparms.reconnect == false) {
1072 		/* durable handle timeout is expired - open the file again */
1073 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 		/* indicate that we need to relock the file */
1075 		oparms.reconnect = true;
1076 	}
1077 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 		rdwr_for_fscache = 2;
1080 		goto retry_open;
1081 	}
1082 
1083 	if (rc) {
1084 		mutex_unlock(&cfile->fh_mutex);
1085 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 		goto reopen_error_exit;
1088 	}
1089 
1090 	if (rdwr_for_fscache == 2)
1091 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092 
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 	cfile->invalidHandle = false;
1097 	mutex_unlock(&cfile->fh_mutex);
1098 	cinode = CIFS_I(inode);
1099 
1100 	if (can_flush) {
1101 		rc = filemap_write_and_wait(inode->i_mapping);
1102 		if (!is_interrupt_error(rc))
1103 			mapping_set_error(inode->i_mapping, rc);
1104 
1105 		if (tcon->posix_extensions) {
1106 			rc = smb311_posix_get_inode_info(&inode, full_path,
1107 							 NULL, inode->i_sb, xid);
1108 		} else if (tcon->unix_ext) {
1109 			rc = cifs_get_inode_info_unix(&inode, full_path,
1110 						      inode->i_sb, xid);
1111 		} else {
1112 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1113 						 inode->i_sb, xid, NULL);
1114 		}
1115 	}
1116 	/*
1117 	 * Else we are writing out data to server already and could deadlock if
1118 	 * we tried to flush data, and since we do not know if we have data that
1119 	 * would invalidate the current end of file on the server we can not go
1120 	 * to the server to get the new inode info.
1121 	 */
1122 
1123 	/*
1124 	 * If the server returned a read oplock and we have mandatory brlocks,
1125 	 * set oplock level to None.
1126 	 */
1127 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129 		oplock = 0;
1130 	}
1131 
1132 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1133 	if (oparms.reconnect)
1134 		cifs_relock_file(cfile);
1135 
1136 reopen_error_exit:
1137 	free_dentry_path(page);
1138 	free_xid(xid);
1139 	return rc;
1140 }
1141 
1142 void smb2_deferred_work_close(struct work_struct *work)
1143 {
1144 	struct cifsFileInfo *cfile = container_of(work,
1145 			struct cifsFileInfo, deferred.work);
1146 
1147 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148 	cifs_del_deferred_close(cfile);
1149 	cfile->deferred_close_scheduled = false;
1150 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151 	_cifsFileInfo_put(cfile, true, false);
1152 }
1153 
1154 int cifs_close(struct inode *inode, struct file *file)
1155 {
1156 	struct cifsFileInfo *cfile;
1157 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1158 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1159 	struct cifs_deferred_close *dclose;
1160 
1161 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1162 
1163 	if (file->private_data != NULL) {
1164 		cfile = file->private_data;
1165 		file->private_data = NULL;
1166 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1167 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1168 		    && cinode->lease_granted &&
1169 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1170 		    dclose) {
1171 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1172 				inode_set_mtime_to_ts(inode,
1173 						      inode_set_ctime_current(inode));
1174 			}
1175 			spin_lock(&cinode->deferred_lock);
1176 			cifs_add_deferred_close(cfile, dclose);
1177 			if (cfile->deferred_close_scheduled &&
1178 			    delayed_work_pending(&cfile->deferred)) {
1179 				/*
1180 				 * If there is no pending work, mod_delayed_work queues new work.
1181 				 * So, Increase the ref count to avoid use-after-free.
1182 				 */
1183 				if (!mod_delayed_work(deferredclose_wq,
1184 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1185 					cifsFileInfo_get(cfile);
1186 			} else {
1187 				/* Deferred close for files */
1188 				queue_delayed_work(deferredclose_wq,
1189 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1190 				cfile->deferred_close_scheduled = true;
1191 				spin_unlock(&cinode->deferred_lock);
1192 				return 0;
1193 			}
1194 			spin_unlock(&cinode->deferred_lock);
1195 			_cifsFileInfo_put(cfile, true, false);
1196 		} else {
1197 			_cifsFileInfo_put(cfile, true, false);
1198 			kfree(dclose);
1199 		}
1200 	}
1201 
1202 	/* return code from the ->release op is always ignored */
1203 	return 0;
1204 }
1205 
1206 void
1207 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1208 {
1209 	struct cifsFileInfo *open_file, *tmp;
1210 	struct list_head tmp_list;
1211 
1212 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1213 		return;
1214 
1215 	tcon->need_reopen_files = false;
1216 
1217 	cifs_dbg(FYI, "Reopen persistent handles\n");
1218 	INIT_LIST_HEAD(&tmp_list);
1219 
1220 	/* list all files open on tree connection, reopen resilient handles  */
1221 	spin_lock(&tcon->open_file_lock);
1222 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1223 		if (!open_file->invalidHandle)
1224 			continue;
1225 		cifsFileInfo_get(open_file);
1226 		list_add_tail(&open_file->rlist, &tmp_list);
1227 	}
1228 	spin_unlock(&tcon->open_file_lock);
1229 
1230 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1231 		if (cifs_reopen_file(open_file, false /* do not flush */))
1232 			tcon->need_reopen_files = true;
1233 		list_del_init(&open_file->rlist);
1234 		cifsFileInfo_put(open_file);
1235 	}
1236 }
1237 
1238 int cifs_closedir(struct inode *inode, struct file *file)
1239 {
1240 	int rc = 0;
1241 	unsigned int xid;
1242 	struct cifsFileInfo *cfile = file->private_data;
1243 	struct cifs_tcon *tcon;
1244 	struct TCP_Server_Info *server;
1245 	char *buf;
1246 
1247 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1248 
1249 	if (cfile == NULL)
1250 		return rc;
1251 
1252 	xid = get_xid();
1253 	tcon = tlink_tcon(cfile->tlink);
1254 	server = tcon->ses->server;
1255 
1256 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1257 	spin_lock(&cfile->file_info_lock);
1258 	if (server->ops->dir_needs_close(cfile)) {
1259 		cfile->invalidHandle = true;
1260 		spin_unlock(&cfile->file_info_lock);
1261 		if (server->ops->close_dir)
1262 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1263 		else
1264 			rc = -ENOSYS;
1265 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1266 		/* not much we can do if it fails anyway, ignore rc */
1267 		rc = 0;
1268 	} else
1269 		spin_unlock(&cfile->file_info_lock);
1270 
1271 	buf = cfile->srch_inf.ntwrk_buf_start;
1272 	if (buf) {
1273 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1274 		cfile->srch_inf.ntwrk_buf_start = NULL;
1275 		if (cfile->srch_inf.smallBuf)
1276 			cifs_small_buf_release(buf);
1277 		else
1278 			cifs_buf_release(buf);
1279 	}
1280 
1281 	cifs_put_tlink(cfile->tlink);
1282 	kfree(file->private_data);
1283 	file->private_data = NULL;
1284 	/* BB can we lock the filestruct while this is going on? */
1285 	free_xid(xid);
1286 	return rc;
1287 }
1288 
1289 static struct cifsLockInfo *
1290 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1291 {
1292 	struct cifsLockInfo *lock =
1293 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1294 	if (!lock)
1295 		return lock;
1296 	lock->offset = offset;
1297 	lock->length = length;
1298 	lock->type = type;
1299 	lock->pid = current->tgid;
1300 	lock->flags = flags;
1301 	INIT_LIST_HEAD(&lock->blist);
1302 	init_waitqueue_head(&lock->block_q);
1303 	return lock;
1304 }
1305 
1306 void
1307 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1308 {
1309 	struct cifsLockInfo *li, *tmp;
1310 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1311 		list_del_init(&li->blist);
1312 		wake_up(&li->block_q);
1313 	}
1314 }
1315 
1316 #define CIFS_LOCK_OP	0
1317 #define CIFS_READ_OP	1
1318 #define CIFS_WRITE_OP	2
1319 
1320 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1321 static bool
1322 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1323 			    __u64 length, __u8 type, __u16 flags,
1324 			    struct cifsFileInfo *cfile,
1325 			    struct cifsLockInfo **conf_lock, int rw_check)
1326 {
1327 	struct cifsLockInfo *li;
1328 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1329 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1330 
1331 	list_for_each_entry(li, &fdlocks->locks, llist) {
1332 		if (offset + length <= li->offset ||
1333 		    offset >= li->offset + li->length)
1334 			continue;
1335 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1336 		    server->ops->compare_fids(cfile, cur_cfile)) {
1337 			/* shared lock prevents write op through the same fid */
1338 			if (!(li->type & server->vals->shared_lock_type) ||
1339 			    rw_check != CIFS_WRITE_OP)
1340 				continue;
1341 		}
1342 		if ((type & server->vals->shared_lock_type) &&
1343 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1344 		     current->tgid == li->pid) || type == li->type))
1345 			continue;
1346 		if (rw_check == CIFS_LOCK_OP &&
1347 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1348 		    server->ops->compare_fids(cfile, cur_cfile))
1349 			continue;
1350 		if (conf_lock)
1351 			*conf_lock = li;
1352 		return true;
1353 	}
1354 	return false;
1355 }
1356 
1357 bool
1358 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1359 			__u8 type, __u16 flags,
1360 			struct cifsLockInfo **conf_lock, int rw_check)
1361 {
1362 	bool rc = false;
1363 	struct cifs_fid_locks *cur;
1364 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1365 
1366 	list_for_each_entry(cur, &cinode->llist, llist) {
1367 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1368 						 flags, cfile, conf_lock,
1369 						 rw_check);
1370 		if (rc)
1371 			break;
1372 	}
1373 
1374 	return rc;
1375 }
1376 
1377 /*
1378  * Check if there is another lock that prevents us to set the lock (mandatory
1379  * style). If such a lock exists, update the flock structure with its
1380  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1381  * or leave it the same if we can't. Returns 0 if we don't need to request to
1382  * the server or 1 otherwise.
1383  */
1384 static int
1385 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1386 	       __u8 type, struct file_lock *flock)
1387 {
1388 	int rc = 0;
1389 	struct cifsLockInfo *conf_lock;
1390 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1391 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1392 	bool exist;
1393 
1394 	down_read(&cinode->lock_sem);
1395 
1396 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1397 					flock->fl_flags, &conf_lock,
1398 					CIFS_LOCK_OP);
1399 	if (exist) {
1400 		flock->fl_start = conf_lock->offset;
1401 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1402 		flock->fl_pid = conf_lock->pid;
1403 		if (conf_lock->type & server->vals->shared_lock_type)
1404 			flock->fl_type = F_RDLCK;
1405 		else
1406 			flock->fl_type = F_WRLCK;
1407 	} else if (!cinode->can_cache_brlcks)
1408 		rc = 1;
1409 	else
1410 		flock->fl_type = F_UNLCK;
1411 
1412 	up_read(&cinode->lock_sem);
1413 	return rc;
1414 }
1415 
1416 static void
1417 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1418 {
1419 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1420 	cifs_down_write(&cinode->lock_sem);
1421 	list_add_tail(&lock->llist, &cfile->llist->locks);
1422 	up_write(&cinode->lock_sem);
1423 }
1424 
1425 /*
1426  * Set the byte-range lock (mandatory style). Returns:
1427  * 1) 0, if we set the lock and don't need to request to the server;
1428  * 2) 1, if no locks prevent us but we need to request to the server;
1429  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1430  */
1431 static int
1432 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1433 		 bool wait)
1434 {
1435 	struct cifsLockInfo *conf_lock;
1436 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1437 	bool exist;
1438 	int rc = 0;
1439 
1440 try_again:
1441 	exist = false;
1442 	cifs_down_write(&cinode->lock_sem);
1443 
1444 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1445 					lock->type, lock->flags, &conf_lock,
1446 					CIFS_LOCK_OP);
1447 	if (!exist && cinode->can_cache_brlcks) {
1448 		list_add_tail(&lock->llist, &cfile->llist->locks);
1449 		up_write(&cinode->lock_sem);
1450 		return rc;
1451 	}
1452 
1453 	if (!exist)
1454 		rc = 1;
1455 	else if (!wait)
1456 		rc = -EACCES;
1457 	else {
1458 		list_add_tail(&lock->blist, &conf_lock->blist);
1459 		up_write(&cinode->lock_sem);
1460 		rc = wait_event_interruptible(lock->block_q,
1461 					(lock->blist.prev == &lock->blist) &&
1462 					(lock->blist.next == &lock->blist));
1463 		if (!rc)
1464 			goto try_again;
1465 		cifs_down_write(&cinode->lock_sem);
1466 		list_del_init(&lock->blist);
1467 	}
1468 
1469 	up_write(&cinode->lock_sem);
1470 	return rc;
1471 }
1472 
1473 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1474 /*
1475  * Check if there is another lock that prevents us to set the lock (posix
1476  * style). If such a lock exists, update the flock structure with its
1477  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1478  * or leave it the same if we can't. Returns 0 if we don't need to request to
1479  * the server or 1 otherwise.
1480  */
1481 static int
1482 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1483 {
1484 	int rc = 0;
1485 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1486 	unsigned char saved_type = flock->fl_type;
1487 
1488 	if ((flock->fl_flags & FL_POSIX) == 0)
1489 		return 1;
1490 
1491 	down_read(&cinode->lock_sem);
1492 	posix_test_lock(file, flock);
1493 
1494 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1495 		flock->fl_type = saved_type;
1496 		rc = 1;
1497 	}
1498 
1499 	up_read(&cinode->lock_sem);
1500 	return rc;
1501 }
1502 
1503 /*
1504  * Set the byte-range lock (posix style). Returns:
1505  * 1) <0, if the error occurs while setting the lock;
1506  * 2) 0, if we set the lock and don't need to request to the server;
1507  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1508  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1509  */
1510 static int
1511 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1512 {
1513 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1514 	int rc = FILE_LOCK_DEFERRED + 1;
1515 
1516 	if ((flock->fl_flags & FL_POSIX) == 0)
1517 		return rc;
1518 
1519 	cifs_down_write(&cinode->lock_sem);
1520 	if (!cinode->can_cache_brlcks) {
1521 		up_write(&cinode->lock_sem);
1522 		return rc;
1523 	}
1524 
1525 	rc = posix_lock_file(file, flock, NULL);
1526 	up_write(&cinode->lock_sem);
1527 	return rc;
1528 }
1529 
1530 int
1531 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1532 {
1533 	unsigned int xid;
1534 	int rc = 0, stored_rc;
1535 	struct cifsLockInfo *li, *tmp;
1536 	struct cifs_tcon *tcon;
1537 	unsigned int num, max_num, max_buf;
1538 	LOCKING_ANDX_RANGE *buf, *cur;
1539 	static const int types[] = {
1540 		LOCKING_ANDX_LARGE_FILES,
1541 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1542 	};
1543 	int i;
1544 
1545 	xid = get_xid();
1546 	tcon = tlink_tcon(cfile->tlink);
1547 
1548 	/*
1549 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1550 	 * and check it before using.
1551 	 */
1552 	max_buf = tcon->ses->server->maxBuf;
1553 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1554 		free_xid(xid);
1555 		return -EINVAL;
1556 	}
1557 
1558 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1559 		     PAGE_SIZE);
1560 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1561 			PAGE_SIZE);
1562 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1563 						sizeof(LOCKING_ANDX_RANGE);
1564 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1565 	if (!buf) {
1566 		free_xid(xid);
1567 		return -ENOMEM;
1568 	}
1569 
1570 	for (i = 0; i < 2; i++) {
1571 		cur = buf;
1572 		num = 0;
1573 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1574 			if (li->type != types[i])
1575 				continue;
1576 			cur->Pid = cpu_to_le16(li->pid);
1577 			cur->LengthLow = cpu_to_le32((u32)li->length);
1578 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1579 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1580 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1581 			if (++num == max_num) {
1582 				stored_rc = cifs_lockv(xid, tcon,
1583 						       cfile->fid.netfid,
1584 						       (__u8)li->type, 0, num,
1585 						       buf);
1586 				if (stored_rc)
1587 					rc = stored_rc;
1588 				cur = buf;
1589 				num = 0;
1590 			} else
1591 				cur++;
1592 		}
1593 
1594 		if (num) {
1595 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1596 					       (__u8)types[i], 0, num, buf);
1597 			if (stored_rc)
1598 				rc = stored_rc;
1599 		}
1600 	}
1601 
1602 	kfree(buf);
1603 	free_xid(xid);
1604 	return rc;
1605 }
1606 
1607 static __u32
1608 hash_lockowner(fl_owner_t owner)
1609 {
1610 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1611 }
1612 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1613 
1614 struct lock_to_push {
1615 	struct list_head llist;
1616 	__u64 offset;
1617 	__u64 length;
1618 	__u32 pid;
1619 	__u16 netfid;
1620 	__u8 type;
1621 };
1622 
1623 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1624 static int
1625 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1626 {
1627 	struct inode *inode = d_inode(cfile->dentry);
1628 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1629 	struct file_lock *flock;
1630 	struct file_lock_context *flctx = locks_inode_context(inode);
1631 	unsigned int count = 0, i;
1632 	int rc = 0, xid, type;
1633 	struct list_head locks_to_send, *el;
1634 	struct lock_to_push *lck, *tmp;
1635 	__u64 length;
1636 
1637 	xid = get_xid();
1638 
1639 	if (!flctx)
1640 		goto out;
1641 
1642 	spin_lock(&flctx->flc_lock);
1643 	list_for_each(el, &flctx->flc_posix) {
1644 		count++;
1645 	}
1646 	spin_unlock(&flctx->flc_lock);
1647 
1648 	INIT_LIST_HEAD(&locks_to_send);
1649 
1650 	/*
1651 	 * Allocating count locks is enough because no FL_POSIX locks can be
1652 	 * added to the list while we are holding cinode->lock_sem that
1653 	 * protects locking operations of this inode.
1654 	 */
1655 	for (i = 0; i < count; i++) {
1656 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1657 		if (!lck) {
1658 			rc = -ENOMEM;
1659 			goto err_out;
1660 		}
1661 		list_add_tail(&lck->llist, &locks_to_send);
1662 	}
1663 
1664 	el = locks_to_send.next;
1665 	spin_lock(&flctx->flc_lock);
1666 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1667 		if (el == &locks_to_send) {
1668 			/*
1669 			 * The list ended. We don't have enough allocated
1670 			 * structures - something is really wrong.
1671 			 */
1672 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1673 			break;
1674 		}
1675 		length = cifs_flock_len(flock);
1676 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1677 			type = CIFS_RDLCK;
1678 		else
1679 			type = CIFS_WRLCK;
1680 		lck = list_entry(el, struct lock_to_push, llist);
1681 		lck->pid = hash_lockowner(flock->fl_owner);
1682 		lck->netfid = cfile->fid.netfid;
1683 		lck->length = length;
1684 		lck->type = type;
1685 		lck->offset = flock->fl_start;
1686 	}
1687 	spin_unlock(&flctx->flc_lock);
1688 
1689 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1690 		int stored_rc;
1691 
1692 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1693 					     lck->offset, lck->length, NULL,
1694 					     lck->type, 0);
1695 		if (stored_rc)
1696 			rc = stored_rc;
1697 		list_del(&lck->llist);
1698 		kfree(lck);
1699 	}
1700 
1701 out:
1702 	free_xid(xid);
1703 	return rc;
1704 err_out:
1705 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1706 		list_del(&lck->llist);
1707 		kfree(lck);
1708 	}
1709 	goto out;
1710 }
1711 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1712 
1713 static int
1714 cifs_push_locks(struct cifsFileInfo *cfile)
1715 {
1716 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1717 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1718 	int rc = 0;
1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1720 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1721 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1722 
1723 	/* we are going to update can_cache_brlcks here - need a write access */
1724 	cifs_down_write(&cinode->lock_sem);
1725 	if (!cinode->can_cache_brlcks) {
1726 		up_write(&cinode->lock_sem);
1727 		return rc;
1728 	}
1729 
1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1731 	if (cap_unix(tcon->ses) &&
1732 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1733 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1734 		rc = cifs_push_posix_locks(cfile);
1735 	else
1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1737 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1738 
1739 	cinode->can_cache_brlcks = false;
1740 	up_write(&cinode->lock_sem);
1741 	return rc;
1742 }
1743 
1744 static void
1745 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1746 		bool *wait_flag, struct TCP_Server_Info *server)
1747 {
1748 	if (flock->fl_flags & FL_POSIX)
1749 		cifs_dbg(FYI, "Posix\n");
1750 	if (flock->fl_flags & FL_FLOCK)
1751 		cifs_dbg(FYI, "Flock\n");
1752 	if (flock->fl_flags & FL_SLEEP) {
1753 		cifs_dbg(FYI, "Blocking lock\n");
1754 		*wait_flag = true;
1755 	}
1756 	if (flock->fl_flags & FL_ACCESS)
1757 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1758 	if (flock->fl_flags & FL_LEASE)
1759 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1760 	if (flock->fl_flags &
1761 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1762 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1763 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1764 
1765 	*type = server->vals->large_lock_type;
1766 	if (flock->fl_type == F_WRLCK) {
1767 		cifs_dbg(FYI, "F_WRLCK\n");
1768 		*type |= server->vals->exclusive_lock_type;
1769 		*lock = 1;
1770 	} else if (flock->fl_type == F_UNLCK) {
1771 		cifs_dbg(FYI, "F_UNLCK\n");
1772 		*type |= server->vals->unlock_lock_type;
1773 		*unlock = 1;
1774 		/* Check if unlock includes more than one lock range */
1775 	} else if (flock->fl_type == F_RDLCK) {
1776 		cifs_dbg(FYI, "F_RDLCK\n");
1777 		*type |= server->vals->shared_lock_type;
1778 		*lock = 1;
1779 	} else if (flock->fl_type == F_EXLCK) {
1780 		cifs_dbg(FYI, "F_EXLCK\n");
1781 		*type |= server->vals->exclusive_lock_type;
1782 		*lock = 1;
1783 	} else if (flock->fl_type == F_SHLCK) {
1784 		cifs_dbg(FYI, "F_SHLCK\n");
1785 		*type |= server->vals->shared_lock_type;
1786 		*lock = 1;
1787 	} else
1788 		cifs_dbg(FYI, "Unknown type of lock\n");
1789 }
1790 
1791 static int
1792 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1793 	   bool wait_flag, bool posix_lck, unsigned int xid)
1794 {
1795 	int rc = 0;
1796 	__u64 length = cifs_flock_len(flock);
1797 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1798 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1799 	struct TCP_Server_Info *server = tcon->ses->server;
1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1801 	__u16 netfid = cfile->fid.netfid;
1802 
1803 	if (posix_lck) {
1804 		int posix_lock_type;
1805 
1806 		rc = cifs_posix_lock_test(file, flock);
1807 		if (!rc)
1808 			return rc;
1809 
1810 		if (type & server->vals->shared_lock_type)
1811 			posix_lock_type = CIFS_RDLCK;
1812 		else
1813 			posix_lock_type = CIFS_WRLCK;
1814 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1815 				      hash_lockowner(flock->fl_owner),
1816 				      flock->fl_start, length, flock,
1817 				      posix_lock_type, wait_flag);
1818 		return rc;
1819 	}
1820 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1821 
1822 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1823 	if (!rc)
1824 		return rc;
1825 
1826 	/* BB we could chain these into one lock request BB */
1827 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1828 				    1, 0, false);
1829 	if (rc == 0) {
1830 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1831 					    type, 0, 1, false);
1832 		flock->fl_type = F_UNLCK;
1833 		if (rc != 0)
1834 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1835 				 rc);
1836 		return 0;
1837 	}
1838 
1839 	if (type & server->vals->shared_lock_type) {
1840 		flock->fl_type = F_WRLCK;
1841 		return 0;
1842 	}
1843 
1844 	type &= ~server->vals->exclusive_lock_type;
1845 
1846 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1847 				    type | server->vals->shared_lock_type,
1848 				    1, 0, false);
1849 	if (rc == 0) {
1850 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1851 			type | server->vals->shared_lock_type, 0, 1, false);
1852 		flock->fl_type = F_RDLCK;
1853 		if (rc != 0)
1854 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1855 				 rc);
1856 	} else
1857 		flock->fl_type = F_WRLCK;
1858 
1859 	return 0;
1860 }
1861 
1862 void
1863 cifs_move_llist(struct list_head *source, struct list_head *dest)
1864 {
1865 	struct list_head *li, *tmp;
1866 	list_for_each_safe(li, tmp, source)
1867 		list_move(li, dest);
1868 }
1869 
1870 void
1871 cifs_free_llist(struct list_head *llist)
1872 {
1873 	struct cifsLockInfo *li, *tmp;
1874 	list_for_each_entry_safe(li, tmp, llist, llist) {
1875 		cifs_del_lock_waiters(li);
1876 		list_del(&li->llist);
1877 		kfree(li);
1878 	}
1879 }
1880 
1881 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1882 int
1883 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1884 		  unsigned int xid)
1885 {
1886 	int rc = 0, stored_rc;
1887 	static const int types[] = {
1888 		LOCKING_ANDX_LARGE_FILES,
1889 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1890 	};
1891 	unsigned int i;
1892 	unsigned int max_num, num, max_buf;
1893 	LOCKING_ANDX_RANGE *buf, *cur;
1894 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1895 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1896 	struct cifsLockInfo *li, *tmp;
1897 	__u64 length = cifs_flock_len(flock);
1898 	struct list_head tmp_llist;
1899 
1900 	INIT_LIST_HEAD(&tmp_llist);
1901 
1902 	/*
1903 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1904 	 * and check it before using.
1905 	 */
1906 	max_buf = tcon->ses->server->maxBuf;
1907 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1908 		return -EINVAL;
1909 
1910 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1911 		     PAGE_SIZE);
1912 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1913 			PAGE_SIZE);
1914 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1915 						sizeof(LOCKING_ANDX_RANGE);
1916 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1917 	if (!buf)
1918 		return -ENOMEM;
1919 
1920 	cifs_down_write(&cinode->lock_sem);
1921 	for (i = 0; i < 2; i++) {
1922 		cur = buf;
1923 		num = 0;
1924 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1925 			if (flock->fl_start > li->offset ||
1926 			    (flock->fl_start + length) <
1927 			    (li->offset + li->length))
1928 				continue;
1929 			if (current->tgid != li->pid)
1930 				continue;
1931 			if (types[i] != li->type)
1932 				continue;
1933 			if (cinode->can_cache_brlcks) {
1934 				/*
1935 				 * We can cache brlock requests - simply remove
1936 				 * a lock from the file's list.
1937 				 */
1938 				list_del(&li->llist);
1939 				cifs_del_lock_waiters(li);
1940 				kfree(li);
1941 				continue;
1942 			}
1943 			cur->Pid = cpu_to_le16(li->pid);
1944 			cur->LengthLow = cpu_to_le32((u32)li->length);
1945 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1946 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1947 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1948 			/*
1949 			 * We need to save a lock here to let us add it again to
1950 			 * the file's list if the unlock range request fails on
1951 			 * the server.
1952 			 */
1953 			list_move(&li->llist, &tmp_llist);
1954 			if (++num == max_num) {
1955 				stored_rc = cifs_lockv(xid, tcon,
1956 						       cfile->fid.netfid,
1957 						       li->type, num, 0, buf);
1958 				if (stored_rc) {
1959 					/*
1960 					 * We failed on the unlock range
1961 					 * request - add all locks from the tmp
1962 					 * list to the head of the file's list.
1963 					 */
1964 					cifs_move_llist(&tmp_llist,
1965 							&cfile->llist->locks);
1966 					rc = stored_rc;
1967 				} else
1968 					/*
1969 					 * The unlock range request succeed -
1970 					 * free the tmp list.
1971 					 */
1972 					cifs_free_llist(&tmp_llist);
1973 				cur = buf;
1974 				num = 0;
1975 			} else
1976 				cur++;
1977 		}
1978 		if (num) {
1979 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1980 					       types[i], num, 0, buf);
1981 			if (stored_rc) {
1982 				cifs_move_llist(&tmp_llist,
1983 						&cfile->llist->locks);
1984 				rc = stored_rc;
1985 			} else
1986 				cifs_free_llist(&tmp_llist);
1987 		}
1988 	}
1989 
1990 	up_write(&cinode->lock_sem);
1991 	kfree(buf);
1992 	return rc;
1993 }
1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1995 
1996 static int
1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1998 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1999 	   unsigned int xid)
2000 {
2001 	int rc = 0;
2002 	__u64 length = cifs_flock_len(flock);
2003 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2004 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2005 	struct TCP_Server_Info *server = tcon->ses->server;
2006 	struct inode *inode = d_inode(cfile->dentry);
2007 
2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2009 	if (posix_lck) {
2010 		int posix_lock_type;
2011 
2012 		rc = cifs_posix_lock_set(file, flock);
2013 		if (rc <= FILE_LOCK_DEFERRED)
2014 			return rc;
2015 
2016 		if (type & server->vals->shared_lock_type)
2017 			posix_lock_type = CIFS_RDLCK;
2018 		else
2019 			posix_lock_type = CIFS_WRLCK;
2020 
2021 		if (unlock == 1)
2022 			posix_lock_type = CIFS_UNLCK;
2023 
2024 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2025 				      hash_lockowner(flock->fl_owner),
2026 				      flock->fl_start, length,
2027 				      NULL, posix_lock_type, wait_flag);
2028 		goto out;
2029 	}
2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2031 	if (lock) {
2032 		struct cifsLockInfo *lock;
2033 
2034 		lock = cifs_lock_init(flock->fl_start, length, type,
2035 				      flock->fl_flags);
2036 		if (!lock)
2037 			return -ENOMEM;
2038 
2039 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2040 		if (rc < 0) {
2041 			kfree(lock);
2042 			return rc;
2043 		}
2044 		if (!rc)
2045 			goto out;
2046 
2047 		/*
2048 		 * Windows 7 server can delay breaking lease from read to None
2049 		 * if we set a byte-range lock on a file - break it explicitly
2050 		 * before sending the lock to the server to be sure the next
2051 		 * read won't conflict with non-overlapted locks due to
2052 		 * pagereading.
2053 		 */
2054 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2055 					CIFS_CACHE_READ(CIFS_I(inode))) {
2056 			cifs_zap_mapping(inode);
2057 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2058 				 inode);
2059 			CIFS_I(inode)->oplock = 0;
2060 		}
2061 
2062 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2063 					    type, 1, 0, wait_flag);
2064 		if (rc) {
2065 			kfree(lock);
2066 			return rc;
2067 		}
2068 
2069 		cifs_lock_add(cfile, lock);
2070 	} else if (unlock)
2071 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2072 
2073 out:
2074 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2075 		/*
2076 		 * If this is a request to remove all locks because we
2077 		 * are closing the file, it doesn't matter if the
2078 		 * unlocking failed as both cifs.ko and the SMB server
2079 		 * remove the lock on file close
2080 		 */
2081 		if (rc) {
2082 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2083 			if (!(flock->fl_flags & FL_CLOSE))
2084 				return rc;
2085 		}
2086 		rc = locks_lock_file_wait(file, flock);
2087 	}
2088 	return rc;
2089 }
2090 
2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2092 {
2093 	int rc, xid;
2094 	int lock = 0, unlock = 0;
2095 	bool wait_flag = false;
2096 	bool posix_lck = false;
2097 	struct cifs_sb_info *cifs_sb;
2098 	struct cifs_tcon *tcon;
2099 	struct cifsFileInfo *cfile;
2100 	__u32 type;
2101 
2102 	xid = get_xid();
2103 
2104 	if (!(fl->fl_flags & FL_FLOCK)) {
2105 		rc = -ENOLCK;
2106 		free_xid(xid);
2107 		return rc;
2108 	}
2109 
2110 	cfile = (struct cifsFileInfo *)file->private_data;
2111 	tcon = tlink_tcon(cfile->tlink);
2112 
2113 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2114 			tcon->ses->server);
2115 	cifs_sb = CIFS_FILE_SB(file);
2116 
2117 	if (cap_unix(tcon->ses) &&
2118 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2119 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2120 		posix_lck = true;
2121 
2122 	if (!lock && !unlock) {
2123 		/*
2124 		 * if no lock or unlock then nothing to do since we do not
2125 		 * know what it is
2126 		 */
2127 		rc = -EOPNOTSUPP;
2128 		free_xid(xid);
2129 		return rc;
2130 	}
2131 
2132 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2133 			xid);
2134 	free_xid(xid);
2135 	return rc;
2136 
2137 
2138 }
2139 
2140 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2141 {
2142 	int rc, xid;
2143 	int lock = 0, unlock = 0;
2144 	bool wait_flag = false;
2145 	bool posix_lck = false;
2146 	struct cifs_sb_info *cifs_sb;
2147 	struct cifs_tcon *tcon;
2148 	struct cifsFileInfo *cfile;
2149 	__u32 type;
2150 
2151 	rc = -EACCES;
2152 	xid = get_xid();
2153 
2154 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2155 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2156 		 (long long)flock->fl_end);
2157 
2158 	cfile = (struct cifsFileInfo *)file->private_data;
2159 	tcon = tlink_tcon(cfile->tlink);
2160 
2161 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2162 			tcon->ses->server);
2163 	cifs_sb = CIFS_FILE_SB(file);
2164 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2165 
2166 	if (cap_unix(tcon->ses) &&
2167 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2168 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2169 		posix_lck = true;
2170 	/*
2171 	 * BB add code here to normalize offset and length to account for
2172 	 * negative length which we can not accept over the wire.
2173 	 */
2174 	if (IS_GETLK(cmd)) {
2175 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2176 		free_xid(xid);
2177 		return rc;
2178 	}
2179 
2180 	if (!lock && !unlock) {
2181 		/*
2182 		 * if no lock or unlock then nothing to do since we do not
2183 		 * know what it is
2184 		 */
2185 		free_xid(xid);
2186 		return -EOPNOTSUPP;
2187 	}
2188 
2189 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2190 			xid);
2191 	free_xid(xid);
2192 	return rc;
2193 }
2194 
2195 /*
2196  * update the file size (if needed) after a write. Should be called with
2197  * the inode->i_lock held
2198  */
2199 void
2200 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2201 		      unsigned int bytes_written)
2202 {
2203 	loff_t end_of_write = offset + bytes_written;
2204 
2205 	if (end_of_write > cifsi->server_eof)
2206 		cifsi->server_eof = end_of_write;
2207 }
2208 
2209 static ssize_t
2210 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2211 	   size_t write_size, loff_t *offset)
2212 {
2213 	int rc = 0;
2214 	unsigned int bytes_written = 0;
2215 	unsigned int total_written;
2216 	struct cifs_tcon *tcon;
2217 	struct TCP_Server_Info *server;
2218 	unsigned int xid;
2219 	struct dentry *dentry = open_file->dentry;
2220 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2221 	struct cifs_io_parms io_parms = {0};
2222 
2223 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2224 		 write_size, *offset, dentry);
2225 
2226 	tcon = tlink_tcon(open_file->tlink);
2227 	server = tcon->ses->server;
2228 
2229 	if (!server->ops->sync_write)
2230 		return -ENOSYS;
2231 
2232 	xid = get_xid();
2233 
2234 	for (total_written = 0; write_size > total_written;
2235 	     total_written += bytes_written) {
2236 		rc = -EAGAIN;
2237 		while (rc == -EAGAIN) {
2238 			struct kvec iov[2];
2239 			unsigned int len;
2240 
2241 			if (open_file->invalidHandle) {
2242 				/* we could deadlock if we called
2243 				   filemap_fdatawait from here so tell
2244 				   reopen_file not to flush data to
2245 				   server now */
2246 				rc = cifs_reopen_file(open_file, false);
2247 				if (rc != 0)
2248 					break;
2249 			}
2250 
2251 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2252 				  (unsigned int)write_size - total_written);
2253 			/* iov[0] is reserved for smb header */
2254 			iov[1].iov_base = (char *)write_data + total_written;
2255 			iov[1].iov_len = len;
2256 			io_parms.pid = pid;
2257 			io_parms.tcon = tcon;
2258 			io_parms.offset = *offset;
2259 			io_parms.length = len;
2260 			rc = server->ops->sync_write(xid, &open_file->fid,
2261 					&io_parms, &bytes_written, iov, 1);
2262 		}
2263 		if (rc || (bytes_written == 0)) {
2264 			if (total_written)
2265 				break;
2266 			else {
2267 				free_xid(xid);
2268 				return rc;
2269 			}
2270 		} else {
2271 			spin_lock(&d_inode(dentry)->i_lock);
2272 			cifs_update_eof(cifsi, *offset, bytes_written);
2273 			spin_unlock(&d_inode(dentry)->i_lock);
2274 			*offset += bytes_written;
2275 		}
2276 	}
2277 
2278 	cifs_stats_bytes_written(tcon, total_written);
2279 
2280 	if (total_written > 0) {
2281 		spin_lock(&d_inode(dentry)->i_lock);
2282 		if (*offset > d_inode(dentry)->i_size) {
2283 			i_size_write(d_inode(dentry), *offset);
2284 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2285 		}
2286 		spin_unlock(&d_inode(dentry)->i_lock);
2287 	}
2288 	mark_inode_dirty_sync(d_inode(dentry));
2289 	free_xid(xid);
2290 	return total_written;
2291 }
2292 
2293 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2294 					bool fsuid_only)
2295 {
2296 	struct cifsFileInfo *open_file = NULL;
2297 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2298 
2299 	/* only filter by fsuid on multiuser mounts */
2300 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2301 		fsuid_only = false;
2302 
2303 	spin_lock(&cifs_inode->open_file_lock);
2304 	/* we could simply get the first_list_entry since write-only entries
2305 	   are always at the end of the list but since the first entry might
2306 	   have a close pending, we go through the whole list */
2307 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2308 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2309 			continue;
2310 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2311 			if ((!open_file->invalidHandle)) {
2312 				/* found a good file */
2313 				/* lock it so it will not be closed on us */
2314 				cifsFileInfo_get(open_file);
2315 				spin_unlock(&cifs_inode->open_file_lock);
2316 				return open_file;
2317 			} /* else might as well continue, and look for
2318 			     another, or simply have the caller reopen it
2319 			     again rather than trying to fix this handle */
2320 		} else /* write only file */
2321 			break; /* write only files are last so must be done */
2322 	}
2323 	spin_unlock(&cifs_inode->open_file_lock);
2324 	return NULL;
2325 }
2326 
2327 /* Return -EBADF if no handle is found and general rc otherwise */
2328 int
2329 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2330 		       struct cifsFileInfo **ret_file)
2331 {
2332 	struct cifsFileInfo *open_file, *inv_file = NULL;
2333 	struct cifs_sb_info *cifs_sb;
2334 	bool any_available = false;
2335 	int rc = -EBADF;
2336 	unsigned int refind = 0;
2337 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2338 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2339 	*ret_file = NULL;
2340 
2341 	/*
2342 	 * Having a null inode here (because mapping->host was set to zero by
2343 	 * the VFS or MM) should not happen but we had reports of on oops (due
2344 	 * to it being zero) during stress testcases so we need to check for it
2345 	 */
2346 
2347 	if (cifs_inode == NULL) {
2348 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2349 		dump_stack();
2350 		return rc;
2351 	}
2352 
2353 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2354 
2355 	/* only filter by fsuid on multiuser mounts */
2356 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2357 		fsuid_only = false;
2358 
2359 	spin_lock(&cifs_inode->open_file_lock);
2360 refind_writable:
2361 	if (refind > MAX_REOPEN_ATT) {
2362 		spin_unlock(&cifs_inode->open_file_lock);
2363 		return rc;
2364 	}
2365 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2366 		if (!any_available && open_file->pid != current->tgid)
2367 			continue;
2368 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2369 			continue;
2370 		if (with_delete && !(open_file->fid.access & DELETE))
2371 			continue;
2372 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2373 			if (!open_file->invalidHandle) {
2374 				/* found a good writable file */
2375 				cifsFileInfo_get(open_file);
2376 				spin_unlock(&cifs_inode->open_file_lock);
2377 				*ret_file = open_file;
2378 				return 0;
2379 			} else {
2380 				if (!inv_file)
2381 					inv_file = open_file;
2382 			}
2383 		}
2384 	}
2385 	/* couldn't find useable FH with same pid, try any available */
2386 	if (!any_available) {
2387 		any_available = true;
2388 		goto refind_writable;
2389 	}
2390 
2391 	if (inv_file) {
2392 		any_available = false;
2393 		cifsFileInfo_get(inv_file);
2394 	}
2395 
2396 	spin_unlock(&cifs_inode->open_file_lock);
2397 
2398 	if (inv_file) {
2399 		rc = cifs_reopen_file(inv_file, false);
2400 		if (!rc) {
2401 			*ret_file = inv_file;
2402 			return 0;
2403 		}
2404 
2405 		spin_lock(&cifs_inode->open_file_lock);
2406 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2407 		spin_unlock(&cifs_inode->open_file_lock);
2408 		cifsFileInfo_put(inv_file);
2409 		++refind;
2410 		inv_file = NULL;
2411 		spin_lock(&cifs_inode->open_file_lock);
2412 		goto refind_writable;
2413 	}
2414 
2415 	return rc;
2416 }
2417 
2418 struct cifsFileInfo *
2419 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2420 {
2421 	struct cifsFileInfo *cfile;
2422 	int rc;
2423 
2424 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2425 	if (rc)
2426 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2427 
2428 	return cfile;
2429 }
2430 
2431 int
2432 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2433 		       int flags,
2434 		       struct cifsFileInfo **ret_file)
2435 {
2436 	struct cifsFileInfo *cfile;
2437 	void *page = alloc_dentry_path();
2438 
2439 	*ret_file = NULL;
2440 
2441 	spin_lock(&tcon->open_file_lock);
2442 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2443 		struct cifsInodeInfo *cinode;
2444 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2445 		if (IS_ERR(full_path)) {
2446 			spin_unlock(&tcon->open_file_lock);
2447 			free_dentry_path(page);
2448 			return PTR_ERR(full_path);
2449 		}
2450 		if (strcmp(full_path, name))
2451 			continue;
2452 
2453 		cinode = CIFS_I(d_inode(cfile->dentry));
2454 		spin_unlock(&tcon->open_file_lock);
2455 		free_dentry_path(page);
2456 		return cifs_get_writable_file(cinode, flags, ret_file);
2457 	}
2458 
2459 	spin_unlock(&tcon->open_file_lock);
2460 	free_dentry_path(page);
2461 	return -ENOENT;
2462 }
2463 
2464 int
2465 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2466 		       struct cifsFileInfo **ret_file)
2467 {
2468 	struct cifsFileInfo *cfile;
2469 	void *page = alloc_dentry_path();
2470 
2471 	*ret_file = NULL;
2472 
2473 	spin_lock(&tcon->open_file_lock);
2474 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2475 		struct cifsInodeInfo *cinode;
2476 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2477 		if (IS_ERR(full_path)) {
2478 			spin_unlock(&tcon->open_file_lock);
2479 			free_dentry_path(page);
2480 			return PTR_ERR(full_path);
2481 		}
2482 		if (strcmp(full_path, name))
2483 			continue;
2484 
2485 		cinode = CIFS_I(d_inode(cfile->dentry));
2486 		spin_unlock(&tcon->open_file_lock);
2487 		free_dentry_path(page);
2488 		*ret_file = find_readable_file(cinode, 0);
2489 		return *ret_file ? 0 : -ENOENT;
2490 	}
2491 
2492 	spin_unlock(&tcon->open_file_lock);
2493 	free_dentry_path(page);
2494 	return -ENOENT;
2495 }
2496 
2497 void
2498 cifs_writedata_release(struct kref *refcount)
2499 {
2500 	struct cifs_writedata *wdata = container_of(refcount,
2501 					struct cifs_writedata, refcount);
2502 #ifdef CONFIG_CIFS_SMB_DIRECT
2503 	if (wdata->mr) {
2504 		smbd_deregister_mr(wdata->mr);
2505 		wdata->mr = NULL;
2506 	}
2507 #endif
2508 
2509 	if (wdata->cfile)
2510 		cifsFileInfo_put(wdata->cfile);
2511 
2512 	kfree(wdata);
2513 }
2514 
2515 /*
2516  * Write failed with a retryable error. Resend the write request. It's also
2517  * possible that the page was redirtied so re-clean the page.
2518  */
2519 static void
2520 cifs_writev_requeue(struct cifs_writedata *wdata)
2521 {
2522 	int rc = 0;
2523 	struct inode *inode = d_inode(wdata->cfile->dentry);
2524 	struct TCP_Server_Info *server;
2525 	unsigned int rest_len = wdata->bytes;
2526 	loff_t fpos = wdata->offset;
2527 
2528 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2529 	do {
2530 		struct cifs_writedata *wdata2;
2531 		unsigned int wsize, cur_len;
2532 
2533 		wsize = server->ops->wp_retry_size(inode);
2534 		if (wsize < rest_len) {
2535 			if (wsize < PAGE_SIZE) {
2536 				rc = -EOPNOTSUPP;
2537 				break;
2538 			}
2539 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2540 		} else {
2541 			cur_len = rest_len;
2542 		}
2543 
2544 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2545 		if (!wdata2) {
2546 			rc = -ENOMEM;
2547 			break;
2548 		}
2549 
2550 		wdata2->sync_mode = wdata->sync_mode;
2551 		wdata2->offset	= fpos;
2552 		wdata2->bytes	= cur_len;
2553 		wdata2->iter	= wdata->iter;
2554 
2555 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2556 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2557 
2558 		if (iov_iter_is_xarray(&wdata2->iter))
2559 			/* Check for pages having been redirtied and clean
2560 			 * them.  We can do this by walking the xarray.  If
2561 			 * it's not an xarray, then it's a DIO and we shouldn't
2562 			 * be mucking around with the page bits.
2563 			 */
2564 			cifs_undirty_folios(inode, fpos, cur_len);
2565 
2566 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2567 					    &wdata2->cfile);
2568 		if (!wdata2->cfile) {
2569 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2570 				 rc);
2571 			if (!is_retryable_error(rc))
2572 				rc = -EBADF;
2573 		} else {
2574 			wdata2->pid = wdata2->cfile->pid;
2575 			rc = server->ops->async_writev(wdata2,
2576 						       cifs_writedata_release);
2577 		}
2578 
2579 		kref_put(&wdata2->refcount, cifs_writedata_release);
2580 		if (rc) {
2581 			if (is_retryable_error(rc))
2582 				continue;
2583 			fpos += cur_len;
2584 			rest_len -= cur_len;
2585 			break;
2586 		}
2587 
2588 		fpos += cur_len;
2589 		rest_len -= cur_len;
2590 	} while (rest_len > 0);
2591 
2592 	/* Clean up remaining pages from the original wdata */
2593 	if (iov_iter_is_xarray(&wdata->iter))
2594 		cifs_pages_write_failed(inode, fpos, rest_len);
2595 
2596 	if (rc != 0 && !is_retryable_error(rc))
2597 		mapping_set_error(inode->i_mapping, rc);
2598 	kref_put(&wdata->refcount, cifs_writedata_release);
2599 }
2600 
2601 void
2602 cifs_writev_complete(struct work_struct *work)
2603 {
2604 	struct cifs_writedata *wdata = container_of(work,
2605 						struct cifs_writedata, work);
2606 	struct inode *inode = d_inode(wdata->cfile->dentry);
2607 
2608 	if (wdata->result == 0) {
2609 		spin_lock(&inode->i_lock);
2610 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2611 		spin_unlock(&inode->i_lock);
2612 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2613 					 wdata->bytes);
2614 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2615 		return cifs_writev_requeue(wdata);
2616 
2617 	if (wdata->result == -EAGAIN)
2618 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2619 	else if (wdata->result < 0)
2620 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2621 	else
2622 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2623 
2624 	if (wdata->result != -EAGAIN)
2625 		mapping_set_error(inode->i_mapping, wdata->result);
2626 	kref_put(&wdata->refcount, cifs_writedata_release);
2627 }
2628 
2629 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2630 {
2631 	struct cifs_writedata *wdata;
2632 
2633 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2634 	if (wdata != NULL) {
2635 		kref_init(&wdata->refcount);
2636 		INIT_LIST_HEAD(&wdata->list);
2637 		init_completion(&wdata->done);
2638 		INIT_WORK(&wdata->work, complete);
2639 	}
2640 	return wdata;
2641 }
2642 
2643 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2644 {
2645 	struct address_space *mapping = page->mapping;
2646 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2647 	char *write_data;
2648 	int rc = -EFAULT;
2649 	int bytes_written = 0;
2650 	struct inode *inode;
2651 	struct cifsFileInfo *open_file;
2652 
2653 	if (!mapping || !mapping->host)
2654 		return -EFAULT;
2655 
2656 	inode = page->mapping->host;
2657 
2658 	offset += (loff_t)from;
2659 	write_data = kmap(page);
2660 	write_data += from;
2661 
2662 	if ((to > PAGE_SIZE) || (from > to)) {
2663 		kunmap(page);
2664 		return -EIO;
2665 	}
2666 
2667 	/* racing with truncate? */
2668 	if (offset > mapping->host->i_size) {
2669 		kunmap(page);
2670 		return 0; /* don't care */
2671 	}
2672 
2673 	/* check to make sure that we are not extending the file */
2674 	if (mapping->host->i_size - offset < (loff_t)to)
2675 		to = (unsigned)(mapping->host->i_size - offset);
2676 
2677 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2678 				    &open_file);
2679 	if (!rc) {
2680 		bytes_written = cifs_write(open_file, open_file->pid,
2681 					   write_data, to - from, &offset);
2682 		cifsFileInfo_put(open_file);
2683 		/* Does mm or vfs already set times? */
2684 		simple_inode_init_ts(inode);
2685 		if ((bytes_written > 0) && (offset))
2686 			rc = 0;
2687 		else if (bytes_written < 0)
2688 			rc = bytes_written;
2689 		else
2690 			rc = -EFAULT;
2691 	} else {
2692 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2693 		if (!is_retryable_error(rc))
2694 			rc = -EIO;
2695 	}
2696 
2697 	kunmap(page);
2698 	return rc;
2699 }
2700 
2701 /*
2702  * Extend the region to be written back to include subsequent contiguously
2703  * dirty pages if possible, but don't sleep while doing so.
2704  */
2705 static void cifs_extend_writeback(struct address_space *mapping,
2706 				  struct xa_state *xas,
2707 				  long *_count,
2708 				  loff_t start,
2709 				  int max_pages,
2710 				  loff_t max_len,
2711 				  size_t *_len)
2712 {
2713 	struct folio_batch batch;
2714 	struct folio *folio;
2715 	unsigned int nr_pages;
2716 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2717 	size_t len;
2718 	bool stop = true;
2719 	unsigned int i;
2720 
2721 	folio_batch_init(&batch);
2722 
2723 	do {
2724 		/* Firstly, we gather up a batch of contiguous dirty pages
2725 		 * under the RCU read lock - but we can't clear the dirty flags
2726 		 * there if any of those pages are mapped.
2727 		 */
2728 		rcu_read_lock();
2729 
2730 		xas_for_each(xas, folio, ULONG_MAX) {
2731 			stop = true;
2732 			if (xas_retry(xas, folio))
2733 				continue;
2734 			if (xa_is_value(folio))
2735 				break;
2736 			if (folio->index != index) {
2737 				xas_reset(xas);
2738 				break;
2739 			}
2740 
2741 			if (!folio_try_get_rcu(folio)) {
2742 				xas_reset(xas);
2743 				continue;
2744 			}
2745 			nr_pages = folio_nr_pages(folio);
2746 			if (nr_pages > max_pages) {
2747 				xas_reset(xas);
2748 				break;
2749 			}
2750 
2751 			/* Has the page moved or been split? */
2752 			if (unlikely(folio != xas_reload(xas))) {
2753 				folio_put(folio);
2754 				xas_reset(xas);
2755 				break;
2756 			}
2757 
2758 			if (!folio_trylock(folio)) {
2759 				folio_put(folio);
2760 				xas_reset(xas);
2761 				break;
2762 			}
2763 			if (!folio_test_dirty(folio) ||
2764 			    folio_test_writeback(folio)) {
2765 				folio_unlock(folio);
2766 				folio_put(folio);
2767 				xas_reset(xas);
2768 				break;
2769 			}
2770 
2771 			max_pages -= nr_pages;
2772 			len = folio_size(folio);
2773 			stop = false;
2774 
2775 			index += nr_pages;
2776 			*_count -= nr_pages;
2777 			*_len += len;
2778 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2779 				stop = true;
2780 
2781 			if (!folio_batch_add(&batch, folio))
2782 				break;
2783 			if (stop)
2784 				break;
2785 		}
2786 
2787 		xas_pause(xas);
2788 		rcu_read_unlock();
2789 
2790 		/* Now, if we obtained any pages, we can shift them to being
2791 		 * writable and mark them for caching.
2792 		 */
2793 		if (!folio_batch_count(&batch))
2794 			break;
2795 
2796 		for (i = 0; i < folio_batch_count(&batch); i++) {
2797 			folio = batch.folios[i];
2798 			/* The folio should be locked, dirty and not undergoing
2799 			 * writeback from the loop above.
2800 			 */
2801 			if (!folio_clear_dirty_for_io(folio))
2802 				WARN_ON(1);
2803 			folio_start_writeback(folio);
2804 			folio_unlock(folio);
2805 		}
2806 
2807 		folio_batch_release(&batch);
2808 		cond_resched();
2809 	} while (!stop);
2810 }
2811 
2812 /*
2813  * Write back the locked page and any subsequent non-locked dirty pages.
2814  */
2815 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2816 						 struct writeback_control *wbc,
2817 						 struct xa_state *xas,
2818 						 struct folio *folio,
2819 						 unsigned long long start,
2820 						 unsigned long long end)
2821 {
2822 	struct inode *inode = mapping->host;
2823 	struct TCP_Server_Info *server;
2824 	struct cifs_writedata *wdata;
2825 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2826 	struct cifs_credits credits_on_stack;
2827 	struct cifs_credits *credits = &credits_on_stack;
2828 	struct cifsFileInfo *cfile = NULL;
2829 	unsigned long long i_size = i_size_read(inode), max_len;
2830 	unsigned int xid, wsize;
2831 	size_t len = folio_size(folio);
2832 	long count = wbc->nr_to_write;
2833 	int rc;
2834 
2835 	/* The folio should be locked, dirty and not undergoing writeback. */
2836 	if (!folio_clear_dirty_for_io(folio))
2837 		WARN_ON_ONCE(1);
2838 	folio_start_writeback(folio);
2839 
2840 	count -= folio_nr_pages(folio);
2841 
2842 	xid = get_xid();
2843 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2844 
2845 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2846 	if (rc) {
2847 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2848 		goto err_xid;
2849 	}
2850 
2851 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2852 					   &wsize, credits);
2853 	if (rc != 0)
2854 		goto err_close;
2855 
2856 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2857 	if (!wdata) {
2858 		rc = -ENOMEM;
2859 		goto err_uncredit;
2860 	}
2861 
2862 	wdata->sync_mode = wbc->sync_mode;
2863 	wdata->offset = folio_pos(folio);
2864 	wdata->pid = cfile->pid;
2865 	wdata->credits = credits_on_stack;
2866 	wdata->cfile = cfile;
2867 	wdata->server = server;
2868 	cfile = NULL;
2869 
2870 	/* Find all consecutive lockable dirty pages that have contiguous
2871 	 * written regions, stopping when we find a page that is not
2872 	 * immediately lockable, is not dirty or is missing, or we reach the
2873 	 * end of the range.
2874 	 */
2875 	if (start < i_size) {
2876 		/* Trim the write to the EOF; the extra data is ignored.  Also
2877 		 * put an upper limit on the size of a single storedata op.
2878 		 */
2879 		max_len = wsize;
2880 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2881 		max_len = min_t(unsigned long long, max_len, i_size - start);
2882 
2883 		if (len < max_len) {
2884 			int max_pages = INT_MAX;
2885 
2886 #ifdef CONFIG_CIFS_SMB_DIRECT
2887 			if (server->smbd_conn)
2888 				max_pages = server->smbd_conn->max_frmr_depth;
2889 #endif
2890 			max_pages -= folio_nr_pages(folio);
2891 
2892 			if (max_pages > 0)
2893 				cifs_extend_writeback(mapping, xas, &count, start,
2894 						      max_pages, max_len, &len);
2895 		}
2896 	}
2897 	len = min_t(unsigned long long, len, i_size - start);
2898 
2899 	/* We now have a contiguous set of dirty pages, each with writeback
2900 	 * set; the first page is still locked at this point, but all the rest
2901 	 * have been unlocked.
2902 	 */
2903 	folio_unlock(folio);
2904 	wdata->bytes = len;
2905 
2906 	if (start < i_size) {
2907 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2908 				start, len);
2909 
2910 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2911 		if (rc)
2912 			goto err_wdata;
2913 
2914 		if (wdata->cfile->invalidHandle)
2915 			rc = -EAGAIN;
2916 		else
2917 			rc = wdata->server->ops->async_writev(wdata,
2918 							      cifs_writedata_release);
2919 		if (rc >= 0) {
2920 			kref_put(&wdata->refcount, cifs_writedata_release);
2921 			goto err_close;
2922 		}
2923 	} else {
2924 		/* The dirty region was entirely beyond the EOF. */
2925 		cifs_pages_written_back(inode, start, len);
2926 		rc = 0;
2927 	}
2928 
2929 err_wdata:
2930 	kref_put(&wdata->refcount, cifs_writedata_release);
2931 err_uncredit:
2932 	add_credits_and_wake_if(server, credits, 0);
2933 err_close:
2934 	if (cfile)
2935 		cifsFileInfo_put(cfile);
2936 err_xid:
2937 	free_xid(xid);
2938 	if (rc == 0) {
2939 		wbc->nr_to_write = count;
2940 		rc = len;
2941 	} else if (is_retryable_error(rc)) {
2942 		cifs_pages_write_redirty(inode, start, len);
2943 	} else {
2944 		cifs_pages_write_failed(inode, start, len);
2945 		mapping_set_error(mapping, rc);
2946 	}
2947 	/* Indication to update ctime and mtime as close is deferred */
2948 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2949 	return rc;
2950 }
2951 
2952 /*
2953  * write a region of pages back to the server
2954  */
2955 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2956 				     struct writeback_control *wbc,
2957 				     struct xa_state *xas,
2958 				     unsigned long long *_start,
2959 				     unsigned long long end)
2960 {
2961 	struct folio *folio;
2962 	unsigned long long start = *_start;
2963 	ssize_t ret;
2964 	int skips = 0;
2965 
2966 search_again:
2967 	/* Find the first dirty page. */
2968 	rcu_read_lock();
2969 
2970 	for (;;) {
2971 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2972 		if (xas_retry(xas, folio) || xa_is_value(folio))
2973 			continue;
2974 		if (!folio)
2975 			break;
2976 
2977 		if (!folio_try_get_rcu(folio)) {
2978 			xas_reset(xas);
2979 			continue;
2980 		}
2981 
2982 		if (unlikely(folio != xas_reload(xas))) {
2983 			folio_put(folio);
2984 			xas_reset(xas);
2985 			continue;
2986 		}
2987 
2988 		xas_pause(xas);
2989 		break;
2990 	}
2991 	rcu_read_unlock();
2992 	if (!folio)
2993 		return 0;
2994 
2995 	start = folio_pos(folio); /* May regress with THPs */
2996 
2997 	/* At this point we hold neither the i_pages lock nor the page lock:
2998 	 * the page may be truncated or invalidated (changing page->mapping to
2999 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3000 	 * mapping
3001 	 */
3002 lock_again:
3003 	if (wbc->sync_mode != WB_SYNC_NONE) {
3004 		ret = folio_lock_killable(folio);
3005 		if (ret < 0)
3006 			return ret;
3007 	} else {
3008 		if (!folio_trylock(folio))
3009 			goto search_again;
3010 	}
3011 
3012 	if (folio->mapping != mapping ||
3013 	    !folio_test_dirty(folio)) {
3014 		start += folio_size(folio);
3015 		folio_unlock(folio);
3016 		goto search_again;
3017 	}
3018 
3019 	if (folio_test_writeback(folio) ||
3020 	    folio_test_fscache(folio)) {
3021 		folio_unlock(folio);
3022 		if (wbc->sync_mode != WB_SYNC_NONE) {
3023 			folio_wait_writeback(folio);
3024 #ifdef CONFIG_CIFS_FSCACHE
3025 			folio_wait_fscache(folio);
3026 #endif
3027 			goto lock_again;
3028 		}
3029 
3030 		start += folio_size(folio);
3031 		if (wbc->sync_mode == WB_SYNC_NONE) {
3032 			if (skips >= 5 || need_resched()) {
3033 				ret = 0;
3034 				goto out;
3035 			}
3036 			skips++;
3037 		}
3038 		goto search_again;
3039 	}
3040 
3041 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3042 out:
3043 	if (ret > 0)
3044 		*_start = start + ret;
3045 	return ret;
3046 }
3047 
3048 /*
3049  * Write a region of pages back to the server
3050  */
3051 static int cifs_writepages_region(struct address_space *mapping,
3052 				  struct writeback_control *wbc,
3053 				  unsigned long long *_start,
3054 				  unsigned long long end)
3055 {
3056 	ssize_t ret;
3057 
3058 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3059 
3060 	do {
3061 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3062 		if (ret > 0 && wbc->nr_to_write > 0)
3063 			cond_resched();
3064 	} while (ret > 0 && wbc->nr_to_write > 0);
3065 
3066 	return ret > 0 ? 0 : ret;
3067 }
3068 
3069 /*
3070  * Write some of the pending data back to the server
3071  */
3072 static int cifs_writepages(struct address_space *mapping,
3073 			   struct writeback_control *wbc)
3074 {
3075 	loff_t start, end;
3076 	int ret;
3077 
3078 	/* We have to be careful as we can end up racing with setattr()
3079 	 * truncating the pagecache since the caller doesn't take a lock here
3080 	 * to prevent it.
3081 	 */
3082 
3083 	if (wbc->range_cyclic && mapping->writeback_index) {
3084 		start = mapping->writeback_index * PAGE_SIZE;
3085 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3086 		if (ret < 0)
3087 			goto out;
3088 
3089 		if (wbc->nr_to_write <= 0) {
3090 			mapping->writeback_index = start / PAGE_SIZE;
3091 			goto out;
3092 		}
3093 
3094 		start = 0;
3095 		end = mapping->writeback_index * PAGE_SIZE;
3096 		mapping->writeback_index = 0;
3097 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3098 		if (ret == 0)
3099 			mapping->writeback_index = start / PAGE_SIZE;
3100 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3101 		start = 0;
3102 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3103 		if (wbc->nr_to_write > 0 && ret == 0)
3104 			mapping->writeback_index = start / PAGE_SIZE;
3105 	} else {
3106 		start = wbc->range_start;
3107 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3108 	}
3109 
3110 out:
3111 	return ret;
3112 }
3113 
3114 static int
3115 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3116 {
3117 	int rc;
3118 	unsigned int xid;
3119 
3120 	xid = get_xid();
3121 /* BB add check for wbc flags */
3122 	get_page(page);
3123 	if (!PageUptodate(page))
3124 		cifs_dbg(FYI, "ppw - page not up to date\n");
3125 
3126 	/*
3127 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3128 	 *
3129 	 * A writepage() implementation always needs to do either this,
3130 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3131 	 * the case of a failure.
3132 	 *
3133 	 * Just unlocking the page will cause the radix tree tag-bits
3134 	 * to fail to update with the state of the page correctly.
3135 	 */
3136 	set_page_writeback(page);
3137 retry_write:
3138 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3139 	if (is_retryable_error(rc)) {
3140 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3141 			goto retry_write;
3142 		redirty_page_for_writepage(wbc, page);
3143 	} else if (rc != 0) {
3144 		SetPageError(page);
3145 		mapping_set_error(page->mapping, rc);
3146 	} else {
3147 		SetPageUptodate(page);
3148 	}
3149 	end_page_writeback(page);
3150 	put_page(page);
3151 	free_xid(xid);
3152 	return rc;
3153 }
3154 
3155 static int cifs_write_end(struct file *file, struct address_space *mapping,
3156 			loff_t pos, unsigned len, unsigned copied,
3157 			struct page *page, void *fsdata)
3158 {
3159 	int rc;
3160 	struct inode *inode = mapping->host;
3161 	struct cifsFileInfo *cfile = file->private_data;
3162 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3163 	struct folio *folio = page_folio(page);
3164 	__u32 pid;
3165 
3166 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3167 		pid = cfile->pid;
3168 	else
3169 		pid = current->tgid;
3170 
3171 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3172 		 page, pos, copied);
3173 
3174 	if (folio_test_checked(folio)) {
3175 		if (copied == len)
3176 			folio_mark_uptodate(folio);
3177 		folio_clear_checked(folio);
3178 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3179 		folio_mark_uptodate(folio);
3180 
3181 	if (!folio_test_uptodate(folio)) {
3182 		char *page_data;
3183 		unsigned offset = pos & (PAGE_SIZE - 1);
3184 		unsigned int xid;
3185 
3186 		xid = get_xid();
3187 		/* this is probably better than directly calling
3188 		   partialpage_write since in this function the file handle is
3189 		   known which we might as well	leverage */
3190 		/* BB check if anything else missing out of ppw
3191 		   such as updating last write time */
3192 		page_data = kmap(page);
3193 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3194 		/* if (rc < 0) should we set writebehind rc? */
3195 		kunmap(page);
3196 
3197 		free_xid(xid);
3198 	} else {
3199 		rc = copied;
3200 		pos += copied;
3201 		set_page_dirty(page);
3202 	}
3203 
3204 	if (rc > 0) {
3205 		spin_lock(&inode->i_lock);
3206 		if (pos > inode->i_size) {
3207 			i_size_write(inode, pos);
3208 			inode->i_blocks = (512 - 1 + pos) >> 9;
3209 		}
3210 		spin_unlock(&inode->i_lock);
3211 	}
3212 
3213 	unlock_page(page);
3214 	put_page(page);
3215 	/* Indication to update ctime and mtime as close is deferred */
3216 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3217 
3218 	return rc;
3219 }
3220 
3221 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3222 		      int datasync)
3223 {
3224 	unsigned int xid;
3225 	int rc = 0;
3226 	struct cifs_tcon *tcon;
3227 	struct TCP_Server_Info *server;
3228 	struct cifsFileInfo *smbfile = file->private_data;
3229 	struct inode *inode = file_inode(file);
3230 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3231 
3232 	rc = file_write_and_wait_range(file, start, end);
3233 	if (rc) {
3234 		trace_cifs_fsync_err(inode->i_ino, rc);
3235 		return rc;
3236 	}
3237 
3238 	xid = get_xid();
3239 
3240 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3241 		 file, datasync);
3242 
3243 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3244 		rc = cifs_zap_mapping(inode);
3245 		if (rc) {
3246 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3247 			rc = 0; /* don't care about it in fsync */
3248 		}
3249 	}
3250 
3251 	tcon = tlink_tcon(smbfile->tlink);
3252 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3253 		server = tcon->ses->server;
3254 		if (server->ops->flush == NULL) {
3255 			rc = -ENOSYS;
3256 			goto strict_fsync_exit;
3257 		}
3258 
3259 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3260 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3261 			if (smbfile) {
3262 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3263 				cifsFileInfo_put(smbfile);
3264 			} else
3265 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3266 		} else
3267 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3268 	}
3269 
3270 strict_fsync_exit:
3271 	free_xid(xid);
3272 	return rc;
3273 }
3274 
3275 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3276 {
3277 	unsigned int xid;
3278 	int rc = 0;
3279 	struct cifs_tcon *tcon;
3280 	struct TCP_Server_Info *server;
3281 	struct cifsFileInfo *smbfile = file->private_data;
3282 	struct inode *inode = file_inode(file);
3283 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3284 
3285 	rc = file_write_and_wait_range(file, start, end);
3286 	if (rc) {
3287 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3288 		return rc;
3289 	}
3290 
3291 	xid = get_xid();
3292 
3293 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3294 		 file, datasync);
3295 
3296 	tcon = tlink_tcon(smbfile->tlink);
3297 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3298 		server = tcon->ses->server;
3299 		if (server->ops->flush == NULL) {
3300 			rc = -ENOSYS;
3301 			goto fsync_exit;
3302 		}
3303 
3304 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3305 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3306 			if (smbfile) {
3307 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3308 				cifsFileInfo_put(smbfile);
3309 			} else
3310 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3311 		} else
3312 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3313 	}
3314 
3315 fsync_exit:
3316 	free_xid(xid);
3317 	return rc;
3318 }
3319 
3320 /*
3321  * As file closes, flush all cached write data for this inode checking
3322  * for write behind errors.
3323  */
3324 int cifs_flush(struct file *file, fl_owner_t id)
3325 {
3326 	struct inode *inode = file_inode(file);
3327 	int rc = 0;
3328 
3329 	if (file->f_mode & FMODE_WRITE)
3330 		rc = filemap_write_and_wait(inode->i_mapping);
3331 
3332 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3333 	if (rc) {
3334 		/* get more nuanced writeback errors */
3335 		rc = filemap_check_wb_err(file->f_mapping, 0);
3336 		trace_cifs_flush_err(inode->i_ino, rc);
3337 	}
3338 	return rc;
3339 }
3340 
3341 static void
3342 cifs_uncached_writedata_release(struct kref *refcount)
3343 {
3344 	struct cifs_writedata *wdata = container_of(refcount,
3345 					struct cifs_writedata, refcount);
3346 
3347 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3348 	cifs_writedata_release(refcount);
3349 }
3350 
3351 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3352 
3353 static void
3354 cifs_uncached_writev_complete(struct work_struct *work)
3355 {
3356 	struct cifs_writedata *wdata = container_of(work,
3357 					struct cifs_writedata, work);
3358 	struct inode *inode = d_inode(wdata->cfile->dentry);
3359 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3360 
3361 	spin_lock(&inode->i_lock);
3362 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3363 	if (cifsi->server_eof > inode->i_size)
3364 		i_size_write(inode, cifsi->server_eof);
3365 	spin_unlock(&inode->i_lock);
3366 
3367 	complete(&wdata->done);
3368 	collect_uncached_write_data(wdata->ctx);
3369 	/* the below call can possibly free the last ref to aio ctx */
3370 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3371 }
3372 
3373 static int
3374 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3375 	struct cifs_aio_ctx *ctx)
3376 {
3377 	unsigned int wsize;
3378 	struct cifs_credits credits;
3379 	int rc;
3380 	struct TCP_Server_Info *server = wdata->server;
3381 
3382 	do {
3383 		if (wdata->cfile->invalidHandle) {
3384 			rc = cifs_reopen_file(wdata->cfile, false);
3385 			if (rc == -EAGAIN)
3386 				continue;
3387 			else if (rc)
3388 				break;
3389 		}
3390 
3391 
3392 		/*
3393 		 * Wait for credits to resend this wdata.
3394 		 * Note: we are attempting to resend the whole wdata not in
3395 		 * segments
3396 		 */
3397 		do {
3398 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3399 						&wsize, &credits);
3400 			if (rc)
3401 				goto fail;
3402 
3403 			if (wsize < wdata->bytes) {
3404 				add_credits_and_wake_if(server, &credits, 0);
3405 				msleep(1000);
3406 			}
3407 		} while (wsize < wdata->bytes);
3408 		wdata->credits = credits;
3409 
3410 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3411 
3412 		if (!rc) {
3413 			if (wdata->cfile->invalidHandle)
3414 				rc = -EAGAIN;
3415 			else {
3416 				wdata->replay = true;
3417 #ifdef CONFIG_CIFS_SMB_DIRECT
3418 				if (wdata->mr) {
3419 					wdata->mr->need_invalidate = true;
3420 					smbd_deregister_mr(wdata->mr);
3421 					wdata->mr = NULL;
3422 				}
3423 #endif
3424 				rc = server->ops->async_writev(wdata,
3425 					cifs_uncached_writedata_release);
3426 			}
3427 		}
3428 
3429 		/* If the write was successfully sent, we are done */
3430 		if (!rc) {
3431 			list_add_tail(&wdata->list, wdata_list);
3432 			return 0;
3433 		}
3434 
3435 		/* Roll back credits and retry if needed */
3436 		add_credits_and_wake_if(server, &wdata->credits, 0);
3437 	} while (rc == -EAGAIN);
3438 
3439 fail:
3440 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3441 	return rc;
3442 }
3443 
3444 /*
3445  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3446  * size and maximum number of segments.
3447  */
3448 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3449 				     size_t max_segs, unsigned int *_nsegs)
3450 {
3451 	const struct bio_vec *bvecs = iter->bvec;
3452 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3453 	size_t len, span = 0, n = iter->count;
3454 	size_t skip = iter->iov_offset;
3455 
3456 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3457 		return 0;
3458 
3459 	while (n && ix < nbv && skip) {
3460 		len = bvecs[ix].bv_len;
3461 		if (skip < len)
3462 			break;
3463 		skip -= len;
3464 		n -= len;
3465 		ix++;
3466 	}
3467 
3468 	while (n && ix < nbv) {
3469 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3470 		span += len;
3471 		max_size -= len;
3472 		nsegs++;
3473 		ix++;
3474 		if (max_size == 0 || nsegs >= max_segs)
3475 			break;
3476 		skip = 0;
3477 		n -= len;
3478 	}
3479 
3480 	*_nsegs = nsegs;
3481 	return span;
3482 }
3483 
3484 static int
3485 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3486 		     struct cifsFileInfo *open_file,
3487 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3488 		     struct cifs_aio_ctx *ctx)
3489 {
3490 	int rc = 0;
3491 	size_t cur_len, max_len;
3492 	struct cifs_writedata *wdata;
3493 	pid_t pid;
3494 	struct TCP_Server_Info *server;
3495 	unsigned int xid, max_segs = INT_MAX;
3496 
3497 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3498 		pid = open_file->pid;
3499 	else
3500 		pid = current->tgid;
3501 
3502 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3503 	xid = get_xid();
3504 
3505 #ifdef CONFIG_CIFS_SMB_DIRECT
3506 	if (server->smbd_conn)
3507 		max_segs = server->smbd_conn->max_frmr_depth;
3508 #endif
3509 
3510 	do {
3511 		struct cifs_credits credits_on_stack;
3512 		struct cifs_credits *credits = &credits_on_stack;
3513 		unsigned int wsize, nsegs = 0;
3514 
3515 		if (signal_pending(current)) {
3516 			rc = -EINTR;
3517 			break;
3518 		}
3519 
3520 		if (open_file->invalidHandle) {
3521 			rc = cifs_reopen_file(open_file, false);
3522 			if (rc == -EAGAIN)
3523 				continue;
3524 			else if (rc)
3525 				break;
3526 		}
3527 
3528 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3529 						   &wsize, credits);
3530 		if (rc)
3531 			break;
3532 
3533 		max_len = min_t(const size_t, len, wsize);
3534 		if (!max_len) {
3535 			rc = -EAGAIN;
3536 			add_credits_and_wake_if(server, credits, 0);
3537 			break;
3538 		}
3539 
3540 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3541 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3542 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3543 		if (cur_len == 0) {
3544 			rc = -EIO;
3545 			add_credits_and_wake_if(server, credits, 0);
3546 			break;
3547 		}
3548 
3549 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3550 		if (!wdata) {
3551 			rc = -ENOMEM;
3552 			add_credits_and_wake_if(server, credits, 0);
3553 			break;
3554 		}
3555 
3556 		wdata->sync_mode = WB_SYNC_ALL;
3557 		wdata->offset	= (__u64)fpos;
3558 		wdata->cfile	= cifsFileInfo_get(open_file);
3559 		wdata->server	= server;
3560 		wdata->pid	= pid;
3561 		wdata->bytes	= cur_len;
3562 		wdata->credits	= credits_on_stack;
3563 		wdata->iter	= *from;
3564 		wdata->ctx	= ctx;
3565 		kref_get(&ctx->refcount);
3566 
3567 		iov_iter_truncate(&wdata->iter, cur_len);
3568 
3569 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3570 
3571 		if (!rc) {
3572 			if (wdata->cfile->invalidHandle)
3573 				rc = -EAGAIN;
3574 			else
3575 				rc = server->ops->async_writev(wdata,
3576 					cifs_uncached_writedata_release);
3577 		}
3578 
3579 		if (rc) {
3580 			add_credits_and_wake_if(server, &wdata->credits, 0);
3581 			kref_put(&wdata->refcount,
3582 				 cifs_uncached_writedata_release);
3583 			if (rc == -EAGAIN)
3584 				continue;
3585 			break;
3586 		}
3587 
3588 		list_add_tail(&wdata->list, wdata_list);
3589 		iov_iter_advance(from, cur_len);
3590 		fpos += cur_len;
3591 		len -= cur_len;
3592 	} while (len > 0);
3593 
3594 	free_xid(xid);
3595 	return rc;
3596 }
3597 
3598 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3599 {
3600 	struct cifs_writedata *wdata, *tmp;
3601 	struct cifs_tcon *tcon;
3602 	struct cifs_sb_info *cifs_sb;
3603 	struct dentry *dentry = ctx->cfile->dentry;
3604 	ssize_t rc;
3605 
3606 	tcon = tlink_tcon(ctx->cfile->tlink);
3607 	cifs_sb = CIFS_SB(dentry->d_sb);
3608 
3609 	mutex_lock(&ctx->aio_mutex);
3610 
3611 	if (list_empty(&ctx->list)) {
3612 		mutex_unlock(&ctx->aio_mutex);
3613 		return;
3614 	}
3615 
3616 	rc = ctx->rc;
3617 	/*
3618 	 * Wait for and collect replies for any successful sends in order of
3619 	 * increasing offset. Once an error is hit, then return without waiting
3620 	 * for any more replies.
3621 	 */
3622 restart_loop:
3623 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3624 		if (!rc) {
3625 			if (!try_wait_for_completion(&wdata->done)) {
3626 				mutex_unlock(&ctx->aio_mutex);
3627 				return;
3628 			}
3629 
3630 			if (wdata->result)
3631 				rc = wdata->result;
3632 			else
3633 				ctx->total_len += wdata->bytes;
3634 
3635 			/* resend call if it's a retryable error */
3636 			if (rc == -EAGAIN) {
3637 				struct list_head tmp_list;
3638 				struct iov_iter tmp_from = ctx->iter;
3639 
3640 				INIT_LIST_HEAD(&tmp_list);
3641 				list_del_init(&wdata->list);
3642 
3643 				if (ctx->direct_io)
3644 					rc = cifs_resend_wdata(
3645 						wdata, &tmp_list, ctx);
3646 				else {
3647 					iov_iter_advance(&tmp_from,
3648 						 wdata->offset - ctx->pos);
3649 
3650 					rc = cifs_write_from_iter(wdata->offset,
3651 						wdata->bytes, &tmp_from,
3652 						ctx->cfile, cifs_sb, &tmp_list,
3653 						ctx);
3654 
3655 					kref_put(&wdata->refcount,
3656 						cifs_uncached_writedata_release);
3657 				}
3658 
3659 				list_splice(&tmp_list, &ctx->list);
3660 				goto restart_loop;
3661 			}
3662 		}
3663 		list_del_init(&wdata->list);
3664 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3665 	}
3666 
3667 	cifs_stats_bytes_written(tcon, ctx->total_len);
3668 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3669 
3670 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3671 
3672 	mutex_unlock(&ctx->aio_mutex);
3673 
3674 	if (ctx->iocb && ctx->iocb->ki_complete)
3675 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3676 	else
3677 		complete(&ctx->done);
3678 }
3679 
3680 static ssize_t __cifs_writev(
3681 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3682 {
3683 	struct file *file = iocb->ki_filp;
3684 	ssize_t total_written = 0;
3685 	struct cifsFileInfo *cfile;
3686 	struct cifs_tcon *tcon;
3687 	struct cifs_sb_info *cifs_sb;
3688 	struct cifs_aio_ctx *ctx;
3689 	int rc;
3690 
3691 	rc = generic_write_checks(iocb, from);
3692 	if (rc <= 0)
3693 		return rc;
3694 
3695 	cifs_sb = CIFS_FILE_SB(file);
3696 	cfile = file->private_data;
3697 	tcon = tlink_tcon(cfile->tlink);
3698 
3699 	if (!tcon->ses->server->ops->async_writev)
3700 		return -ENOSYS;
3701 
3702 	ctx = cifs_aio_ctx_alloc();
3703 	if (!ctx)
3704 		return -ENOMEM;
3705 
3706 	ctx->cfile = cifsFileInfo_get(cfile);
3707 
3708 	if (!is_sync_kiocb(iocb))
3709 		ctx->iocb = iocb;
3710 
3711 	ctx->pos = iocb->ki_pos;
3712 	ctx->direct_io = direct;
3713 	ctx->nr_pinned_pages = 0;
3714 
3715 	if (user_backed_iter(from)) {
3716 		/*
3717 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3718 		 * they contain references to the calling process's virtual
3719 		 * memory layout which won't be available in an async worker
3720 		 * thread.  This also takes a pin on every folio involved.
3721 		 */
3722 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3723 					     &ctx->iter, 0);
3724 		if (rc < 0) {
3725 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3726 			return rc;
3727 		}
3728 
3729 		ctx->nr_pinned_pages = rc;
3730 		ctx->bv = (void *)ctx->iter.bvec;
3731 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3732 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3733 		   !is_sync_kiocb(iocb)) {
3734 		/*
3735 		 * If the op is asynchronous, we need to copy the list attached
3736 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3737 		 * will be pinned by the caller; in any case, we may or may not
3738 		 * be able to pin the pages, so we don't try.
3739 		 */
3740 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3741 		if (!ctx->bv) {
3742 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3743 			return -ENOMEM;
3744 		}
3745 	} else {
3746 		/*
3747 		 * Otherwise, we just pass the iterator down as-is and rely on
3748 		 * the caller to make sure the pages referred to by the
3749 		 * iterator don't evaporate.
3750 		 */
3751 		ctx->iter = *from;
3752 	}
3753 
3754 	ctx->len = iov_iter_count(&ctx->iter);
3755 
3756 	/* grab a lock here due to read response handlers can access ctx */
3757 	mutex_lock(&ctx->aio_mutex);
3758 
3759 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3760 				  cfile, cifs_sb, &ctx->list, ctx);
3761 
3762 	/*
3763 	 * If at least one write was successfully sent, then discard any rc
3764 	 * value from the later writes. If the other write succeeds, then
3765 	 * we'll end up returning whatever was written. If it fails, then
3766 	 * we'll get a new rc value from that.
3767 	 */
3768 	if (!list_empty(&ctx->list))
3769 		rc = 0;
3770 
3771 	mutex_unlock(&ctx->aio_mutex);
3772 
3773 	if (rc) {
3774 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3775 		return rc;
3776 	}
3777 
3778 	if (!is_sync_kiocb(iocb)) {
3779 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3780 		return -EIOCBQUEUED;
3781 	}
3782 
3783 	rc = wait_for_completion_killable(&ctx->done);
3784 	if (rc) {
3785 		mutex_lock(&ctx->aio_mutex);
3786 		ctx->rc = rc = -EINTR;
3787 		total_written = ctx->total_len;
3788 		mutex_unlock(&ctx->aio_mutex);
3789 	} else {
3790 		rc = ctx->rc;
3791 		total_written = ctx->total_len;
3792 	}
3793 
3794 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3795 
3796 	if (unlikely(!total_written))
3797 		return rc;
3798 
3799 	iocb->ki_pos += total_written;
3800 	return total_written;
3801 }
3802 
3803 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3804 {
3805 	struct file *file = iocb->ki_filp;
3806 
3807 	cifs_revalidate_mapping(file->f_inode);
3808 	return __cifs_writev(iocb, from, true);
3809 }
3810 
3811 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3812 {
3813 	return __cifs_writev(iocb, from, false);
3814 }
3815 
3816 static ssize_t
3817 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3818 {
3819 	struct file *file = iocb->ki_filp;
3820 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3821 	struct inode *inode = file->f_mapping->host;
3822 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3823 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3824 	ssize_t rc;
3825 
3826 	inode_lock(inode);
3827 	/*
3828 	 * We need to hold the sem to be sure nobody modifies lock list
3829 	 * with a brlock that prevents writing.
3830 	 */
3831 	down_read(&cinode->lock_sem);
3832 
3833 	rc = generic_write_checks(iocb, from);
3834 	if (rc <= 0)
3835 		goto out;
3836 
3837 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3838 				     server->vals->exclusive_lock_type, 0,
3839 				     NULL, CIFS_WRITE_OP))
3840 		rc = __generic_file_write_iter(iocb, from);
3841 	else
3842 		rc = -EACCES;
3843 out:
3844 	up_read(&cinode->lock_sem);
3845 	inode_unlock(inode);
3846 
3847 	if (rc > 0)
3848 		rc = generic_write_sync(iocb, rc);
3849 	return rc;
3850 }
3851 
3852 ssize_t
3853 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3854 {
3855 	struct inode *inode = file_inode(iocb->ki_filp);
3856 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3857 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3858 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3859 						iocb->ki_filp->private_data;
3860 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3861 	ssize_t written;
3862 
3863 	written = cifs_get_writer(cinode);
3864 	if (written)
3865 		return written;
3866 
3867 	if (CIFS_CACHE_WRITE(cinode)) {
3868 		if (cap_unix(tcon->ses) &&
3869 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3870 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3871 			written = generic_file_write_iter(iocb, from);
3872 			goto out;
3873 		}
3874 		written = cifs_writev(iocb, from);
3875 		goto out;
3876 	}
3877 	/*
3878 	 * For non-oplocked files in strict cache mode we need to write the data
3879 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3880 	 * affected pages because it may cause a error with mandatory locks on
3881 	 * these pages but not on the region from pos to ppos+len-1.
3882 	 */
3883 	written = cifs_user_writev(iocb, from);
3884 	if (CIFS_CACHE_READ(cinode)) {
3885 		/*
3886 		 * We have read level caching and we have just sent a write
3887 		 * request to the server thus making data in the cache stale.
3888 		 * Zap the cache and set oplock/lease level to NONE to avoid
3889 		 * reading stale data from the cache. All subsequent read
3890 		 * operations will read new data from the server.
3891 		 */
3892 		cifs_zap_mapping(inode);
3893 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3894 			 inode);
3895 		cinode->oplock = 0;
3896 	}
3897 out:
3898 	cifs_put_writer(cinode);
3899 	return written;
3900 }
3901 
3902 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3903 {
3904 	struct cifs_readdata *rdata;
3905 
3906 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3907 	if (rdata) {
3908 		kref_init(&rdata->refcount);
3909 		INIT_LIST_HEAD(&rdata->list);
3910 		init_completion(&rdata->done);
3911 		INIT_WORK(&rdata->work, complete);
3912 	}
3913 
3914 	return rdata;
3915 }
3916 
3917 void
3918 cifs_readdata_release(struct kref *refcount)
3919 {
3920 	struct cifs_readdata *rdata = container_of(refcount,
3921 					struct cifs_readdata, refcount);
3922 
3923 	if (rdata->ctx)
3924 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3925 #ifdef CONFIG_CIFS_SMB_DIRECT
3926 	if (rdata->mr) {
3927 		smbd_deregister_mr(rdata->mr);
3928 		rdata->mr = NULL;
3929 	}
3930 #endif
3931 	if (rdata->cfile)
3932 		cifsFileInfo_put(rdata->cfile);
3933 
3934 	kfree(rdata);
3935 }
3936 
3937 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3938 
3939 static void
3940 cifs_uncached_readv_complete(struct work_struct *work)
3941 {
3942 	struct cifs_readdata *rdata = container_of(work,
3943 						struct cifs_readdata, work);
3944 
3945 	complete(&rdata->done);
3946 	collect_uncached_read_data(rdata->ctx);
3947 	/* the below call can possibly free the last ref to aio ctx */
3948 	kref_put(&rdata->refcount, cifs_readdata_release);
3949 }
3950 
3951 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3952 			struct list_head *rdata_list,
3953 			struct cifs_aio_ctx *ctx)
3954 {
3955 	unsigned int rsize;
3956 	struct cifs_credits credits;
3957 	int rc;
3958 	struct TCP_Server_Info *server;
3959 
3960 	/* XXX: should we pick a new channel here? */
3961 	server = rdata->server;
3962 
3963 	do {
3964 		if (rdata->cfile->invalidHandle) {
3965 			rc = cifs_reopen_file(rdata->cfile, true);
3966 			if (rc == -EAGAIN)
3967 				continue;
3968 			else if (rc)
3969 				break;
3970 		}
3971 
3972 		/*
3973 		 * Wait for credits to resend this rdata.
3974 		 * Note: we are attempting to resend the whole rdata not in
3975 		 * segments
3976 		 */
3977 		do {
3978 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3979 						&rsize, &credits);
3980 
3981 			if (rc)
3982 				goto fail;
3983 
3984 			if (rsize < rdata->bytes) {
3985 				add_credits_and_wake_if(server, &credits, 0);
3986 				msleep(1000);
3987 			}
3988 		} while (rsize < rdata->bytes);
3989 		rdata->credits = credits;
3990 
3991 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3992 		if (!rc) {
3993 			if (rdata->cfile->invalidHandle)
3994 				rc = -EAGAIN;
3995 			else {
3996 #ifdef CONFIG_CIFS_SMB_DIRECT
3997 				if (rdata->mr) {
3998 					rdata->mr->need_invalidate = true;
3999 					smbd_deregister_mr(rdata->mr);
4000 					rdata->mr = NULL;
4001 				}
4002 #endif
4003 				rc = server->ops->async_readv(rdata);
4004 			}
4005 		}
4006 
4007 		/* If the read was successfully sent, we are done */
4008 		if (!rc) {
4009 			/* Add to aio pending list */
4010 			list_add_tail(&rdata->list, rdata_list);
4011 			return 0;
4012 		}
4013 
4014 		/* Roll back credits and retry if needed */
4015 		add_credits_and_wake_if(server, &rdata->credits, 0);
4016 	} while (rc == -EAGAIN);
4017 
4018 fail:
4019 	kref_put(&rdata->refcount, cifs_readdata_release);
4020 	return rc;
4021 }
4022 
4023 static int
4024 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4025 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4026 		     struct cifs_aio_ctx *ctx)
4027 {
4028 	struct cifs_readdata *rdata;
4029 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4030 	struct cifs_credits credits_on_stack;
4031 	struct cifs_credits *credits = &credits_on_stack;
4032 	size_t cur_len, max_len;
4033 	int rc;
4034 	pid_t pid;
4035 	struct TCP_Server_Info *server;
4036 
4037 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4038 
4039 #ifdef CONFIG_CIFS_SMB_DIRECT
4040 	if (server->smbd_conn)
4041 		max_segs = server->smbd_conn->max_frmr_depth;
4042 #endif
4043 
4044 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4045 		pid = open_file->pid;
4046 	else
4047 		pid = current->tgid;
4048 
4049 	do {
4050 		if (open_file->invalidHandle) {
4051 			rc = cifs_reopen_file(open_file, true);
4052 			if (rc == -EAGAIN)
4053 				continue;
4054 			else if (rc)
4055 				break;
4056 		}
4057 
4058 		if (cifs_sb->ctx->rsize == 0)
4059 			cifs_sb->ctx->rsize =
4060 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4061 							     cifs_sb->ctx);
4062 
4063 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4064 						   &rsize, credits);
4065 		if (rc)
4066 			break;
4067 
4068 		max_len = min_t(size_t, len, rsize);
4069 
4070 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4071 						 max_segs, &nsegs);
4072 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4073 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4074 		if (cur_len == 0) {
4075 			rc = -EIO;
4076 			add_credits_and_wake_if(server, credits, 0);
4077 			break;
4078 		}
4079 
4080 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4081 		if (!rdata) {
4082 			add_credits_and_wake_if(server, credits, 0);
4083 			rc = -ENOMEM;
4084 			break;
4085 		}
4086 
4087 		rdata->server	= server;
4088 		rdata->cfile	= cifsFileInfo_get(open_file);
4089 		rdata->offset	= fpos;
4090 		rdata->bytes	= cur_len;
4091 		rdata->pid	= pid;
4092 		rdata->credits	= credits_on_stack;
4093 		rdata->ctx	= ctx;
4094 		kref_get(&ctx->refcount);
4095 
4096 		rdata->iter	= ctx->iter;
4097 		iov_iter_truncate(&rdata->iter, cur_len);
4098 
4099 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4100 
4101 		if (!rc) {
4102 			if (rdata->cfile->invalidHandle)
4103 				rc = -EAGAIN;
4104 			else
4105 				rc = server->ops->async_readv(rdata);
4106 		}
4107 
4108 		if (rc) {
4109 			add_credits_and_wake_if(server, &rdata->credits, 0);
4110 			kref_put(&rdata->refcount, cifs_readdata_release);
4111 			if (rc == -EAGAIN)
4112 				continue;
4113 			break;
4114 		}
4115 
4116 		list_add_tail(&rdata->list, rdata_list);
4117 		iov_iter_advance(&ctx->iter, cur_len);
4118 		fpos += cur_len;
4119 		len -= cur_len;
4120 	} while (len > 0);
4121 
4122 	return rc;
4123 }
4124 
4125 static void
4126 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4127 {
4128 	struct cifs_readdata *rdata, *tmp;
4129 	struct cifs_sb_info *cifs_sb;
4130 	int rc;
4131 
4132 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4133 
4134 	mutex_lock(&ctx->aio_mutex);
4135 
4136 	if (list_empty(&ctx->list)) {
4137 		mutex_unlock(&ctx->aio_mutex);
4138 		return;
4139 	}
4140 
4141 	rc = ctx->rc;
4142 	/* the loop below should proceed in the order of increasing offsets */
4143 again:
4144 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4145 		if (!rc) {
4146 			if (!try_wait_for_completion(&rdata->done)) {
4147 				mutex_unlock(&ctx->aio_mutex);
4148 				return;
4149 			}
4150 
4151 			if (rdata->result == -EAGAIN) {
4152 				/* resend call if it's a retryable error */
4153 				struct list_head tmp_list;
4154 				unsigned int got_bytes = rdata->got_bytes;
4155 
4156 				list_del_init(&rdata->list);
4157 				INIT_LIST_HEAD(&tmp_list);
4158 
4159 				if (ctx->direct_io) {
4160 					/*
4161 					 * Re-use rdata as this is a
4162 					 * direct I/O
4163 					 */
4164 					rc = cifs_resend_rdata(
4165 						rdata,
4166 						&tmp_list, ctx);
4167 				} else {
4168 					rc = cifs_send_async_read(
4169 						rdata->offset + got_bytes,
4170 						rdata->bytes - got_bytes,
4171 						rdata->cfile, cifs_sb,
4172 						&tmp_list, ctx);
4173 
4174 					kref_put(&rdata->refcount,
4175 						cifs_readdata_release);
4176 				}
4177 
4178 				list_splice(&tmp_list, &ctx->list);
4179 
4180 				goto again;
4181 			} else if (rdata->result)
4182 				rc = rdata->result;
4183 
4184 			/* if there was a short read -- discard anything left */
4185 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4186 				rc = -ENODATA;
4187 
4188 			ctx->total_len += rdata->got_bytes;
4189 		}
4190 		list_del_init(&rdata->list);
4191 		kref_put(&rdata->refcount, cifs_readdata_release);
4192 	}
4193 
4194 	/* mask nodata case */
4195 	if (rc == -ENODATA)
4196 		rc = 0;
4197 
4198 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4199 
4200 	mutex_unlock(&ctx->aio_mutex);
4201 
4202 	if (ctx->iocb && ctx->iocb->ki_complete)
4203 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4204 	else
4205 		complete(&ctx->done);
4206 }
4207 
4208 static ssize_t __cifs_readv(
4209 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4210 {
4211 	size_t len;
4212 	struct file *file = iocb->ki_filp;
4213 	struct cifs_sb_info *cifs_sb;
4214 	struct cifsFileInfo *cfile;
4215 	struct cifs_tcon *tcon;
4216 	ssize_t rc, total_read = 0;
4217 	loff_t offset = iocb->ki_pos;
4218 	struct cifs_aio_ctx *ctx;
4219 
4220 	len = iov_iter_count(to);
4221 	if (!len)
4222 		return 0;
4223 
4224 	cifs_sb = CIFS_FILE_SB(file);
4225 	cfile = file->private_data;
4226 	tcon = tlink_tcon(cfile->tlink);
4227 
4228 	if (!tcon->ses->server->ops->async_readv)
4229 		return -ENOSYS;
4230 
4231 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4232 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4233 
4234 	ctx = cifs_aio_ctx_alloc();
4235 	if (!ctx)
4236 		return -ENOMEM;
4237 
4238 	ctx->pos	= offset;
4239 	ctx->direct_io	= direct;
4240 	ctx->len	= len;
4241 	ctx->cfile	= cifsFileInfo_get(cfile);
4242 	ctx->nr_pinned_pages = 0;
4243 
4244 	if (!is_sync_kiocb(iocb))
4245 		ctx->iocb = iocb;
4246 
4247 	if (user_backed_iter(to)) {
4248 		/*
4249 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4250 		 * they contain references to the calling process's virtual
4251 		 * memory layout which won't be available in an async worker
4252 		 * thread.  This also takes a pin on every folio involved.
4253 		 */
4254 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4255 					     &ctx->iter, 0);
4256 		if (rc < 0) {
4257 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4258 			return rc;
4259 		}
4260 
4261 		ctx->nr_pinned_pages = rc;
4262 		ctx->bv = (void *)ctx->iter.bvec;
4263 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4264 		ctx->should_dirty = true;
4265 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4266 		   !is_sync_kiocb(iocb)) {
4267 		/*
4268 		 * If the op is asynchronous, we need to copy the list attached
4269 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4270 		 * will be retained by the caller; in any case, we may or may
4271 		 * not be able to pin the pages, so we don't try.
4272 		 */
4273 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4274 		if (!ctx->bv) {
4275 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4276 			return -ENOMEM;
4277 		}
4278 	} else {
4279 		/*
4280 		 * Otherwise, we just pass the iterator down as-is and rely on
4281 		 * the caller to make sure the pages referred to by the
4282 		 * iterator don't evaporate.
4283 		 */
4284 		ctx->iter = *to;
4285 	}
4286 
4287 	if (direct) {
4288 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4289 						  offset, offset + len - 1);
4290 		if (rc) {
4291 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4292 			return -EAGAIN;
4293 		}
4294 	}
4295 
4296 	/* grab a lock here due to read response handlers can access ctx */
4297 	mutex_lock(&ctx->aio_mutex);
4298 
4299 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4300 
4301 	/* if at least one read request send succeeded, then reset rc */
4302 	if (!list_empty(&ctx->list))
4303 		rc = 0;
4304 
4305 	mutex_unlock(&ctx->aio_mutex);
4306 
4307 	if (rc) {
4308 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4309 		return rc;
4310 	}
4311 
4312 	if (!is_sync_kiocb(iocb)) {
4313 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314 		return -EIOCBQUEUED;
4315 	}
4316 
4317 	rc = wait_for_completion_killable(&ctx->done);
4318 	if (rc) {
4319 		mutex_lock(&ctx->aio_mutex);
4320 		ctx->rc = rc = -EINTR;
4321 		total_read = ctx->total_len;
4322 		mutex_unlock(&ctx->aio_mutex);
4323 	} else {
4324 		rc = ctx->rc;
4325 		total_read = ctx->total_len;
4326 	}
4327 
4328 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4329 
4330 	if (total_read) {
4331 		iocb->ki_pos += total_read;
4332 		return total_read;
4333 	}
4334 	return rc;
4335 }
4336 
4337 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4338 {
4339 	return __cifs_readv(iocb, to, true);
4340 }
4341 
4342 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4343 {
4344 	return __cifs_readv(iocb, to, false);
4345 }
4346 
4347 ssize_t
4348 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4349 {
4350 	struct inode *inode = file_inode(iocb->ki_filp);
4351 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4352 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4353 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4354 						iocb->ki_filp->private_data;
4355 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4356 	int rc = -EACCES;
4357 
4358 	/*
4359 	 * In strict cache mode we need to read from the server all the time
4360 	 * if we don't have level II oplock because the server can delay mtime
4361 	 * change - so we can't make a decision about inode invalidating.
4362 	 * And we can also fail with pagereading if there are mandatory locks
4363 	 * on pages affected by this read but not on the region from pos to
4364 	 * pos+len-1.
4365 	 */
4366 	if (!CIFS_CACHE_READ(cinode))
4367 		return cifs_user_readv(iocb, to);
4368 
4369 	if (cap_unix(tcon->ses) &&
4370 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4371 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4372 		return generic_file_read_iter(iocb, to);
4373 
4374 	/*
4375 	 * We need to hold the sem to be sure nobody modifies lock list
4376 	 * with a brlock that prevents reading.
4377 	 */
4378 	down_read(&cinode->lock_sem);
4379 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4380 				     tcon->ses->server->vals->shared_lock_type,
4381 				     0, NULL, CIFS_READ_OP))
4382 		rc = generic_file_read_iter(iocb, to);
4383 	up_read(&cinode->lock_sem);
4384 	return rc;
4385 }
4386 
4387 static ssize_t
4388 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4389 {
4390 	int rc = -EACCES;
4391 	unsigned int bytes_read = 0;
4392 	unsigned int total_read;
4393 	unsigned int current_read_size;
4394 	unsigned int rsize;
4395 	struct cifs_sb_info *cifs_sb;
4396 	struct cifs_tcon *tcon;
4397 	struct TCP_Server_Info *server;
4398 	unsigned int xid;
4399 	char *cur_offset;
4400 	struct cifsFileInfo *open_file;
4401 	struct cifs_io_parms io_parms = {0};
4402 	int buf_type = CIFS_NO_BUFFER;
4403 	__u32 pid;
4404 
4405 	xid = get_xid();
4406 	cifs_sb = CIFS_FILE_SB(file);
4407 
4408 	/* FIXME: set up handlers for larger reads and/or convert to async */
4409 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4410 
4411 	if (file->private_data == NULL) {
4412 		rc = -EBADF;
4413 		free_xid(xid);
4414 		return rc;
4415 	}
4416 	open_file = file->private_data;
4417 	tcon = tlink_tcon(open_file->tlink);
4418 	server = cifs_pick_channel(tcon->ses);
4419 
4420 	if (!server->ops->sync_read) {
4421 		free_xid(xid);
4422 		return -ENOSYS;
4423 	}
4424 
4425 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4426 		pid = open_file->pid;
4427 	else
4428 		pid = current->tgid;
4429 
4430 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4431 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4432 
4433 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4434 	     total_read += bytes_read, cur_offset += bytes_read) {
4435 		do {
4436 			current_read_size = min_t(uint, read_size - total_read,
4437 						  rsize);
4438 			/*
4439 			 * For windows me and 9x we do not want to request more
4440 			 * than it negotiated since it will refuse the read
4441 			 * then.
4442 			 */
4443 			if (!(tcon->ses->capabilities &
4444 				tcon->ses->server->vals->cap_large_files)) {
4445 				current_read_size = min_t(uint,
4446 					current_read_size, CIFSMaxBufSize);
4447 			}
4448 			if (open_file->invalidHandle) {
4449 				rc = cifs_reopen_file(open_file, true);
4450 				if (rc != 0)
4451 					break;
4452 			}
4453 			io_parms.pid = pid;
4454 			io_parms.tcon = tcon;
4455 			io_parms.offset = *offset;
4456 			io_parms.length = current_read_size;
4457 			io_parms.server = server;
4458 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4459 						    &bytes_read, &cur_offset,
4460 						    &buf_type);
4461 		} while (rc == -EAGAIN);
4462 
4463 		if (rc || (bytes_read == 0)) {
4464 			if (total_read) {
4465 				break;
4466 			} else {
4467 				free_xid(xid);
4468 				return rc;
4469 			}
4470 		} else {
4471 			cifs_stats_bytes_read(tcon, total_read);
4472 			*offset += bytes_read;
4473 		}
4474 	}
4475 	free_xid(xid);
4476 	return total_read;
4477 }
4478 
4479 /*
4480  * If the page is mmap'ed into a process' page tables, then we need to make
4481  * sure that it doesn't change while being written back.
4482  */
4483 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4484 {
4485 	struct folio *folio = page_folio(vmf->page);
4486 
4487 	/* Wait for the folio to be written to the cache before we allow it to
4488 	 * be modified.  We then assume the entire folio will need writing back.
4489 	 */
4490 #ifdef CONFIG_CIFS_FSCACHE
4491 	if (folio_test_fscache(folio) &&
4492 	    folio_wait_fscache_killable(folio) < 0)
4493 		return VM_FAULT_RETRY;
4494 #endif
4495 
4496 	folio_wait_writeback(folio);
4497 
4498 	if (folio_lock_killable(folio) < 0)
4499 		return VM_FAULT_RETRY;
4500 	return VM_FAULT_LOCKED;
4501 }
4502 
4503 static const struct vm_operations_struct cifs_file_vm_ops = {
4504 	.fault = filemap_fault,
4505 	.map_pages = filemap_map_pages,
4506 	.page_mkwrite = cifs_page_mkwrite,
4507 };
4508 
4509 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4510 {
4511 	int xid, rc = 0;
4512 	struct inode *inode = file_inode(file);
4513 
4514 	xid = get_xid();
4515 
4516 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4517 		rc = cifs_zap_mapping(inode);
4518 	if (!rc)
4519 		rc = generic_file_mmap(file, vma);
4520 	if (!rc)
4521 		vma->vm_ops = &cifs_file_vm_ops;
4522 
4523 	free_xid(xid);
4524 	return rc;
4525 }
4526 
4527 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4528 {
4529 	int rc, xid;
4530 
4531 	xid = get_xid();
4532 
4533 	rc = cifs_revalidate_file(file);
4534 	if (rc)
4535 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4536 			 rc);
4537 	if (!rc)
4538 		rc = generic_file_mmap(file, vma);
4539 	if (!rc)
4540 		vma->vm_ops = &cifs_file_vm_ops;
4541 
4542 	free_xid(xid);
4543 	return rc;
4544 }
4545 
4546 /*
4547  * Unlock a bunch of folios in the pagecache.
4548  */
4549 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4550 {
4551 	struct folio *folio;
4552 	XA_STATE(xas, &mapping->i_pages, first);
4553 
4554 	rcu_read_lock();
4555 	xas_for_each(&xas, folio, last) {
4556 		folio_unlock(folio);
4557 	}
4558 	rcu_read_unlock();
4559 }
4560 
4561 static void cifs_readahead_complete(struct work_struct *work)
4562 {
4563 	struct cifs_readdata *rdata = container_of(work,
4564 						   struct cifs_readdata, work);
4565 	struct folio *folio;
4566 	pgoff_t last;
4567 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4568 
4569 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4570 
4571 	if (good)
4572 		cifs_readahead_to_fscache(rdata->mapping->host,
4573 					  rdata->offset, rdata->bytes);
4574 
4575 	if (iov_iter_count(&rdata->iter) > 0)
4576 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4577 
4578 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4579 
4580 	rcu_read_lock();
4581 	xas_for_each(&xas, folio, last) {
4582 		if (good) {
4583 			flush_dcache_folio(folio);
4584 			folio_mark_uptodate(folio);
4585 		}
4586 		folio_unlock(folio);
4587 	}
4588 	rcu_read_unlock();
4589 
4590 	kref_put(&rdata->refcount, cifs_readdata_release);
4591 }
4592 
4593 static void cifs_readahead(struct readahead_control *ractl)
4594 {
4595 	struct cifsFileInfo *open_file = ractl->file->private_data;
4596 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4597 	struct TCP_Server_Info *server;
4598 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4599 	unsigned int ra_pages;
4600 	pgoff_t next_cached = ULONG_MAX, ra_index;
4601 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4602 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4603 	bool check_cache = caching;
4604 	pid_t pid;
4605 	int rc = 0;
4606 
4607 	/* Note that readahead_count() lags behind our dequeuing of pages from
4608 	 * the ractl, wo we have to keep track for ourselves.
4609 	 */
4610 	ra_pages = readahead_count(ractl);
4611 	ra_index = readahead_index(ractl);
4612 
4613 	xid = get_xid();
4614 
4615 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4616 		pid = open_file->pid;
4617 	else
4618 		pid = current->tgid;
4619 
4620 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4621 
4622 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4623 		 __func__, ractl->file, ractl->mapping, ra_pages);
4624 
4625 	/*
4626 	 * Chop the readahead request up into rsize-sized read requests.
4627 	 */
4628 	while ((nr_pages = ra_pages)) {
4629 		unsigned int i, rsize;
4630 		struct cifs_readdata *rdata;
4631 		struct cifs_credits credits_on_stack;
4632 		struct cifs_credits *credits = &credits_on_stack;
4633 		struct folio *folio;
4634 		pgoff_t fsize;
4635 
4636 		/*
4637 		 * Find out if we have anything cached in the range of
4638 		 * interest, and if so, where the next chunk of cached data is.
4639 		 */
4640 		if (caching) {
4641 			if (check_cache) {
4642 				rc = cifs_fscache_query_occupancy(
4643 					ractl->mapping->host, ra_index, nr_pages,
4644 					&next_cached, &cache_nr_pages);
4645 				if (rc < 0)
4646 					caching = false;
4647 				check_cache = false;
4648 			}
4649 
4650 			if (ra_index == next_cached) {
4651 				/*
4652 				 * TODO: Send a whole batch of pages to be read
4653 				 * by the cache.
4654 				 */
4655 				folio = readahead_folio(ractl);
4656 				fsize = folio_nr_pages(folio);
4657 				ra_pages -= fsize;
4658 				ra_index += fsize;
4659 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4660 							       &folio->page) < 0) {
4661 					/*
4662 					 * TODO: Deal with cache read failure
4663 					 * here, but for the moment, delegate
4664 					 * that to readpage.
4665 					 */
4666 					caching = false;
4667 				}
4668 				folio_unlock(folio);
4669 				next_cached += fsize;
4670 				cache_nr_pages -= fsize;
4671 				if (cache_nr_pages == 0)
4672 					check_cache = true;
4673 				continue;
4674 			}
4675 		}
4676 
4677 		if (open_file->invalidHandle) {
4678 			rc = cifs_reopen_file(open_file, true);
4679 			if (rc) {
4680 				if (rc == -EAGAIN)
4681 					continue;
4682 				break;
4683 			}
4684 		}
4685 
4686 		if (cifs_sb->ctx->rsize == 0)
4687 			cifs_sb->ctx->rsize =
4688 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4689 							     cifs_sb->ctx);
4690 
4691 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4692 						   &rsize, credits);
4693 		if (rc)
4694 			break;
4695 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4696 		if (next_cached != ULONG_MAX)
4697 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4698 
4699 		/*
4700 		 * Give up immediately if rsize is too small to read an entire
4701 		 * page. The VFS will fall back to readpage. We should never
4702 		 * reach this point however since we set ra_pages to 0 when the
4703 		 * rsize is smaller than a cache page.
4704 		 */
4705 		if (unlikely(!nr_pages)) {
4706 			add_credits_and_wake_if(server, credits, 0);
4707 			break;
4708 		}
4709 
4710 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4711 		if (!rdata) {
4712 			/* best to give up if we're out of mem */
4713 			add_credits_and_wake_if(server, credits, 0);
4714 			break;
4715 		}
4716 
4717 		rdata->offset	= ra_index * PAGE_SIZE;
4718 		rdata->bytes	= nr_pages * PAGE_SIZE;
4719 		rdata->cfile	= cifsFileInfo_get(open_file);
4720 		rdata->server	= server;
4721 		rdata->mapping	= ractl->mapping;
4722 		rdata->pid	= pid;
4723 		rdata->credits	= credits_on_stack;
4724 
4725 		for (i = 0; i < nr_pages; i++) {
4726 			if (!readahead_folio(ractl))
4727 				WARN_ON(1);
4728 		}
4729 		ra_pages -= nr_pages;
4730 		ra_index += nr_pages;
4731 
4732 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4733 				rdata->offset, rdata->bytes);
4734 
4735 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4736 		if (!rc) {
4737 			if (rdata->cfile->invalidHandle)
4738 				rc = -EAGAIN;
4739 			else
4740 				rc = server->ops->async_readv(rdata);
4741 		}
4742 
4743 		if (rc) {
4744 			add_credits_and_wake_if(server, &rdata->credits, 0);
4745 			cifs_unlock_folios(rdata->mapping,
4746 					   rdata->offset / PAGE_SIZE,
4747 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4748 			/* Fallback to the readpage in error/reconnect cases */
4749 			kref_put(&rdata->refcount, cifs_readdata_release);
4750 			break;
4751 		}
4752 
4753 		kref_put(&rdata->refcount, cifs_readdata_release);
4754 	}
4755 
4756 	free_xid(xid);
4757 }
4758 
4759 /*
4760  * cifs_readpage_worker must be called with the page pinned
4761  */
4762 static int cifs_readpage_worker(struct file *file, struct page *page,
4763 	loff_t *poffset)
4764 {
4765 	struct inode *inode = file_inode(file);
4766 	struct timespec64 atime, mtime;
4767 	char *read_data;
4768 	int rc;
4769 
4770 	/* Is the page cached? */
4771 	rc = cifs_readpage_from_fscache(inode, page);
4772 	if (rc == 0)
4773 		goto read_complete;
4774 
4775 	read_data = kmap(page);
4776 	/* for reads over a certain size could initiate async read ahead */
4777 
4778 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4779 
4780 	if (rc < 0)
4781 		goto io_error;
4782 	else
4783 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4784 
4785 	/* we do not want atime to be less than mtime, it broke some apps */
4786 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4787 	mtime = inode_get_mtime(inode);
4788 	if (timespec64_compare(&atime, &mtime) < 0)
4789 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4790 
4791 	if (PAGE_SIZE > rc)
4792 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4793 
4794 	flush_dcache_page(page);
4795 	SetPageUptodate(page);
4796 	rc = 0;
4797 
4798 io_error:
4799 	kunmap(page);
4800 
4801 read_complete:
4802 	unlock_page(page);
4803 	return rc;
4804 }
4805 
4806 static int cifs_read_folio(struct file *file, struct folio *folio)
4807 {
4808 	struct page *page = &folio->page;
4809 	loff_t offset = page_file_offset(page);
4810 	int rc = -EACCES;
4811 	unsigned int xid;
4812 
4813 	xid = get_xid();
4814 
4815 	if (file->private_data == NULL) {
4816 		rc = -EBADF;
4817 		free_xid(xid);
4818 		return rc;
4819 	}
4820 
4821 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4822 		 page, (int)offset, (int)offset);
4823 
4824 	rc = cifs_readpage_worker(file, page, &offset);
4825 
4826 	free_xid(xid);
4827 	return rc;
4828 }
4829 
4830 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4831 {
4832 	struct cifsFileInfo *open_file;
4833 
4834 	spin_lock(&cifs_inode->open_file_lock);
4835 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4836 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4837 			spin_unlock(&cifs_inode->open_file_lock);
4838 			return 1;
4839 		}
4840 	}
4841 	spin_unlock(&cifs_inode->open_file_lock);
4842 	return 0;
4843 }
4844 
4845 /* We do not want to update the file size from server for inodes
4846    open for write - to avoid races with writepage extending
4847    the file - in the future we could consider allowing
4848    refreshing the inode only on increases in the file size
4849    but this is tricky to do without racing with writebehind
4850    page caching in the current Linux kernel design */
4851 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4852 			    bool from_readdir)
4853 {
4854 	if (!cifsInode)
4855 		return true;
4856 
4857 	if (is_inode_writable(cifsInode) ||
4858 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4859 		/* This inode is open for write at least once */
4860 		struct cifs_sb_info *cifs_sb;
4861 
4862 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4863 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4864 			/* since no page cache to corrupt on directio
4865 			we can change size safely */
4866 			return true;
4867 		}
4868 
4869 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4870 			return true;
4871 
4872 		return false;
4873 	} else
4874 		return true;
4875 }
4876 
4877 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4878 			loff_t pos, unsigned len,
4879 			struct page **pagep, void **fsdata)
4880 {
4881 	int oncethru = 0;
4882 	pgoff_t index = pos >> PAGE_SHIFT;
4883 	loff_t offset = pos & (PAGE_SIZE - 1);
4884 	loff_t page_start = pos & PAGE_MASK;
4885 	loff_t i_size;
4886 	struct page *page;
4887 	int rc = 0;
4888 
4889 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4890 
4891 start:
4892 	page = grab_cache_page_write_begin(mapping, index);
4893 	if (!page) {
4894 		rc = -ENOMEM;
4895 		goto out;
4896 	}
4897 
4898 	if (PageUptodate(page))
4899 		goto out;
4900 
4901 	/*
4902 	 * If we write a full page it will be up to date, no need to read from
4903 	 * the server. If the write is short, we'll end up doing a sync write
4904 	 * instead.
4905 	 */
4906 	if (len == PAGE_SIZE)
4907 		goto out;
4908 
4909 	/*
4910 	 * optimize away the read when we have an oplock, and we're not
4911 	 * expecting to use any of the data we'd be reading in. That
4912 	 * is, when the page lies beyond the EOF, or straddles the EOF
4913 	 * and the write will cover all of the existing data.
4914 	 */
4915 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4916 		i_size = i_size_read(mapping->host);
4917 		if (page_start >= i_size ||
4918 		    (offset == 0 && (pos + len) >= i_size)) {
4919 			zero_user_segments(page, 0, offset,
4920 					   offset + len,
4921 					   PAGE_SIZE);
4922 			/*
4923 			 * PageChecked means that the parts of the page
4924 			 * to which we're not writing are considered up
4925 			 * to date. Once the data is copied to the
4926 			 * page, it can be set uptodate.
4927 			 */
4928 			SetPageChecked(page);
4929 			goto out;
4930 		}
4931 	}
4932 
4933 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4934 		/*
4935 		 * might as well read a page, it is fast enough. If we get
4936 		 * an error, we don't need to return it. cifs_write_end will
4937 		 * do a sync write instead since PG_uptodate isn't set.
4938 		 */
4939 		cifs_readpage_worker(file, page, &page_start);
4940 		put_page(page);
4941 		oncethru = 1;
4942 		goto start;
4943 	} else {
4944 		/* we could try using another file handle if there is one -
4945 		   but how would we lock it to prevent close of that handle
4946 		   racing with this read? In any case
4947 		   this will be written out by write_end so is fine */
4948 	}
4949 out:
4950 	*pagep = page;
4951 	return rc;
4952 }
4953 
4954 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4955 {
4956 	if (folio_test_private(folio))
4957 		return 0;
4958 	if (folio_test_fscache(folio)) {
4959 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4960 			return false;
4961 		folio_wait_fscache(folio);
4962 	}
4963 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4964 	return true;
4965 }
4966 
4967 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4968 				 size_t length)
4969 {
4970 	folio_wait_fscache(folio);
4971 }
4972 
4973 static int cifs_launder_folio(struct folio *folio)
4974 {
4975 	int rc = 0;
4976 	loff_t range_start = folio_pos(folio);
4977 	loff_t range_end = range_start + folio_size(folio);
4978 	struct writeback_control wbc = {
4979 		.sync_mode = WB_SYNC_ALL,
4980 		.nr_to_write = 0,
4981 		.range_start = range_start,
4982 		.range_end = range_end,
4983 	};
4984 
4985 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4986 
4987 	if (folio_clear_dirty_for_io(folio))
4988 		rc = cifs_writepage_locked(&folio->page, &wbc);
4989 
4990 	folio_wait_fscache(folio);
4991 	return rc;
4992 }
4993 
4994 void cifs_oplock_break(struct work_struct *work)
4995 {
4996 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4997 						  oplock_break);
4998 	struct inode *inode = d_inode(cfile->dentry);
4999 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5000 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5001 	struct cifs_tcon *tcon;
5002 	struct TCP_Server_Info *server;
5003 	struct tcon_link *tlink;
5004 	int rc = 0;
5005 	bool purge_cache = false, oplock_break_cancelled;
5006 	__u64 persistent_fid, volatile_fid;
5007 	__u16 net_fid;
5008 
5009 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5010 			TASK_UNINTERRUPTIBLE);
5011 
5012 	tlink = cifs_sb_tlink(cifs_sb);
5013 	if (IS_ERR(tlink))
5014 		goto out;
5015 	tcon = tlink_tcon(tlink);
5016 	server = tcon->ses->server;
5017 
5018 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5019 				      cfile->oplock_epoch, &purge_cache);
5020 
5021 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5022 						cifs_has_mand_locks(cinode)) {
5023 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5024 			 inode);
5025 		cinode->oplock = 0;
5026 	}
5027 
5028 	if (inode && S_ISREG(inode->i_mode)) {
5029 		if (CIFS_CACHE_READ(cinode))
5030 			break_lease(inode, O_RDONLY);
5031 		else
5032 			break_lease(inode, O_WRONLY);
5033 		rc = filemap_fdatawrite(inode->i_mapping);
5034 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5035 			rc = filemap_fdatawait(inode->i_mapping);
5036 			mapping_set_error(inode->i_mapping, rc);
5037 			cifs_zap_mapping(inode);
5038 		}
5039 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5040 		if (CIFS_CACHE_WRITE(cinode))
5041 			goto oplock_break_ack;
5042 	}
5043 
5044 	rc = cifs_push_locks(cfile);
5045 	if (rc)
5046 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5047 
5048 oplock_break_ack:
5049 	/*
5050 	 * When oplock break is received and there are no active
5051 	 * file handles but cached, then schedule deferred close immediately.
5052 	 * So, new open will not use cached handle.
5053 	 */
5054 
5055 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5056 		cifs_close_deferred_file(cinode);
5057 
5058 	persistent_fid = cfile->fid.persistent_fid;
5059 	volatile_fid = cfile->fid.volatile_fid;
5060 	net_fid = cfile->fid.netfid;
5061 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5062 
5063 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5064 	/*
5065 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5066 	 * an acknowledgment to be sent when the file has already been closed.
5067 	 */
5068 	spin_lock(&cinode->open_file_lock);
5069 	/* check list empty since can race with kill_sb calling tree disconnect */
5070 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5071 		spin_unlock(&cinode->open_file_lock);
5072 		rc = server->ops->oplock_response(tcon, persistent_fid,
5073 						  volatile_fid, net_fid, cinode);
5074 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5075 	} else
5076 		spin_unlock(&cinode->open_file_lock);
5077 
5078 	cifs_put_tlink(tlink);
5079 out:
5080 	cifs_done_oplock_break(cinode);
5081 }
5082 
5083 /*
5084  * The presence of cifs_direct_io() in the address space ops vector
5085  * allowes open() O_DIRECT flags which would have failed otherwise.
5086  *
5087  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5088  * so this method should never be called.
5089  *
5090  * Direct IO is not yet supported in the cached mode.
5091  */
5092 static ssize_t
5093 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5094 {
5095         /*
5096          * FIXME
5097          * Eventually need to support direct IO for non forcedirectio mounts
5098          */
5099         return -EINVAL;
5100 }
5101 
5102 static int cifs_swap_activate(struct swap_info_struct *sis,
5103 			      struct file *swap_file, sector_t *span)
5104 {
5105 	struct cifsFileInfo *cfile = swap_file->private_data;
5106 	struct inode *inode = swap_file->f_mapping->host;
5107 	unsigned long blocks;
5108 	long long isize;
5109 
5110 	cifs_dbg(FYI, "swap activate\n");
5111 
5112 	if (!swap_file->f_mapping->a_ops->swap_rw)
5113 		/* Cannot support swap */
5114 		return -EINVAL;
5115 
5116 	spin_lock(&inode->i_lock);
5117 	blocks = inode->i_blocks;
5118 	isize = inode->i_size;
5119 	spin_unlock(&inode->i_lock);
5120 	if (blocks*512 < isize) {
5121 		pr_warn("swap activate: swapfile has holes\n");
5122 		return -EINVAL;
5123 	}
5124 	*span = sis->pages;
5125 
5126 	pr_warn_once("Swap support over SMB3 is experimental\n");
5127 
5128 	/*
5129 	 * TODO: consider adding ACL (or documenting how) to prevent other
5130 	 * users (on this or other systems) from reading it
5131 	 */
5132 
5133 
5134 	/* TODO: add sk_set_memalloc(inet) or similar */
5135 
5136 	if (cfile)
5137 		cfile->swapfile = true;
5138 	/*
5139 	 * TODO: Since file already open, we can't open with DENY_ALL here
5140 	 * but we could add call to grab a byte range lock to prevent others
5141 	 * from reading or writing the file
5142 	 */
5143 
5144 	sis->flags |= SWP_FS_OPS;
5145 	return add_swap_extent(sis, 0, sis->max, 0);
5146 }
5147 
5148 static void cifs_swap_deactivate(struct file *file)
5149 {
5150 	struct cifsFileInfo *cfile = file->private_data;
5151 
5152 	cifs_dbg(FYI, "swap deactivate\n");
5153 
5154 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5155 
5156 	if (cfile)
5157 		cfile->swapfile = false;
5158 
5159 	/* do we need to unpin (or unlock) the file */
5160 }
5161 
5162 /*
5163  * Mark a page as having been made dirty and thus needing writeback.  We also
5164  * need to pin the cache object to write back to.
5165  */
5166 #ifdef CONFIG_CIFS_FSCACHE
5167 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5168 {
5169 	return fscache_dirty_folio(mapping, folio,
5170 					cifs_inode_cookie(mapping->host));
5171 }
5172 #else
5173 #define cifs_dirty_folio filemap_dirty_folio
5174 #endif
5175 
5176 const struct address_space_operations cifs_addr_ops = {
5177 	.read_folio = cifs_read_folio,
5178 	.readahead = cifs_readahead,
5179 	.writepages = cifs_writepages,
5180 	.write_begin = cifs_write_begin,
5181 	.write_end = cifs_write_end,
5182 	.dirty_folio = cifs_dirty_folio,
5183 	.release_folio = cifs_release_folio,
5184 	.direct_IO = cifs_direct_io,
5185 	.invalidate_folio = cifs_invalidate_folio,
5186 	.launder_folio = cifs_launder_folio,
5187 	.migrate_folio = filemap_migrate_folio,
5188 	/*
5189 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5190 	 * helper if needed
5191 	 */
5192 	.swap_activate = cifs_swap_activate,
5193 	.swap_deactivate = cifs_swap_deactivate,
5194 };
5195 
5196 /*
5197  * cifs_readahead requires the server to support a buffer large enough to
5198  * contain the header plus one complete page of data.  Otherwise, we need
5199  * to leave cifs_readahead out of the address space operations.
5200  */
5201 const struct address_space_operations cifs_addr_ops_smallbuf = {
5202 	.read_folio = cifs_read_folio,
5203 	.writepages = cifs_writepages,
5204 	.write_begin = cifs_write_begin,
5205 	.write_end = cifs_write_end,
5206 	.dirty_folio = cifs_dirty_folio,
5207 	.release_folio = cifs_release_folio,
5208 	.invalidate_folio = cifs_invalidate_folio,
5209 	.launder_folio = cifs_launder_folio,
5210 	.migrate_folio = filemap_migrate_folio,
5211 };
5212