xref: /openbmc/linux/fs/smb/client/file.c (revision e4232010)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
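		/*
		 * folio_lock() can sleep, so pause the XArray walk and drop the
		 * RCU read lock before taking it, then resume afterwards.
		 */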
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on tree connections as invalid, since they were
167  * closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it
217 		   can cause an unnecessary access-denied error on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that there is no direct POSIX-flag match for the disposition
374  *	FILE_SUPERSEDE (ie create whether or not the file exists);
375  *	O_CREAT | O_TRUNC is similar, but it truncates the existing
376  *	file rather than creating a new file as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on the open call).
378  *
379  *	O_SYNC is a reasonable match to the CIFS writethrough flag
380  *	and the read/write flags match reasonably.  O_LARGEFILE
381  *	is irrelevant because largefile support is always used
382  *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
383  *	O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
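	/*
	 * rdwr_for_fscache tracks the fscache-driven access adjustment:
	 * 0 - no adjustment; 1 - a write-only open was upgraded to
	 * read/write so the cache can fill in around partial writes;
	 * 2 - that upgrade was refused (-EACCES), so we retried with the
	 * original access and must invalidate the cache after the open.
	 */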
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info, passing in the fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->invalidHandle = false;
505 	cfile->deferred_close_scheduled = false;
506 	cfile->tlink = cifs_get_tlink(tlink);
507 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 	INIT_WORK(&cfile->serverclose, serverclose_work);
510 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 	mutex_init(&cfile->fh_mutex);
512 	spin_lock_init(&cfile->file_info_lock);
513 
514 	cifs_sb_active(inode->i_sb);
515 
516 	/*
517 	 * If the server returned a read oplock and we have mandatory brlocks,
518 	 * set oplock level to None.
519 	 */
520 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 		oplock = 0;
523 	}
524 
525 	cifs_down_write(&cinode->lock_sem);
526 	list_add(&fdlocks->llist, &cinode->llist);
527 	up_write(&cinode->lock_sem);
528 
529 	spin_lock(&tcon->open_file_lock);
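	/*
	 * If an oplock or lease break arrived while this open was still
	 * pending, the break handler recorded the new level in the pending
	 * open entry; prefer that value over the one the server returned.
	 */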
530 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 		oplock = fid->pending_open->oplock;
532 	list_del(&fid->pending_open->olist);
533 
534 	fid->purge_cache = false;
535 	server->ops->set_fid(cfile, fid, oplock);
536 
537 	list_add(&cfile->tlist, &tcon->openFileList);
538 	atomic_inc(&tcon->num_local_opens);
539 
540 	/* if this is a readable file instance, put it first in the list */
541 	spin_lock(&cinode->open_file_lock);
542 	if (file->f_mode & FMODE_READ)
543 		list_add(&cfile->flist, &cinode->openFileList);
544 	else
545 		list_add_tail(&cfile->flist, &cinode->openFileList);
546 	spin_unlock(&cinode->open_file_lock);
547 	spin_unlock(&tcon->open_file_lock);
548 
549 	if (fid->purge_cache)
550 		cifs_zap_mapping(inode);
551 
552 	file->private_data = cfile;
553 	return cfile;
554 }
555 
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559 	spin_lock(&cifs_file->file_info_lock);
560 	cifsFileInfo_get_locked(cifs_file);
561 	spin_unlock(&cifs_file->file_info_lock);
562 	return cifs_file;
563 }
564 
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567 	struct inode *inode = d_inode(cifs_file->dentry);
568 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 	struct cifsLockInfo *li, *tmp;
570 	struct super_block *sb = inode->i_sb;
571 
572 	/*
573 	 * Delete any outstanding lock records. We'll lose them when the file
574 	 * is closed anyway.
575 	 */
576 	cifs_down_write(&cifsi->lock_sem);
577 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 		list_del(&li->llist);
579 		cifs_del_lock_waiters(li);
580 		kfree(li);
581 	}
582 	list_del(&cifs_file->llist->llist);
583 	kfree(cifs_file->llist);
584 	up_write(&cifsi->lock_sem);
585 
586 	cifs_put_tlink(cifs_file->tlink);
587 	dput(cifs_file->dentry);
588 	cifs_sb_deactive(sb);
589 	kfree(cifs_file->symlink_target);
590 	kfree(cifs_file);
591 }
592 
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595 	struct cifsFileInfo *cifs_file = container_of(work,
596 			struct cifsFileInfo, put);
597 
598 	cifsFileInfo_put_final(cifs_file);
599 }
600 
601 void serverclose_work(struct work_struct *work)
602 {
603 	struct cifsFileInfo *cifs_file = container_of(work,
604 			struct cifsFileInfo, serverclose);
605 
606 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607 
608 	struct TCP_Server_Info *server = tcon->ses->server;
609 	int rc = 0;
610 	int retries = 0;
611 	int MAX_RETRIES = 4;
612 
613 	do {
614 		if (server->ops->close_getattr)
615 			rc = server->ops->close_getattr(0, tcon, cifs_file);
616 		else if (server->ops->close)
617 			rc = server->ops->close(0, tcon, &cifs_file->fid);
618 
619 		if (rc == -EBUSY || rc == -EAGAIN) {
620 			retries++;
621 			msleep(250);
622 		}
623 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 	);
625 
626 	if (retries == MAX_RETRIES)
627 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628 
629 	if (cifs_file->offload)
630 		queue_work(fileinfo_put_wq, &cifs_file->put);
631 	else
632 		cifsFileInfo_put_final(cifs_file);
633 }
634 
635 /**
636  * cifsFileInfo_put - release a reference to a file's private data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644 	_cifsFileInfo_put(cifs_file, true, true);
645 }
646 
647 /**
648  * _cifsFileInfo_put - release a reference to a file's private data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_for_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:	if true, defer the final release to a workqueue; false on the
661  *		close and oplock break paths
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 		       bool wait_oplock_handler, bool offload)
665 {
666 	struct inode *inode = d_inode(cifs_file->dentry);
667 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 	struct TCP_Server_Info *server = tcon->ses->server;
669 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 	struct super_block *sb = inode->i_sb;
671 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 	struct cifs_fid fid = {};
673 	struct cifs_pending_open open;
674 	bool oplock_break_cancelled;
675 	bool serverclose_offloaded = false;
676 
677 	spin_lock(&tcon->open_file_lock);
678 	spin_lock(&cifsi->open_file_lock);
679 	spin_lock(&cifs_file->file_info_lock);
680 
681 	cifs_file->offload = offload;
682 	if (--cifs_file->count > 0) {
683 		spin_unlock(&cifs_file->file_info_lock);
684 		spin_unlock(&cifsi->open_file_lock);
685 		spin_unlock(&tcon->open_file_lock);
686 		return;
687 	}
688 	spin_unlock(&cifs_file->file_info_lock);
689 
690 	if (server->ops->get_lease_key)
691 		server->ops->get_lease_key(inode, &fid);
692 
693 	/* store open in pending opens to make sure we don't miss lease break */
694 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695 
696 	/* remove it from the lists */
697 	list_del(&cifs_file->flist);
698 	list_del(&cifs_file->tlist);
699 	atomic_dec(&tcon->num_local_opens);
700 
701 	if (list_empty(&cifsi->openFileList)) {
702 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 			 d_inode(cifs_file->dentry));
704 		/*
705 		 * In strict cache mode we need to invalidate the mapping on the
706 		 * last close, because stale data may cause an error when we open
707 		 * this file again and get at least a level II oplock.
708 		 */
709 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 		cifs_set_oplock_level(cifsi, 0);
712 	}
713 
714 	spin_unlock(&cifsi->open_file_lock);
715 	spin_unlock(&tcon->open_file_lock);
716 
717 	oplock_break_cancelled = wait_oplock_handler ?
718 		cancel_work_sync(&cifs_file->oplock_break) : false;
719 
720 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 		struct TCP_Server_Info *server = tcon->ses->server;
722 		unsigned int xid;
723 		int rc = 0;
724 
725 		xid = get_xid();
726 		if (server->ops->close_getattr)
727 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 		else if (server->ops->close)
729 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 		_free_xid(xid);
731 
732 		if (rc == -EBUSY || rc == -EAGAIN) {
733 			// Server close failed, hence offloading it as an async op
734 			queue_work(serverclose_wq, &cifs_file->serverclose);
735 			serverclose_offloaded = true;
736 		}
737 	}
738 
739 	if (oplock_break_cancelled)
740 		cifs_done_oplock_break(cifsi);
741 
742 	cifs_del_pending_open(&open);
743 
744 	// If the serverclose has been offloaded to the workqueue (on failure),
745 	// it will handle offloading the put as well.  If the serverclose was
746 	// not offloaded, we need to handle offloading the put here.
747 	if (!serverclose_offloaded) {
748 		if (offload)
749 			queue_work(fileinfo_put_wq, &cifs_file->put);
750 		else
751 			cifsFileInfo_put_final(cifs_file);
752 	}
753 }
754 
755 int cifs_open(struct inode *inode, struct file *file)
756 
757 {
758 	int rc = -EACCES;
759 	unsigned int xid;
760 	__u32 oplock;
761 	struct cifs_sb_info *cifs_sb;
762 	struct TCP_Server_Info *server;
763 	struct cifs_tcon *tcon;
764 	struct tcon_link *tlink;
765 	struct cifsFileInfo *cfile = NULL;
766 	void *page;
767 	const char *full_path;
768 	bool posix_open_ok = false;
769 	struct cifs_fid fid = {};
770 	struct cifs_pending_open open;
771 	struct cifs_open_info_data data = {};
772 
773 	xid = get_xid();
774 
775 	cifs_sb = CIFS_SB(inode->i_sb);
776 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 		free_xid(xid);
778 		return -EIO;
779 	}
780 
781 	tlink = cifs_sb_tlink(cifs_sb);
782 	if (IS_ERR(tlink)) {
783 		free_xid(xid);
784 		return PTR_ERR(tlink);
785 	}
786 	tcon = tlink_tcon(tlink);
787 	server = tcon->ses->server;
788 
789 	page = alloc_dentry_path();
790 	full_path = build_path_from_dentry(file_dentry(file), page);
791 	if (IS_ERR(full_path)) {
792 		rc = PTR_ERR(full_path);
793 		goto out;
794 	}
795 
796 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 		 inode, file->f_flags, full_path);
798 
799 	if (file->f_flags & O_DIRECT &&
800 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 			file->f_op = &cifs_file_direct_nobrl_ops;
803 		else
804 			file->f_op = &cifs_file_direct_ops;
805 	}
806 
807 	/* Get the cached handle as SMB2 close is deferred */
808 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 	if (rc == 0) {
810 		if (file->f_flags == cfile->f_flags) {
811 			file->private_data = cfile;
812 			spin_lock(&CIFS_I(inode)->deferred_lock);
813 			cifs_del_deferred_close(cfile);
814 			spin_unlock(&CIFS_I(inode)->deferred_lock);
815 			goto use_cache;
816 		} else {
817 			_cifsFileInfo_put(cfile, true, false);
818 		}
819 	}
820 
821 	if (server->oplocks)
822 		oplock = REQ_OPLOCK;
823 	else
824 		oplock = 0;
825 
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 	if (!tcon->broken_posix_open && tcon->unix_ext &&
828 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 		/* can not refresh inode info since size could be stale */
831 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 				cifs_sb->ctx->file_mode /* ignored */,
833 				file->f_flags, &oplock, &fid.netfid, xid);
834 		if (rc == 0) {
835 			cifs_dbg(FYI, "posix open succeeded\n");
836 			posix_open_ok = true;
837 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 			if (tcon->ses->serverNOS)
839 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 					 tcon->ses->ip_addr,
841 					 tcon->ses->serverNOS);
842 			tcon->broken_posix_open = true;
843 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 			goto out;
846 		/*
847 		 * Otherwise fall through and retry the open the old way on
848 		 * network i/o or DFS errors.
849 		 */
850 	}
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852 
853 	if (server->ops->get_lease_key)
854 		server->ops->get_lease_key(inode, &fid);
855 
856 	cifs_add_pending_open(&fid, tlink, &open);
857 
858 	if (!posix_open_ok) {
859 		if (server->ops->get_lease_key)
860 			server->ops->get_lease_key(inode, &fid);
861 
862 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 				  xid, &data);
864 		if (rc) {
865 			cifs_del_pending_open(&open);
866 			goto out;
867 		}
868 	}
869 
870 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 	if (cfile == NULL) {
872 		if (server->ops->close)
873 			server->ops->close(xid, tcon, &fid);
874 		cifs_del_pending_open(&open);
875 		rc = -ENOMEM;
876 		goto out;
877 	}
878 
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 		/*
882 		 * Time to set the mode, which we could not set earlier due to
883 		 * problems creating new read-only files.
884 		 */
885 		struct cifs_unix_set_info_args args = {
886 			.mode	= inode->i_mode,
887 			.uid	= INVALID_UID, /* no change */
888 			.gid	= INVALID_GID, /* no change */
889 			.ctime	= NO_CHANGE_64,
890 			.atime	= NO_CHANGE_64,
891 			.mtime	= NO_CHANGE_64,
892 			.device	= 0,
893 		};
894 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 				       cfile->pid);
896 	}
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898 
899 use_cache:
900 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 			   file->f_mode & FMODE_WRITE);
902 	if (!(file->f_flags & O_DIRECT))
903 		goto out;
904 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 		goto out;
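	/*
	 * The file was opened O_DIRECT with write access, so writes can
	 * bypass the local cache; invalidate anything fscache holds for
	 * this inode.
	 */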
906 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907 
908 out:
909 	free_dentry_path(page);
910 	free_xid(xid);
911 	cifs_put_tlink(tlink);
912 	cifs_free_open_info(&data);
913 	return rc;
914 }
915 
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919 
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 	int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933 
934 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 	if (cinode->can_cache_brlcks) {
936 		/* can cache locks - no need to relock */
937 		up_read(&cinode->lock_sem);
938 		return rc;
939 	}
940 
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 	if (cap_unix(tcon->ses) &&
943 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 		rc = cifs_push_posix_locks(cfile);
946 	else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949 
950 	up_read(&cinode->lock_sem);
951 	return rc;
952 }
953 
954 static int
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957 	int rc = -EACCES;
958 	unsigned int xid;
959 	__u32 oplock;
960 	struct cifs_sb_info *cifs_sb;
961 	struct cifs_tcon *tcon;
962 	struct TCP_Server_Info *server;
963 	struct cifsInodeInfo *cinode;
964 	struct inode *inode;
965 	void *page;
966 	const char *full_path;
967 	int desired_access;
968 	int disposition = FILE_OPEN;
969 	int create_options = CREATE_NOT_DIR;
970 	struct cifs_open_parms oparms;
971 	int rdwr_for_fscache = 0;
972 
973 	xid = get_xid();
974 	mutex_lock(&cfile->fh_mutex);
975 	if (!cfile->invalidHandle) {
976 		mutex_unlock(&cfile->fh_mutex);
977 		free_xid(xid);
978 		return 0;
979 	}
980 
981 	inode = d_inode(cfile->dentry);
982 	cifs_sb = CIFS_SB(inode->i_sb);
983 	tcon = tlink_tcon(cfile->tlink);
984 	server = tcon->ses->server;
985 
986 	/*
987 	 * Can not grab the rename sem here: various ops, including those that
988 	 * already hold the rename sem, can end up causing writepage to get
989 	 * called, and if the server was down that means we end up here.  We
990 	 * can never tell whether the caller already holds the rename_sem.
991 	 */
992 	page = alloc_dentry_path();
993 	full_path = build_path_from_dentry(cfile->dentry, page);
994 	if (IS_ERR(full_path)) {
995 		mutex_unlock(&cfile->fh_mutex);
996 		free_dentry_path(page);
997 		free_xid(xid);
998 		return PTR_ERR(full_path);
999 	}
1000 
1001 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 		 inode, cfile->f_flags, full_path);
1003 
1004 	if (tcon->ses->server->oplocks)
1005 		oplock = REQ_OPLOCK;
1006 	else
1007 		oplock = 0;
1008 
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 		/*
1014 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 		 * original open. Must mask them off for a reopen.
1016 		 */
1017 		unsigned int oflags = cfile->f_flags &
1018 						~(O_CREAT | O_EXCL | O_TRUNC);
1019 
1020 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 				     cifs_sb->ctx->file_mode /* ignored */,
1022 				     oflags, &oplock, &cfile->fid.netfid, xid);
1023 		if (rc == 0) {
1024 			cifs_dbg(FYI, "posix reopen succeeded\n");
1025 			oparms.reconnect = true;
1026 			goto reopen_success;
1027 		}
1028 		/*
1029 		 * Fall through and retry the open the old way on errors; in the
1030 		 * reconnect path especially, it is important to retry hard.
1031 		 */
1032 	}
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034 
1035 	/* If we're caching, we need to be able to fill in around partial writes. */
1036 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 		rdwr_for_fscache = 1;
1038 
1039 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040 
1041 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 	if (cfile->f_flags & O_SYNC)
1043 		create_options |= CREATE_WRITE_THROUGH;
1044 
1045 	if (cfile->f_flags & O_DIRECT)
1046 		create_options |= CREATE_NO_BUFFER;
1047 
1048 	if (server->ops->get_lease_key)
1049 		server->ops->get_lease_key(inode, &cfile->fid);
1050 
1051 retry_open:
1052 	oparms = (struct cifs_open_parms) {
1053 		.tcon = tcon,
1054 		.cifs_sb = cifs_sb,
1055 		.desired_access = desired_access,
1056 		.create_options = cifs_create_options(cifs_sb, create_options),
1057 		.disposition = disposition,
1058 		.path = full_path,
1059 		.fid = &cfile->fid,
1060 		.reconnect = true,
1061 	};
1062 
1063 	/*
1064 	 * Can not refresh the inode by passing in a file_info buf to be
1065 	 * returned by ops->open and then calling get_inode_info with that buf:
1066 	 * the file might have write-behind data that needs to be flushed, so
1067 	 * the server's version of the file size can be stale.  If we knew for
1068 	 * sure that the inode was not dirty locally we could do this.
1069 	 */
1070 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 	if (rc == -ENOENT && oparms.reconnect == false) {
1072 		/* durable handle timeout is expired - open the file again */
1073 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 		/* indicate that we need to relock the file */
1075 		oparms.reconnect = true;
1076 	}
1077 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 		rdwr_for_fscache = 2;
1080 		goto retry_open;
1081 	}
1082 
1083 	if (rc) {
1084 		mutex_unlock(&cfile->fh_mutex);
1085 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 		goto reopen_error_exit;
1088 	}
1089 
1090 	if (rdwr_for_fscache == 2)
1091 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092 
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 	cfile->invalidHandle = false;
1097 	mutex_unlock(&cfile->fh_mutex);
1098 	cinode = CIFS_I(inode);
1099 
1100 	if (can_flush) {
1101 		rc = filemap_write_and_wait(inode->i_mapping);
1102 		if (!is_interrupt_error(rc))
1103 			mapping_set_error(inode->i_mapping, rc);
1104 
1105 		if (tcon->posix_extensions) {
1106 			rc = smb311_posix_get_inode_info(&inode, full_path,
1107 							 NULL, inode->i_sb, xid);
1108 		} else if (tcon->unix_ext) {
1109 			rc = cifs_get_inode_info_unix(&inode, full_path,
1110 						      inode->i_sb, xid);
1111 		} else {
1112 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1113 						 inode->i_sb, xid, NULL);
1114 		}
1115 	}
1116 	/*
1117 	 * Otherwise we are already writing data out to the server and could
1118 	 * deadlock if we tried to flush it.  Since we do not know whether we
1119 	 * have data that would invalidate the current end of file on the
1120 	 * server, we can not go to the server to get the new inode info.
1121 	 */
1122 
1123 	/*
1124 	 * If the server returned a read oplock and we have mandatory brlocks,
1125 	 * set oplock level to None.
1126 	 */
1127 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129 		oplock = 0;
1130 	}
1131 
1132 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1133 	if (oparms.reconnect)
1134 		cifs_relock_file(cfile);
1135 
1136 reopen_error_exit:
1137 	free_dentry_path(page);
1138 	free_xid(xid);
1139 	return rc;
1140 }
1141 
1142 void smb2_deferred_work_close(struct work_struct *work)
1143 {
1144 	struct cifsFileInfo *cfile = container_of(work,
1145 			struct cifsFileInfo, deferred.work);
1146 
1147 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148 	cifs_del_deferred_close(cfile);
1149 	cfile->deferred_close_scheduled = false;
1150 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151 	_cifsFileInfo_put(cfile, true, false);
1152 }
1153 
1154 int cifs_close(struct inode *inode, struct file *file)
1155 {
1156 	struct cifsFileInfo *cfile;
1157 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1158 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1159 	struct cifs_deferred_close *dclose;
1160 
1161 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1162 
1163 	if (file->private_data != NULL) {
1164 		cfile = file->private_data;
1165 		file->private_data = NULL;
1166 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
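		/*
		 * Defer the actual close only when a close timeout is
		 * configured, we hold a read/handle/write caching lease, and
		 * no lock operation has requested close-on-lock; otherwise
		 * release the handle right away.
		 */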
1167 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1168 		    && cinode->lease_granted &&
1169 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1170 		    dclose) {
1171 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1172 				inode_set_mtime_to_ts(inode,
1173 						      inode_set_ctime_current(inode));
1174 			}
1175 			spin_lock(&cinode->deferred_lock);
1176 			cifs_add_deferred_close(cfile, dclose);
1177 			if (cfile->deferred_close_scheduled &&
1178 			    delayed_work_pending(&cfile->deferred)) {
1179 				/*
1180 				 * If there is no pending work, mod_delayed_work queues new work.
1181 				 * So, increase the ref count to avoid a use-after-free.
1182 				 */
1183 				if (!mod_delayed_work(deferredclose_wq,
1184 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1185 					cifsFileInfo_get(cfile);
1186 			} else {
1187 				/* Deferred close for files */
1188 				queue_delayed_work(deferredclose_wq,
1189 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1190 				cfile->deferred_close_scheduled = true;
1191 				spin_unlock(&cinode->deferred_lock);
1192 				return 0;
1193 			}
1194 			spin_unlock(&cinode->deferred_lock);
1195 			_cifsFileInfo_put(cfile, true, false);
1196 		} else {
1197 			_cifsFileInfo_put(cfile, true, false);
1198 			kfree(dclose);
1199 		}
1200 	}
1201 
1202 	/* return code from the ->release op is always ignored */
1203 	return 0;
1204 }
1205 
1206 void
1207 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1208 {
1209 	struct cifsFileInfo *open_file, *tmp;
1210 	struct list_head tmp_list;
1211 
1212 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1213 		return;
1214 
1215 	tcon->need_reopen_files = false;
1216 
1217 	cifs_dbg(FYI, "Reopen persistent handles\n");
1218 	INIT_LIST_HEAD(&tmp_list);
1219 
1220 	/* list all files open on tree connection, reopen persistent handles */
1221 	spin_lock(&tcon->open_file_lock);
1222 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1223 		if (!open_file->invalidHandle)
1224 			continue;
1225 		cifsFileInfo_get(open_file);
1226 		list_add_tail(&open_file->rlist, &tmp_list);
1227 	}
1228 	spin_unlock(&tcon->open_file_lock);
1229 
1230 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1231 		if (cifs_reopen_file(open_file, false /* do not flush */))
1232 			tcon->need_reopen_files = true;
1233 		list_del_init(&open_file->rlist);
1234 		cifsFileInfo_put(open_file);
1235 	}
1236 }
1237 
1238 int cifs_closedir(struct inode *inode, struct file *file)
1239 {
1240 	int rc = 0;
1241 	unsigned int xid;
1242 	struct cifsFileInfo *cfile = file->private_data;
1243 	struct cifs_tcon *tcon;
1244 	struct TCP_Server_Info *server;
1245 	char *buf;
1246 
1247 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1248 
1249 	if (cfile == NULL)
1250 		return rc;
1251 
1252 	xid = get_xid();
1253 	tcon = tlink_tcon(cfile->tlink);
1254 	server = tcon->ses->server;
1255 
1256 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1257 	spin_lock(&cfile->file_info_lock);
1258 	if (server->ops->dir_needs_close(cfile)) {
1259 		cfile->invalidHandle = true;
1260 		spin_unlock(&cfile->file_info_lock);
1261 		if (server->ops->close_dir)
1262 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1263 		else
1264 			rc = -ENOSYS;
1265 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1266 		/* not much we can do if it fails anyway, ignore rc */
1267 		rc = 0;
1268 	} else
1269 		spin_unlock(&cfile->file_info_lock);
1270 
1271 	buf = cfile->srch_inf.ntwrk_buf_start;
1272 	if (buf) {
1273 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1274 		cfile->srch_inf.ntwrk_buf_start = NULL;
1275 		if (cfile->srch_inf.smallBuf)
1276 			cifs_small_buf_release(buf);
1277 		else
1278 			cifs_buf_release(buf);
1279 	}
1280 
1281 	cifs_put_tlink(cfile->tlink);
1282 	kfree(file->private_data);
1283 	file->private_data = NULL;
1284 	/* BB can we lock the filestruct while this is going on? */
1285 	free_xid(xid);
1286 	return rc;
1287 }
1288 
1289 static struct cifsLockInfo *
1290 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1291 {
1292 	struct cifsLockInfo *lock =
1293 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1294 	if (!lock)
1295 		return lock;
1296 	lock->offset = offset;
1297 	lock->length = length;
1298 	lock->type = type;
1299 	lock->pid = current->tgid;
1300 	lock->flags = flags;
1301 	INIT_LIST_HEAD(&lock->blist);
1302 	init_waitqueue_head(&lock->block_q);
1303 	return lock;
1304 }
1305 
1306 void
1307 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1308 {
1309 	struct cifsLockInfo *li, *tmp;
1310 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1311 		list_del_init(&li->blist);
1312 		wake_up(&li->block_q);
1313 	}
1314 }
1315 
1316 #define CIFS_LOCK_OP	0
1317 #define CIFS_READ_OP	1
1318 #define CIFS_WRITE_OP	2
1319 
1320 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1321 static bool
1322 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1323 			    __u64 length, __u8 type, __u16 flags,
1324 			    struct cifsFileInfo *cfile,
1325 			    struct cifsLockInfo **conf_lock, int rw_check)
1326 {
1327 	struct cifsLockInfo *li;
1328 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1329 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1330 
1331 	list_for_each_entry(li, &fdlocks->locks, llist) {
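		/* ranges that do not overlap the requested one cannot conflict */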
1332 		if (offset + length <= li->offset ||
1333 		    offset >= li->offset + li->length)
1334 			continue;
1335 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1336 		    server->ops->compare_fids(cfile, cur_cfile)) {
1337 			/* shared lock prevents write op through the same fid */
1338 			if (!(li->type & server->vals->shared_lock_type) ||
1339 			    rw_check != CIFS_WRITE_OP)
1340 				continue;
1341 		}
1342 		if ((type & server->vals->shared_lock_type) &&
1343 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1344 		     current->tgid == li->pid) || type == li->type))
1345 			continue;
1346 		if (rw_check == CIFS_LOCK_OP &&
1347 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1348 		    server->ops->compare_fids(cfile, cur_cfile))
1349 			continue;
1350 		if (conf_lock)
1351 			*conf_lock = li;
1352 		return true;
1353 	}
1354 	return false;
1355 }
1356 
1357 bool
1358 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1359 			__u8 type, __u16 flags,
1360 			struct cifsLockInfo **conf_lock, int rw_check)
1361 {
1362 	bool rc = false;
1363 	struct cifs_fid_locks *cur;
1364 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1365 
1366 	list_for_each_entry(cur, &cinode->llist, llist) {
1367 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1368 						 flags, cfile, conf_lock,
1369 						 rw_check);
1370 		if (rc)
1371 			break;
1372 	}
1373 
1374 	return rc;
1375 }
1376 
1377 /*
1378  * Check if there is another lock that prevents us from setting the lock
1379  * (mandatory style). If such a lock exists, update the flock structure with
1380  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1381  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1382  * ask the server, or 1 otherwise.
1383  */
1384 static int
1385 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1386 	       __u8 type, struct file_lock *flock)
1387 {
1388 	int rc = 0;
1389 	struct cifsLockInfo *conf_lock;
1390 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1391 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1392 	bool exist;
1393 
1394 	down_read(&cinode->lock_sem);
1395 
1396 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1397 					flock->fl_flags, &conf_lock,
1398 					CIFS_LOCK_OP);
1399 	if (exist) {
1400 		flock->fl_start = conf_lock->offset;
1401 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1402 		flock->fl_pid = conf_lock->pid;
1403 		if (conf_lock->type & server->vals->shared_lock_type)
1404 			flock->fl_type = F_RDLCK;
1405 		else
1406 			flock->fl_type = F_WRLCK;
1407 	} else if (!cinode->can_cache_brlcks)
1408 		rc = 1;
1409 	else
1410 		flock->fl_type = F_UNLCK;
1411 
1412 	up_read(&cinode->lock_sem);
1413 	return rc;
1414 }
1415 
1416 static void
1417 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1418 {
1419 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1420 	cifs_down_write(&cinode->lock_sem);
1421 	list_add_tail(&lock->llist, &cfile->llist->locks);
1422 	up_write(&cinode->lock_sem);
1423 }
1424 
1425 /*
1426  * Set the byte-range lock (mandatory style). Returns:
1427  * 1) 0, if we set the lock and don't need to send the request to the server;
1428  * 2) 1, if no locks prevent us but we need to send the request to the server;
1429  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1430  */
1431 static int
1432 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1433 		 bool wait)
1434 {
1435 	struct cifsLockInfo *conf_lock;
1436 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1437 	bool exist;
1438 	int rc = 0;
1439 
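	/*
	 * If a conflicting lock exists and @wait is true, queue this lock on
	 * the conflicting lock's block list and sleep until the holder wakes
	 * us via cifs_del_lock_waiters(), then retry from here.
	 */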
1440 try_again:
1441 	exist = false;
1442 	cifs_down_write(&cinode->lock_sem);
1443 
1444 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1445 					lock->type, lock->flags, &conf_lock,
1446 					CIFS_LOCK_OP);
1447 	if (!exist && cinode->can_cache_brlcks) {
1448 		list_add_tail(&lock->llist, &cfile->llist->locks);
1449 		up_write(&cinode->lock_sem);
1450 		return rc;
1451 	}
1452 
1453 	if (!exist)
1454 		rc = 1;
1455 	else if (!wait)
1456 		rc = -EACCES;
1457 	else {
1458 		list_add_tail(&lock->blist, &conf_lock->blist);
1459 		up_write(&cinode->lock_sem);
1460 		rc = wait_event_interruptible(lock->block_q,
1461 					(lock->blist.prev == &lock->blist) &&
1462 					(lock->blist.next == &lock->blist));
1463 		if (!rc)
1464 			goto try_again;
1465 		cifs_down_write(&cinode->lock_sem);
1466 		list_del_init(&lock->blist);
1467 	}
1468 
1469 	up_write(&cinode->lock_sem);
1470 	return rc;
1471 }
1472 
1473 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1474 /*
1475  * Check if there is another lock that prevents us from setting the lock
1476  * (posix style). If such a lock exists, update the flock structure with its
1477  * properties. Otherwise, set the flock type to F_UNLCK if we can cache
1478  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1479  * ask the server, or 1 otherwise.
1480  */
1481 static int
1482 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1483 {
1484 	int rc = 0;
1485 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1486 	unsigned char saved_type = flock->fl_type;
1487 
1488 	if ((flock->fl_flags & FL_POSIX) == 0)
1489 		return 1;
1490 
1491 	down_read(&cinode->lock_sem);
1492 	posix_test_lock(file, flock);
1493 
1494 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1495 		flock->fl_type = saved_type;
1496 		rc = 1;
1497 	}
1498 
1499 	up_read(&cinode->lock_sem);
1500 	return rc;
1501 }
1502 
1503 /*
1504  * Set the byte-range lock (posix style). Returns:
1505  * 1) <0, if an error occurs while setting the lock;
1506  * 2) 0, if we set the lock and don't need to send the request to the server;
1507  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1508  * 4) FILE_LOCK_DEFERRED + 1, if we need to send the request to the server.
1509  */
1510 static int
1511 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1512 {
1513 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1514 	int rc = FILE_LOCK_DEFERRED + 1;
1515 
1516 	if ((flock->fl_flags & FL_POSIX) == 0)
1517 		return rc;
1518 
1519 	cifs_down_write(&cinode->lock_sem);
1520 	if (!cinode->can_cache_brlcks) {
1521 		up_write(&cinode->lock_sem);
1522 		return rc;
1523 	}
1524 
1525 	rc = posix_lock_file(file, flock, NULL);
1526 	up_write(&cinode->lock_sem);
1527 	return rc;
1528 }
1529 
1530 int
1531 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1532 {
1533 	unsigned int xid;
1534 	int rc = 0, stored_rc;
1535 	struct cifsLockInfo *li, *tmp;
1536 	struct cifs_tcon *tcon;
1537 	unsigned int num, max_num, max_buf;
1538 	LOCKING_ANDX_RANGE *buf, *cur;
1539 	static const int types[] = {
1540 		LOCKING_ANDX_LARGE_FILES,
1541 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1542 	};
1543 	int i;
1544 
1545 	xid = get_xid();
1546 	tcon = tlink_tcon(cfile->tlink);
1547 
1548 	/*
1549 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1550 	 * and check it before using.
1551 	 */
1552 	max_buf = tcon->ses->server->maxBuf;
1553 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1554 		free_xid(xid);
1555 		return -EINVAL;
1556 	}
1557 
1558 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1559 		     PAGE_SIZE);
1560 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1561 			PAGE_SIZE);
1562 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1563 						sizeof(LOCKING_ANDX_RANGE);
1564 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1565 	if (!buf) {
1566 		free_xid(xid);
1567 		return -ENOMEM;
1568 	}
1569 
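	/*
	 * Push the cached locks in two passes, one per lock type (exclusive
	 * large-file locks first, then shared ones), since each LOCKING_ANDX
	 * request carries a single lock type.
	 */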
1570 	for (i = 0; i < 2; i++) {
1571 		cur = buf;
1572 		num = 0;
1573 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1574 			if (li->type != types[i])
1575 				continue;
1576 			cur->Pid = cpu_to_le16(li->pid);
1577 			cur->LengthLow = cpu_to_le32((u32)li->length);
1578 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1579 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1580 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1581 			if (++num == max_num) {
1582 				stored_rc = cifs_lockv(xid, tcon,
1583 						       cfile->fid.netfid,
1584 						       (__u8)li->type, 0, num,
1585 						       buf);
1586 				if (stored_rc)
1587 					rc = stored_rc;
1588 				cur = buf;
1589 				num = 0;
1590 			} else
1591 				cur++;
1592 		}
1593 
1594 		if (num) {
1595 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1596 					       (__u8)types[i], 0, num, buf);
1597 			if (stored_rc)
1598 				rc = stored_rc;
1599 		}
1600 	}
1601 
1602 	kfree(buf);
1603 	free_xid(xid);
1604 	return rc;
1605 }
1606 
1607 static __u32
1608 hash_lockowner(fl_owner_t owner)
1609 {
1610 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1611 }
1612 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1613 
1614 struct lock_to_push {
1615 	struct list_head llist;
1616 	__u64 offset;
1617 	__u64 length;
1618 	__u32 pid;
1619 	__u16 netfid;
1620 	__u8 type;
1621 };
1622 
1623 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1624 static int
1625 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1626 {
1627 	struct inode *inode = d_inode(cfile->dentry);
1628 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1629 	struct file_lock *flock;
1630 	struct file_lock_context *flctx = locks_inode_context(inode);
1631 	unsigned int count = 0, i;
1632 	int rc = 0, xid, type;
1633 	struct list_head locks_to_send, *el;
1634 	struct lock_to_push *lck, *tmp;
1635 	__u64 length;
1636 
1637 	xid = get_xid();
1638 
1639 	if (!flctx)
1640 		goto out;
1641 
1642 	spin_lock(&flctx->flc_lock);
1643 	list_for_each(el, &flctx->flc_posix) {
1644 		count++;
1645 	}
1646 	spin_unlock(&flctx->flc_lock);
1647 
1648 	INIT_LIST_HEAD(&locks_to_send);
1649 
1650 	/*
1651 	 * Allocating count locks is enough because no FL_POSIX locks can be
1652 	 * added to the list while we are holding cinode->lock_sem, which
1653 	 * protects locking operations on this inode.
1654 	 */
1655 	for (i = 0; i < count; i++) {
1656 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1657 		if (!lck) {
1658 			rc = -ENOMEM;
1659 			goto err_out;
1660 		}
1661 		list_add_tail(&lck->llist, &locks_to_send);
1662 	}
1663 
1664 	el = locks_to_send.next;
1665 	spin_lock(&flctx->flc_lock);
1666 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1667 		if (el == &locks_to_send) {
1668 			/*
1669 			 * The list ended. We don't have enough allocated
1670 			 * structures - something is really wrong.
1671 			 */
1672 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1673 			break;
1674 		}
1675 		length = cifs_flock_len(flock);
1676 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1677 			type = CIFS_RDLCK;
1678 		else
1679 			type = CIFS_WRLCK;
1680 		lck = list_entry(el, struct lock_to_push, llist);
1681 		lck->pid = hash_lockowner(flock->fl_owner);
1682 		lck->netfid = cfile->fid.netfid;
1683 		lck->length = length;
1684 		lck->type = type;
1685 		lck->offset = flock->fl_start;
1686 	}
1687 	spin_unlock(&flctx->flc_lock);
1688 
1689 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1690 		int stored_rc;
1691 
1692 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1693 					     lck->offset, lck->length, NULL,
1694 					     lck->type, 0);
1695 		if (stored_rc)
1696 			rc = stored_rc;
1697 		list_del(&lck->llist);
1698 		kfree(lck);
1699 	}
1700 
1701 out:
1702 	free_xid(xid);
1703 	return rc;
1704 err_out:
1705 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1706 		list_del(&lck->llist);
1707 		kfree(lck);
1708 	}
1709 	goto out;
1710 }
1711 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1712 
1713 static int
1714 cifs_push_locks(struct cifsFileInfo *cfile)
1715 {
1716 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1717 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1718 	int rc = 0;
1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1720 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1721 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1722 
1723 	/* we are going to update can_cache_brlcks here - need a write access */
1724 	cifs_down_write(&cinode->lock_sem);
1725 	if (!cinode->can_cache_brlcks) {
1726 		up_write(&cinode->lock_sem);
1727 		return rc;
1728 	}
1729 
1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1731 	if (cap_unix(tcon->ses) &&
1732 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1733 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1734 		rc = cifs_push_posix_locks(cfile);
1735 	else
1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1737 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1738 
1739 	cinode->can_cache_brlcks = false;
1740 	up_write(&cinode->lock_sem);
1741 	return rc;
1742 }
1743 
1744 static void
1745 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1746 		bool *wait_flag, struct TCP_Server_Info *server)
1747 {
1748 	if (flock->fl_flags & FL_POSIX)
1749 		cifs_dbg(FYI, "Posix\n");
1750 	if (flock->fl_flags & FL_FLOCK)
1751 		cifs_dbg(FYI, "Flock\n");
1752 	if (flock->fl_flags & FL_SLEEP) {
1753 		cifs_dbg(FYI, "Blocking lock\n");
1754 		*wait_flag = true;
1755 	}
1756 	if (flock->fl_flags & FL_ACCESS)
1757 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1758 	if (flock->fl_flags & FL_LEASE)
1759 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1760 	if (flock->fl_flags &
1761 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1762 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1763 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1764 
1765 	*type = server->vals->large_lock_type;
1766 	if (flock->fl_type == F_WRLCK) {
1767 		cifs_dbg(FYI, "F_WRLCK\n");
1768 		*type |= server->vals->exclusive_lock_type;
1769 		*lock = 1;
1770 	} else if (flock->fl_type == F_UNLCK) {
1771 		cifs_dbg(FYI, "F_UNLCK\n");
1772 		*type |= server->vals->unlock_lock_type;
1773 		*unlock = 1;
1774 		/* Check if unlock includes more than one lock range */
1775 	} else if (flock->fl_type == F_RDLCK) {
1776 		cifs_dbg(FYI, "F_RDLCK\n");
1777 		*type |= server->vals->shared_lock_type;
1778 		*lock = 1;
1779 	} else if (flock->fl_type == F_EXLCK) {
1780 		cifs_dbg(FYI, "F_EXLCK\n");
1781 		*type |= server->vals->exclusive_lock_type;
1782 		*lock = 1;
1783 	} else if (flock->fl_type == F_SHLCK) {
1784 		cifs_dbg(FYI, "F_SHLCK\n");
1785 		*type |= server->vals->shared_lock_type;
1786 		*lock = 1;
1787 	} else
1788 		cifs_dbg(FYI, "Unknown type of lock\n");
1789 }
1790 
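/*
 * Handle F_GETLK: check the local lock list (or POSIX-test on the server
 * when the protocol supports it), then probe the server by trying to take
 * the lock and immediately releasing it if that succeeds.  The result is
 * reported back through flock->fl_type.
 */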
1791 static int
1792 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1793 	   bool wait_flag, bool posix_lck, unsigned int xid)
1794 {
1795 	int rc = 0;
1796 	__u64 length = cifs_flock_len(flock);
1797 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1798 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1799 	struct TCP_Server_Info *server = tcon->ses->server;
1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1801 	__u16 netfid = cfile->fid.netfid;
1802 
1803 	if (posix_lck) {
1804 		int posix_lock_type;
1805 
1806 		rc = cifs_posix_lock_test(file, flock);
1807 		if (!rc)
1808 			return rc;
1809 
1810 		if (type & server->vals->shared_lock_type)
1811 			posix_lock_type = CIFS_RDLCK;
1812 		else
1813 			posix_lock_type = CIFS_WRLCK;
1814 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1815 				      hash_lockowner(flock->fl_owner),
1816 				      flock->fl_start, length, flock,
1817 				      posix_lock_type, wait_flag);
1818 		return rc;
1819 	}
1820 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1821 
1822 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1823 	if (!rc)
1824 		return rc;
1825 
1826 	/* BB we could chain these into one lock request BB */
1827 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1828 				    1, 0, false);
1829 	if (rc == 0) {
1830 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1831 					    type, 0, 1, false);
1832 		flock->fl_type = F_UNLCK;
1833 		if (rc != 0)
1834 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1835 				 rc);
1836 		return 0;
1837 	}
1838 
1839 	if (type & server->vals->shared_lock_type) {
1840 		flock->fl_type = F_WRLCK;
1841 		return 0;
1842 	}
1843 
1844 	type &= ~server->vals->exclusive_lock_type;
1845 
1846 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1847 				    type | server->vals->shared_lock_type,
1848 				    1, 0, false);
1849 	if (rc == 0) {
1850 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1851 			type | server->vals->shared_lock_type, 0, 1, false);
1852 		flock->fl_type = F_RDLCK;
1853 		if (rc != 0)
1854 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1855 				 rc);
1856 	} else
1857 		flock->fl_type = F_WRLCK;
1858 
1859 	return 0;
1860 }
1861 
1862 void
1863 cifs_move_llist(struct list_head *source, struct list_head *dest)
1864 {
1865 	struct list_head *li, *tmp;
1866 	list_for_each_safe(li, tmp, source)
1867 		list_move(li, dest);
1868 }
1869 
1870 void
1871 cifs_free_llist(struct list_head *llist)
1872 {
1873 	struct cifsLockInfo *li, *tmp;
1874 	list_for_each_entry_safe(li, tmp, llist, llist) {
1875 		cifs_del_lock_waiters(li);
1876 		list_del(&li->llist);
1877 		kfree(li);
1878 	}
1879 }
1880 
1881 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
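/*
 * Remove all cached locks that fall within the unlock range and send the
 * corresponding unlock requests to the server.  Matching locks are packed
 * into LOCKING_ANDX_RANGE arrays (bounded by the negotiated buffer size)
 * and are only freed once the server acknowledges the unlock; if a request
 * fails, the locks are put back on the file's list.
 */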
1882 int
1883 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1884 		  unsigned int xid)
1885 {
1886 	int rc = 0, stored_rc;
1887 	static const int types[] = {
1888 		LOCKING_ANDX_LARGE_FILES,
1889 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1890 	};
1891 	unsigned int i;
1892 	unsigned int max_num, num, max_buf;
1893 	LOCKING_ANDX_RANGE *buf, *cur;
1894 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1895 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1896 	struct cifsLockInfo *li, *tmp;
1897 	__u64 length = cifs_flock_len(flock);
1898 	struct list_head tmp_llist;
1899 
1900 	INIT_LIST_HEAD(&tmp_llist);
1901 
1902 	/*
1903 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1904 	 * and check it before using.
1905 	 */
1906 	max_buf = tcon->ses->server->maxBuf;
1907 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1908 		return -EINVAL;
1909 
1910 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1911 		     PAGE_SIZE);
1912 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1913 			PAGE_SIZE);
1914 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1915 						sizeof(LOCKING_ANDX_RANGE);
1916 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1917 	if (!buf)
1918 		return -ENOMEM;
1919 
1920 	cifs_down_write(&cinode->lock_sem);
1921 	for (i = 0; i < 2; i++) {
1922 		cur = buf;
1923 		num = 0;
1924 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1925 			if (flock->fl_start > li->offset ||
1926 			    (flock->fl_start + length) <
1927 			    (li->offset + li->length))
1928 				continue;
1929 			if (current->tgid != li->pid)
1930 				continue;
1931 			if (types[i] != li->type)
1932 				continue;
1933 			if (cinode->can_cache_brlcks) {
1934 				/*
1935 				 * We can cache brlock requests - simply remove
1936 				 * a lock from the file's list.
1937 				 */
1938 				list_del(&li->llist);
1939 				cifs_del_lock_waiters(li);
1940 				kfree(li);
1941 				continue;
1942 			}
1943 			cur->Pid = cpu_to_le16(li->pid);
1944 			cur->LengthLow = cpu_to_le32((u32)li->length);
1945 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1946 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1947 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1948 			/*
1949 			 * We need to save a lock here to let us add it again to
1950 			 * the file's list if the unlock range request fails on
1951 			 * the server.
1952 			 */
1953 			list_move(&li->llist, &tmp_llist);
1954 			if (++num == max_num) {
1955 				stored_rc = cifs_lockv(xid, tcon,
1956 						       cfile->fid.netfid,
1957 						       li->type, num, 0, buf);
1958 				if (stored_rc) {
1959 					/*
1960 					 * We failed on the unlock range
1961 					 * request - add all locks from the tmp
1962 					 * list to the head of the file's list.
1963 					 */
1964 					cifs_move_llist(&tmp_llist,
1965 							&cfile->llist->locks);
1966 					rc = stored_rc;
1967 				} else
1968 					/*
1969 					 * The unlock range request succeed -
1970 					 * The unlock range request succeeded -
1971 					 */
1972 					cifs_free_llist(&tmp_llist);
1973 				cur = buf;
1974 				num = 0;
1975 			} else
1976 				cur++;
1977 		}
1978 		if (num) {
1979 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1980 					       types[i], num, 0, buf);
1981 			if (stored_rc) {
1982 				cifs_move_llist(&tmp_llist,
1983 						&cfile->llist->locks);
1984 				rc = stored_rc;
1985 			} else
1986 				cifs_free_llist(&tmp_llist);
1987 		}
1988 	}
1989 
1990 	up_write(&cinode->lock_sem);
1991 	kfree(buf);
1992 	return rc;
1993 }
1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1995 
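/*
 * Handle a lock or unlock request: use POSIX locks when the server supports
 * them, otherwise record the lock locally (if it can still be cached) or
 * send a mandatory byte-range lock/unlock to the server.  For FL_POSIX and
 * FL_FLOCK requests the VFS lock state is updated on success as well.
 */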
1996 static int
1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1998 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1999 	   unsigned int xid)
2000 {
2001 	int rc = 0;
2002 	__u64 length = cifs_flock_len(flock);
2003 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2004 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2005 	struct TCP_Server_Info *server = tcon->ses->server;
2006 	struct inode *inode = d_inode(cfile->dentry);
2007 
2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2009 	if (posix_lck) {
2010 		int posix_lock_type;
2011 
2012 		rc = cifs_posix_lock_set(file, flock);
2013 		if (rc <= FILE_LOCK_DEFERRED)
2014 			return rc;
2015 
2016 		if (type & server->vals->shared_lock_type)
2017 			posix_lock_type = CIFS_RDLCK;
2018 		else
2019 			posix_lock_type = CIFS_WRLCK;
2020 
2021 		if (unlock == 1)
2022 			posix_lock_type = CIFS_UNLCK;
2023 
2024 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2025 				      hash_lockowner(flock->fl_owner),
2026 				      flock->fl_start, length,
2027 				      NULL, posix_lock_type, wait_flag);
2028 		goto out;
2029 	}
2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2031 	if (lock) {
2032 		struct cifsLockInfo *lock;
2033 
2034 		lock = cifs_lock_init(flock->fl_start, length, type,
2035 				      flock->fl_flags);
2036 		if (!lock)
2037 			return -ENOMEM;
2038 
2039 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2040 		if (rc < 0) {
2041 			kfree(lock);
2042 			return rc;
2043 		}
2044 		if (!rc)
2045 			goto out;
2046 
2047 		/*
2048 		 * Windows 7 server can delay breaking lease from read to None
2049 		 * if we set a byte-range lock on a file - break it explicitly
2050 		 * before sending the lock to the server to be sure the next
2051 		 * read won't conflict with non-overlapped locks due to
2052 		 * page reading.
2053 		 */
2054 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2055 					CIFS_CACHE_READ(CIFS_I(inode))) {
2056 			cifs_zap_mapping(inode);
2057 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2058 				 inode);
2059 			CIFS_I(inode)->oplock = 0;
2060 		}
2061 
2062 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2063 					    type, 1, 0, wait_flag);
2064 		if (rc) {
2065 			kfree(lock);
2066 			return rc;
2067 		}
2068 
2069 		cifs_lock_add(cfile, lock);
2070 	} else if (unlock)
2071 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2072 
2073 out:
2074 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2075 		/*
2076 		 * If this is a request to remove all locks because we
2077 		 * are closing the file, it doesn't matter if the
2078 		 * unlocking failed as both cifs.ko and the SMB server
2079 		 * remove the lock on file close
2080 		 */
2081 		if (rc) {
2082 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2083 			if (!(flock->fl_flags & FL_CLOSE))
2084 				return rc;
2085 		}
2086 		rc = locks_lock_file_wait(file, flock);
2087 	}
2088 	return rc;
2089 }
2090 
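/*
 * flock(2) entry point: only FL_FLOCK requests are accepted; they are
 * translated by cifs_read_flock() and then applied via cifs_setlk().
 */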
2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2092 {
2093 	int rc, xid;
2094 	int lock = 0, unlock = 0;
2095 	bool wait_flag = false;
2096 	bool posix_lck = false;
2097 	struct cifs_sb_info *cifs_sb;
2098 	struct cifs_tcon *tcon;
2099 	struct cifsFileInfo *cfile;
2100 	__u32 type;
2101 
2102 	xid = get_xid();
2103 
2104 	if (!(fl->fl_flags & FL_FLOCK)) {
2105 		rc = -ENOLCK;
2106 		free_xid(xid);
2107 		return rc;
2108 	}
2109 
2110 	cfile = (struct cifsFileInfo *)file->private_data;
2111 	tcon = tlink_tcon(cfile->tlink);
2112 
2113 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2114 			tcon->ses->server);
2115 	cifs_sb = CIFS_FILE_SB(file);
2116 
2117 	if (cap_unix(tcon->ses) &&
2118 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2119 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2120 		posix_lck = true;
2121 
2122 	if (!lock && !unlock) {
2123 		/*
2124 		 * if this is neither a lock nor an unlock request then there is
2125 		 * nothing to do since we do not know what it is
2126 		 */
2127 		rc = -EOPNOTSUPP;
2128 		free_xid(xid);
2129 		return rc;
2130 	}
2131 
2132 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2133 			xid);
2134 	free_xid(xid);
2135 	return rc;
2136 
2137 
2138 }
2139 
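/*
 * fcntl(2) byte-range lock entry point, handling F_GETLK via cifs_getlk()
 * and F_SETLK/F_SETLKW via cifs_setlk().
 */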
2140 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2141 {
2142 	int rc, xid;
2143 	int lock = 0, unlock = 0;
2144 	bool wait_flag = false;
2145 	bool posix_lck = false;
2146 	struct cifs_sb_info *cifs_sb;
2147 	struct cifs_tcon *tcon;
2148 	struct cifsFileInfo *cfile;
2149 	__u32 type;
2150 
2151 	rc = -EACCES;
2152 	xid = get_xid();
2153 
2154 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2155 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2156 		 (long long)flock->fl_end);
2157 
2158 	cfile = (struct cifsFileInfo *)file->private_data;
2159 	tcon = tlink_tcon(cfile->tlink);
2160 
2161 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2162 			tcon->ses->server);
2163 	cifs_sb = CIFS_FILE_SB(file);
2164 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2165 
2166 	if (cap_unix(tcon->ses) &&
2167 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2168 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2169 		posix_lck = true;
2170 	/*
2171 	 * BB add code here to normalize offset and length to account for
2172 	 * negative length which we can not accept over the wire.
2173 	 */
2174 	if (IS_GETLK(cmd)) {
2175 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2176 		free_xid(xid);
2177 		return rc;
2178 	}
2179 
2180 	if (!lock && !unlock) {
2181 		/*
2182 		 * if this is neither a lock nor an unlock request then there is
2183 		 * nothing to do since we do not know what it is
2184 		 */
2185 		free_xid(xid);
2186 		return -EOPNOTSUPP;
2187 	}
2188 
2189 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2190 			xid);
2191 	free_xid(xid);
2192 	return rc;
2193 }
2194 
2195 /*
2196  * Update the file size (if needed) after a write. Should be called with
2197  * the inode->i_lock held.
2198  */
2199 void
2200 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2201 		      unsigned int bytes_written)
2202 {
2203 	loff_t end_of_write = offset + bytes_written;
2204 
2205 	if (end_of_write > cifsi->server_eof)
2206 		cifsi->server_eof = end_of_write;
2207 }
2208 
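/*
 * Synchronously write data to the server on behalf of the given open file,
 * splitting the request into chunks no larger than the server's retry size,
 * reopening an invalidated handle as needed and retrying on -EAGAIN.
 * Updates *offset and the cached EOF/i_size as bytes are written.
 */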
2209 static ssize_t
2210 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2211 	   size_t write_size, loff_t *offset)
2212 {
2213 	int rc = 0;
2214 	unsigned int bytes_written = 0;
2215 	unsigned int total_written;
2216 	struct cifs_tcon *tcon;
2217 	struct TCP_Server_Info *server;
2218 	unsigned int xid;
2219 	struct dentry *dentry = open_file->dentry;
2220 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2221 	struct cifs_io_parms io_parms = {0};
2222 
2223 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2224 		 write_size, *offset, dentry);
2225 
2226 	tcon = tlink_tcon(open_file->tlink);
2227 	server = tcon->ses->server;
2228 
2229 	if (!server->ops->sync_write)
2230 		return -ENOSYS;
2231 
2232 	xid = get_xid();
2233 
2234 	for (total_written = 0; write_size > total_written;
2235 	     total_written += bytes_written) {
2236 		rc = -EAGAIN;
2237 		while (rc == -EAGAIN) {
2238 			struct kvec iov[2];
2239 			unsigned int len;
2240 
2241 			if (open_file->invalidHandle) {
2242 				/* we could deadlock if we called
2243 				   filemap_fdatawait from here so tell
2244 				   reopen_file not to flush data to
2245 				   server now */
2246 				rc = cifs_reopen_file(open_file, false);
2247 				if (rc != 0)
2248 					break;
2249 			}
2250 
2251 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2252 				  (unsigned int)write_size - total_written);
2253 			/* iov[0] is reserved for smb header */
2254 			iov[1].iov_base = (char *)write_data + total_written;
2255 			iov[1].iov_len = len;
2256 			io_parms.pid = pid;
2257 			io_parms.tcon = tcon;
2258 			io_parms.offset = *offset;
2259 			io_parms.length = len;
2260 			rc = server->ops->sync_write(xid, &open_file->fid,
2261 					&io_parms, &bytes_written, iov, 1);
2262 		}
2263 		if (rc || (bytes_written == 0)) {
2264 			if (total_written)
2265 				break;
2266 			else {
2267 				free_xid(xid);
2268 				return rc;
2269 			}
2270 		} else {
2271 			spin_lock(&d_inode(dentry)->i_lock);
2272 			cifs_update_eof(cifsi, *offset, bytes_written);
2273 			spin_unlock(&d_inode(dentry)->i_lock);
2274 			*offset += bytes_written;
2275 		}
2276 	}
2277 
2278 	cifs_stats_bytes_written(tcon, total_written);
2279 
2280 	if (total_written > 0) {
2281 		spin_lock(&d_inode(dentry)->i_lock);
2282 		if (*offset > d_inode(dentry)->i_size) {
2283 			i_size_write(d_inode(dentry), *offset);
2284 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2285 		}
2286 		spin_unlock(&d_inode(dentry)->i_lock);
2287 	}
2288 	mark_inode_dirty_sync(d_inode(dentry));
2289 	free_xid(xid);
2290 	return total_written;
2291 }
2292 
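/*
 * Find an already-open handle on this inode that is usable for reading and
 * take a reference on it.  Returns NULL if none is found (for example if
 * only write-only handles are open).
 */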
2293 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2294 					bool fsuid_only)
2295 {
2296 	struct cifsFileInfo *open_file = NULL;
2297 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2298 
2299 	/* only filter by fsuid on multiuser mounts */
2300 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2301 		fsuid_only = false;
2302 
2303 	spin_lock(&cifs_inode->open_file_lock);
2304 	/* we could simply get the first_list_entry since write-only entries
2305 	   are always at the end of the list but since the first entry might
2306 	   have a close pending, we go through the whole list */
2307 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2308 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2309 			continue;
2310 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2311 			if ((!open_file->invalidHandle)) {
2312 				/* found a good file */
2313 				/* lock it so it will not be closed on us */
2314 				cifsFileInfo_get(open_file);
2315 				spin_unlock(&cifs_inode->open_file_lock);
2316 				return open_file;
2317 			} /* else might as well continue, and look for
2318 			     another, or simply have the caller reopen it
2319 			     again rather than trying to fix this handle */
2320 		} else /* write only file */
2321 			break; /* write only files are last so must be done */
2322 	}
2323 	spin_unlock(&cifs_inode->open_file_lock);
2324 	return NULL;
2325 }
2326 
2327 /* Return -EBADF if no handle is found and general rc otherwise */
2328 int
2329 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2330 		       struct cifsFileInfo **ret_file)
2331 {
2332 	struct cifsFileInfo *open_file, *inv_file = NULL;
2333 	struct cifs_sb_info *cifs_sb;
2334 	bool any_available = false;
2335 	int rc = -EBADF;
2336 	unsigned int refind = 0;
2337 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2338 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2339 	*ret_file = NULL;
2340 
2341 	/*
2342 	 * Having a null inode here (because mapping->host was set to zero by
2343 	 * the VFS or MM) should not happen, but we had reports of an oops (due
2344 	 * to it being zero) during stress test cases, so we need to check for it
2345 	 */
2346 
2347 	if (cifs_inode == NULL) {
2348 		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2349 		dump_stack();
2350 		return rc;
2351 	}
2352 
2353 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2354 
2355 	/* only filter by fsuid on multiuser mounts */
2356 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2357 		fsuid_only = false;
2358 
2359 	spin_lock(&cifs_inode->open_file_lock);
2360 refind_writable:
2361 	if (refind > MAX_REOPEN_ATT) {
2362 		spin_unlock(&cifs_inode->open_file_lock);
2363 		return rc;
2364 	}
2365 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2366 		if (!any_available && open_file->pid != current->tgid)
2367 			continue;
2368 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2369 			continue;
2370 		if (with_delete && !(open_file->fid.access & DELETE))
2371 			continue;
2372 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2373 			if (!open_file->invalidHandle) {
2374 				/* found a good writable file */
2375 				cifsFileInfo_get(open_file);
2376 				spin_unlock(&cifs_inode->open_file_lock);
2377 				*ret_file = open_file;
2378 				return 0;
2379 			} else {
2380 				if (!inv_file)
2381 					inv_file = open_file;
2382 			}
2383 		}
2384 	}
2385 	/* couldn't find a usable FH with the same pid, try any available */
2386 	if (!any_available) {
2387 		any_available = true;
2388 		goto refind_writable;
2389 	}
2390 
2391 	if (inv_file) {
2392 		any_available = false;
2393 		cifsFileInfo_get(inv_file);
2394 	}
2395 
2396 	spin_unlock(&cifs_inode->open_file_lock);
2397 
2398 	if (inv_file) {
2399 		rc = cifs_reopen_file(inv_file, false);
2400 		if (!rc) {
2401 			*ret_file = inv_file;
2402 			return 0;
2403 		}
2404 
2405 		spin_lock(&cifs_inode->open_file_lock);
2406 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2407 		spin_unlock(&cifs_inode->open_file_lock);
2408 		cifsFileInfo_put(inv_file);
2409 		++refind;
2410 		inv_file = NULL;
2411 		spin_lock(&cifs_inode->open_file_lock);
2412 		goto refind_writable;
2413 	}
2414 
2415 	return rc;
2416 }
2417 
2418 struct cifsFileInfo *
2419 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2420 {
2421 	struct cifsFileInfo *cfile;
2422 	int rc;
2423 
2424 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2425 	if (rc)
2426 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2427 
2428 	return cfile;
2429 }
2430 
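/*
 * Look up an open file on this tcon by pathname and return a writable
 * handle for it via cifs_get_writable_file().  Returns -ENOENT if the path
 * is not currently open.
 */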
2431 int
2432 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2433 		       int flags,
2434 		       struct cifsFileInfo **ret_file)
2435 {
2436 	struct cifsFileInfo *cfile;
2437 	void *page = alloc_dentry_path();
2438 
2439 	*ret_file = NULL;
2440 
2441 	spin_lock(&tcon->open_file_lock);
2442 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2443 		struct cifsInodeInfo *cinode;
2444 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2445 		if (IS_ERR(full_path)) {
2446 			spin_unlock(&tcon->open_file_lock);
2447 			free_dentry_path(page);
2448 			return PTR_ERR(full_path);
2449 		}
2450 		if (strcmp(full_path, name))
2451 			continue;
2452 
2453 		cinode = CIFS_I(d_inode(cfile->dentry));
2454 		spin_unlock(&tcon->open_file_lock);
2455 		free_dentry_path(page);
2456 		return cifs_get_writable_file(cinode, flags, ret_file);
2457 	}
2458 
2459 	spin_unlock(&tcon->open_file_lock);
2460 	free_dentry_path(page);
2461 	return -ENOENT;
2462 }
2463 
2464 int
2465 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2466 		       struct cifsFileInfo **ret_file)
2467 {
2468 	struct cifsFileInfo *cfile;
2469 	void *page = alloc_dentry_path();
2470 
2471 	*ret_file = NULL;
2472 
2473 	spin_lock(&tcon->open_file_lock);
2474 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2475 		struct cifsInodeInfo *cinode;
2476 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2477 		if (IS_ERR(full_path)) {
2478 			spin_unlock(&tcon->open_file_lock);
2479 			free_dentry_path(page);
2480 			return PTR_ERR(full_path);
2481 		}
2482 		if (strcmp(full_path, name))
2483 			continue;
2484 
2485 		cinode = CIFS_I(d_inode(cfile->dentry));
2486 		spin_unlock(&tcon->open_file_lock);
2487 		free_dentry_path(page);
2488 		*ret_file = find_readable_file(cinode, 0);
2489 		return *ret_file ? 0 : -ENOENT;
2490 	}
2491 
2492 	spin_unlock(&tcon->open_file_lock);
2493 	free_dentry_path(page);
2494 	return -ENOENT;
2495 }
2496 
2497 void
2498 cifs_writedata_release(struct kref *refcount)
2499 {
2500 	struct cifs_writedata *wdata = container_of(refcount,
2501 					struct cifs_writedata, refcount);
2502 #ifdef CONFIG_CIFS_SMB_DIRECT
2503 	if (wdata->mr) {
2504 		smbd_deregister_mr(wdata->mr);
2505 		wdata->mr = NULL;
2506 	}
2507 #endif
2508 
2509 	if (wdata->cfile)
2510 		cifsFileInfo_put(wdata->cfile);
2511 
2512 	kfree(wdata);
2513 }
2514 
2515 /*
2516  * Write failed with a retryable error. Resend the write request. It's also
2517  * possible that the page was redirtied so re-clean the page.
2518  */
2519 static void
2520 cifs_writev_requeue(struct cifs_writedata *wdata)
2521 {
2522 	int rc = 0;
2523 	struct inode *inode = d_inode(wdata->cfile->dentry);
2524 	struct TCP_Server_Info *server;
2525 	unsigned int rest_len = wdata->bytes;
2526 	loff_t fpos = wdata->offset;
2527 
2528 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2529 	do {
2530 		struct cifs_writedata *wdata2;
2531 		unsigned int wsize, cur_len;
2532 
2533 		wsize = server->ops->wp_retry_size(inode);
2534 		if (wsize < rest_len) {
2535 			if (wsize < PAGE_SIZE) {
2536 				rc = -EOPNOTSUPP;
2537 				break;
2538 			}
2539 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2540 		} else {
2541 			cur_len = rest_len;
2542 		}
2543 
2544 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2545 		if (!wdata2) {
2546 			rc = -ENOMEM;
2547 			break;
2548 		}
2549 
2550 		wdata2->sync_mode = wdata->sync_mode;
2551 		wdata2->offset	= fpos;
2552 		wdata2->bytes	= cur_len;
2553 		wdata2->iter	= wdata->iter;
2554 
2555 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2556 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2557 
2558 		if (iov_iter_is_xarray(&wdata2->iter))
2559 			/* Check for pages having been redirtied and clean
2560 			 * them.  We can do this by walking the xarray.  If
2561 			 * it's not an xarray, then it's a DIO and we shouldn't
2562 			 * be mucking around with the page bits.
2563 			 */
2564 			cifs_undirty_folios(inode, fpos, cur_len);
2565 
2566 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2567 					    &wdata2->cfile);
2568 		if (!wdata2->cfile) {
2569 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2570 				 rc);
2571 			if (!is_retryable_error(rc))
2572 				rc = -EBADF;
2573 		} else {
2574 			wdata2->pid = wdata2->cfile->pid;
2575 			rc = server->ops->async_writev(wdata2,
2576 						       cifs_writedata_release);
2577 		}
2578 
2579 		kref_put(&wdata2->refcount, cifs_writedata_release);
2580 		if (rc) {
2581 			if (is_retryable_error(rc))
2582 				continue;
2583 			fpos += cur_len;
2584 			rest_len -= cur_len;
2585 			break;
2586 		}
2587 
2588 		fpos += cur_len;
2589 		rest_len -= cur_len;
2590 	} while (rest_len > 0);
2591 
2592 	/* Clean up remaining pages from the original wdata */
2593 	if (iov_iter_is_xarray(&wdata->iter))
2594 		cifs_pages_write_failed(inode, fpos, rest_len);
2595 
2596 	if (rc != 0 && !is_retryable_error(rc))
2597 		mapping_set_error(inode->i_mapping, rc);
2598 	kref_put(&wdata->refcount, cifs_writedata_release);
2599 }
2600 
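/*
 * Work item run when an asynchronous writepages write completes: update the
 * cached EOF and statistics on success, requeue the write on a retryable
 * -EAGAIN for WB_SYNC_ALL, and otherwise clear (or fail) the writeback
 * state on the affected pages.
 */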
2601 void
2602 cifs_writev_complete(struct work_struct *work)
2603 {
2604 	struct cifs_writedata *wdata = container_of(work,
2605 						struct cifs_writedata, work);
2606 	struct inode *inode = d_inode(wdata->cfile->dentry);
2607 
2608 	if (wdata->result == 0) {
2609 		spin_lock(&inode->i_lock);
2610 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2611 		spin_unlock(&inode->i_lock);
2612 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2613 					 wdata->bytes);
2614 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2615 		return cifs_writev_requeue(wdata);
2616 
2617 	if (wdata->result == -EAGAIN)
2618 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2619 	else if (wdata->result < 0)
2620 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2621 	else
2622 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2623 
2624 	if (wdata->result != -EAGAIN)
2625 		mapping_set_error(inode->i_mapping, wdata->result);
2626 	kref_put(&wdata->refcount, cifs_writedata_release);
2627 }
2628 
2629 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2630 {
2631 	struct cifs_writedata *wdata;
2632 
2633 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2634 	if (wdata != NULL) {
2635 		kref_init(&wdata->refcount);
2636 		INIT_LIST_HEAD(&wdata->list);
2637 		init_completion(&wdata->done);
2638 		INIT_WORK(&wdata->work, complete);
2639 	}
2640 	return wdata;
2641 }
2642 
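/*
 * Synchronously write the byte range [from, to) of a page back to the
 * server using any available writable handle, trimming the range so that
 * it does not extend the file.
 */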
2643 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2644 {
2645 	struct address_space *mapping = page->mapping;
2646 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2647 	char *write_data;
2648 	int rc = -EFAULT;
2649 	int bytes_written = 0;
2650 	struct inode *inode;
2651 	struct cifsFileInfo *open_file;
2652 
2653 	if (!mapping || !mapping->host)
2654 		return -EFAULT;
2655 
2656 	inode = page->mapping->host;
2657 
2658 	offset += (loff_t)from;
2659 	write_data = kmap(page);
2660 	write_data += from;
2661 
2662 	if ((to > PAGE_SIZE) || (from > to)) {
2663 		kunmap(page);
2664 		return -EIO;
2665 	}
2666 
2667 	/* racing with truncate? */
2668 	if (offset > mapping->host->i_size) {
2669 		kunmap(page);
2670 		return 0; /* don't care */
2671 	}
2672 
2673 	/* check to make sure that we are not extending the file */
2674 	if (mapping->host->i_size - offset < (loff_t)to)
2675 		to = (unsigned)(mapping->host->i_size - offset);
2676 
2677 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2678 				    &open_file);
2679 	if (!rc) {
2680 		bytes_written = cifs_write(open_file, open_file->pid,
2681 					   write_data, to - from, &offset);
2682 		cifsFileInfo_put(open_file);
2683 		/* Does mm or vfs already set times? */
2684 		simple_inode_init_ts(inode);
2685 		if ((bytes_written > 0) && (offset))
2686 			rc = 0;
2687 		else if (bytes_written < 0)
2688 			rc = bytes_written;
2689 		else
2690 			rc = -EFAULT;
2691 	} else {
2692 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2693 		if (!is_retryable_error(rc))
2694 			rc = -EIO;
2695 	}
2696 
2697 	kunmap(page);
2698 	return rc;
2699 }
2700 
2701 /*
2702  * Extend the region to be written back to include subsequent contiguously
2703  * dirty pages if possible, but don't sleep while doing so.
2704  */
2705 static void cifs_extend_writeback(struct address_space *mapping,
2706 				  struct xa_state *xas,
2707 				  long *_count,
2708 				  loff_t start,
2709 				  int max_pages,
2710 				  loff_t max_len,
2711 				  size_t *_len)
2712 {
2713 	struct folio_batch batch;
2714 	struct folio *folio;
2715 	unsigned int nr_pages;
2716 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2717 	size_t len;
2718 	bool stop = true;
2719 	unsigned int i;
2720 
2721 	folio_batch_init(&batch);
2722 
2723 	do {
2724 		/* Firstly, we gather up a batch of contiguous dirty pages
2725 		 * under the RCU read lock - but we can't clear the dirty flags
2726 		 * there if any of those pages are mapped.
2727 		 */
2728 		rcu_read_lock();
2729 
2730 		xas_for_each(xas, folio, ULONG_MAX) {
2731 			stop = true;
2732 			if (xas_retry(xas, folio))
2733 				continue;
2734 			if (xa_is_value(folio))
2735 				break;
2736 			if (folio->index != index) {
2737 				xas_reset(xas);
2738 				break;
2739 			}
2740 
2741 			if (!folio_try_get_rcu(folio)) {
2742 				xas_reset(xas);
2743 				continue;
2744 			}
2745 			nr_pages = folio_nr_pages(folio);
2746 			if (nr_pages > max_pages) {
2747 				xas_reset(xas);
2748 				break;
2749 			}
2750 
2751 			/* Has the page moved or been split? */
2752 			if (unlikely(folio != xas_reload(xas))) {
2753 				folio_put(folio);
2754 				xas_reset(xas);
2755 				break;
2756 			}
2757 
2758 			if (!folio_trylock(folio)) {
2759 				folio_put(folio);
2760 				xas_reset(xas);
2761 				break;
2762 			}
2763 			if (!folio_test_dirty(folio) ||
2764 			    folio_test_writeback(folio)) {
2765 				folio_unlock(folio);
2766 				folio_put(folio);
2767 				xas_reset(xas);
2768 				break;
2769 			}
2770 
2771 			max_pages -= nr_pages;
2772 			len = folio_size(folio);
2773 			stop = false;
2774 
2775 			index += nr_pages;
2776 			*_count -= nr_pages;
2777 			*_len += len;
2778 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2779 				stop = true;
2780 
2781 			if (!folio_batch_add(&batch, folio))
2782 				break;
2783 			if (stop)
2784 				break;
2785 		}
2786 
2787 		xas_pause(xas);
2788 		rcu_read_unlock();
2789 
2790 		/* Now, if we obtained any pages, we can shift them to being
2791 		 * writable and mark them for caching.
2792 		 */
2793 		if (!folio_batch_count(&batch))
2794 			break;
2795 
2796 		for (i = 0; i < folio_batch_count(&batch); i++) {
2797 			folio = batch.folios[i];
2798 			/* The folio should be locked, dirty and not undergoing
2799 			 * writeback from the loop above.
2800 			 */
2801 			if (!folio_clear_dirty_for_io(folio))
2802 				WARN_ON(1);
2803 			folio_start_writeback(folio);
2804 			folio_unlock(folio);
2805 		}
2806 
2807 		folio_batch_release(&batch);
2808 		cond_resched();
2809 	} while (!stop);
2810 }
2811 
2812 /*
2813  * Write back the locked page and any subsequent non-locked dirty pages.
2814  */
2815 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2816 						 struct writeback_control *wbc,
2817 						 struct xa_state *xas,
2818 						 struct folio *folio,
2819 						 unsigned long long start,
2820 						 unsigned long long end)
2821 {
2822 	struct inode *inode = mapping->host;
2823 	struct TCP_Server_Info *server;
2824 	struct cifs_writedata *wdata;
2825 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2826 	struct cifs_credits credits_on_stack;
2827 	struct cifs_credits *credits = &credits_on_stack;
2828 	struct cifsFileInfo *cfile = NULL;
2829 	unsigned long long i_size = i_size_read(inode), max_len;
2830 	unsigned int xid, wsize;
2831 	size_t len = folio_size(folio);
2832 	long count = wbc->nr_to_write;
2833 	int rc;
2834 
2835 	/* The folio should be locked, dirty and not undergoing writeback. */
2836 	if (!folio_clear_dirty_for_io(folio))
2837 		WARN_ON_ONCE(1);
2838 	folio_start_writeback(folio);
2839 
2840 	count -= folio_nr_pages(folio);
2841 
2842 	xid = get_xid();
2843 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2844 
2845 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2846 	if (rc) {
2847 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2848 		goto err_xid;
2849 	}
2850 
2851 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2852 					   &wsize, credits);
2853 	if (rc != 0)
2854 		goto err_close;
2855 
2856 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2857 	if (!wdata) {
2858 		rc = -ENOMEM;
2859 		goto err_uncredit;
2860 	}
2861 
2862 	wdata->sync_mode = wbc->sync_mode;
2863 	wdata->offset = folio_pos(folio);
2864 	wdata->pid = cfile->pid;
2865 	wdata->credits = credits_on_stack;
2866 	wdata->cfile = cfile;
2867 	wdata->server = server;
2868 	cfile = NULL;
2869 
2870 	/* Find all consecutive lockable dirty pages that have contiguous
2871 	 * written regions, stopping when we find a page that is not
2872 	 * immediately lockable, is not dirty or is missing, or we reach the
2873 	 * end of the range.
2874 	 */
2875 	if (start < i_size) {
2876 		/* Trim the write to the EOF; the extra data is ignored.  Also
2877 		 * put an upper limit on the size of a single storedata op.
2878 		 */
2879 		max_len = wsize;
2880 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2881 		max_len = min_t(unsigned long long, max_len, i_size - start);
2882 
2883 		if (len < max_len) {
2884 			int max_pages = INT_MAX;
2885 
2886 #ifdef CONFIG_CIFS_SMB_DIRECT
2887 			if (server->smbd_conn)
2888 				max_pages = server->smbd_conn->max_frmr_depth;
2889 #endif
2890 			max_pages -= folio_nr_pages(folio);
2891 
2892 			if (max_pages > 0)
2893 				cifs_extend_writeback(mapping, xas, &count, start,
2894 						      max_pages, max_len, &len);
2895 		}
2896 	}
2897 	len = min_t(unsigned long long, len, i_size - start);
2898 
2899 	/* We now have a contiguous set of dirty pages, each with writeback
2900 	 * set; the first page is still locked at this point, but all the rest
2901 	 * have been unlocked.
2902 	 */
2903 	folio_unlock(folio);
2904 	wdata->bytes = len;
2905 
2906 	if (start < i_size) {
2907 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2908 				start, len);
2909 
2910 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2911 		if (rc)
2912 			goto err_wdata;
2913 
2914 		if (wdata->cfile->invalidHandle)
2915 			rc = -EAGAIN;
2916 		else
2917 			rc = wdata->server->ops->async_writev(wdata,
2918 							      cifs_writedata_release);
2919 		if (rc >= 0) {
2920 			kref_put(&wdata->refcount, cifs_writedata_release);
2921 			goto err_close;
2922 		}
2923 	} else {
2924 		/* The dirty region was entirely beyond the EOF. */
2925 		cifs_pages_written_back(inode, start, len);
2926 		rc = 0;
2927 	}
2928 
2929 err_wdata:
2930 	kref_put(&wdata->refcount, cifs_writedata_release);
2931 err_uncredit:
2932 	add_credits_and_wake_if(server, credits, 0);
2933 err_close:
2934 	if (cfile)
2935 		cifsFileInfo_put(cfile);
2936 err_xid:
2937 	free_xid(xid);
2938 	if (rc == 0) {
2939 		wbc->nr_to_write = count;
2940 		rc = len;
2941 	} else if (is_retryable_error(rc)) {
2942 		cifs_pages_write_redirty(inode, start, len);
2943 	} else {
2944 		cifs_pages_write_failed(inode, start, len);
2945 		mapping_set_error(mapping, rc);
2946 	}
2947 	/* Indication to update ctime and mtime as close is deferred */
2948 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2949 	return rc;
2950 }
2951 
2952 /*
2953  * Find the first dirty folio in the region and start writeback from it
2954  */
2955 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2956 				     struct writeback_control *wbc,
2957 				     struct xa_state *xas,
2958 				     unsigned long long *_start,
2959 				     unsigned long long end)
2960 {
2961 	struct folio *folio;
2962 	unsigned long long start = *_start;
2963 	ssize_t ret;
2964 	int skips = 0;
2965 
2966 search_again:
2967 	/* Find the first dirty page. */
2968 	rcu_read_lock();
2969 
2970 	for (;;) {
2971 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2972 		if (xas_retry(xas, folio) || xa_is_value(folio))
2973 			continue;
2974 		if (!folio)
2975 			break;
2976 
2977 		if (!folio_try_get_rcu(folio)) {
2978 			xas_reset(xas);
2979 			continue;
2980 		}
2981 
2982 		if (unlikely(folio != xas_reload(xas))) {
2983 			folio_put(folio);
2984 			xas_reset(xas);
2985 			continue;
2986 		}
2987 
2988 		xas_pause(xas);
2989 		break;
2990 	}
2991 	rcu_read_unlock();
2992 	if (!folio)
2993 		return 0;
2994 
2995 	start = folio_pos(folio); /* May regress with THPs */
2996 
2997 	/* At this point we hold neither the i_pages lock nor the page lock:
2998 	 * the page may be truncated or invalidated (changing page->mapping to
2999 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3000 	 * mapping
3001 	 */
3002 lock_again:
3003 	if (wbc->sync_mode != WB_SYNC_NONE) {
3004 		ret = folio_lock_killable(folio);
3005 		if (ret < 0)
3006 			return ret;
3007 	} else {
3008 		if (!folio_trylock(folio))
3009 			goto search_again;
3010 	}
3011 
3012 	if (folio->mapping != mapping ||
3013 	    !folio_test_dirty(folio)) {
3014 		start += folio_size(folio);
3015 		folio_unlock(folio);
3016 		goto search_again;
3017 	}
3018 
3019 	if (folio_test_writeback(folio) ||
3020 	    folio_test_fscache(folio)) {
3021 		folio_unlock(folio);
3022 		if (wbc->sync_mode != WB_SYNC_NONE) {
3023 			folio_wait_writeback(folio);
3024 #ifdef CONFIG_CIFS_FSCACHE
3025 			folio_wait_fscache(folio);
3026 #endif
3027 			goto lock_again;
3028 		}
3029 
3030 		start += folio_size(folio);
3031 		if (wbc->sync_mode == WB_SYNC_NONE) {
3032 			if (skips >= 5 || need_resched()) {
3033 				ret = 0;
3034 				goto out;
3035 			}
3036 			skips++;
3037 		}
3038 		goto search_again;
3039 	}
3040 
3041 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3042 out:
3043 	if (ret > 0)
3044 		*_start = start + ret;
3045 	return ret;
3046 }
3047 
3048 /*
3049  * Write a region of pages back to the server
3050  */
3051 static int cifs_writepages_region(struct address_space *mapping,
3052 				  struct writeback_control *wbc,
3053 				  unsigned long long *_start,
3054 				  unsigned long long end)
3055 {
3056 	ssize_t ret;
3057 
3058 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3059 
3060 	do {
3061 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3062 		if (ret > 0 && wbc->nr_to_write > 0)
3063 			cond_resched();
3064 	} while (ret > 0 && wbc->nr_to_write > 0);
3065 
3066 	return ret > 0 ? 0 : ret;
3067 }
3068 
3069 /*
3070  * Write some of the pending data back to the server
3071  */
3072 static int cifs_writepages(struct address_space *mapping,
3073 			   struct writeback_control *wbc)
3074 {
3075 	loff_t start, end;
3076 	int ret;
3077 
3078 	/* We have to be careful as we can end up racing with setattr()
3079 	 * truncating the pagecache since the caller doesn't take a lock here
3080 	 * to prevent it.
3081 	 */
3082 
3083 	if (wbc->range_cyclic && mapping->writeback_index) {
3084 		start = mapping->writeback_index * PAGE_SIZE;
3085 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3086 		if (ret < 0)
3087 			goto out;
3088 
3089 		if (wbc->nr_to_write <= 0) {
3090 			mapping->writeback_index = start / PAGE_SIZE;
3091 			goto out;
3092 		}
3093 
3094 		start = 0;
3095 		end = mapping->writeback_index * PAGE_SIZE;
3096 		mapping->writeback_index = 0;
3097 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3098 		if (ret == 0)
3099 			mapping->writeback_index = start / PAGE_SIZE;
3100 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3101 		start = 0;
3102 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3103 		if (wbc->nr_to_write > 0 && ret == 0)
3104 			mapping->writeback_index = start / PAGE_SIZE;
3105 	} else {
3106 		start = wbc->range_start;
3107 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3108 	}
3109 
3110 out:
3111 	return ret;
3112 }
3113 
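/*
 * Write a single locked page back to the server via cifs_partialpagewrite(),
 * setting the writeback flag first and redirtying the page on a retryable
 * error.
 */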
3114 static int
3115 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3116 {
3117 	int rc;
3118 	unsigned int xid;
3119 
3120 	xid = get_xid();
3121 /* BB add check for wbc flags */
3122 	get_page(page);
3123 	if (!PageUptodate(page))
3124 		cifs_dbg(FYI, "ppw - page not up to date\n");
3125 
3126 	/*
3127 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3128 	 *
3129 	 * A writepage() implementation always needs to do either this,
3130 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3131 	 * the case of a failure.
3132 	 *
3133 	 * Just unlocking the page will cause the radix tree tag-bits
3134 	 * to fail to update with the state of the page correctly.
3135 	 */
3136 	set_page_writeback(page);
3137 retry_write:
3138 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3139 	if (is_retryable_error(rc)) {
3140 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3141 			goto retry_write;
3142 		redirty_page_for_writepage(wbc, page);
3143 	} else if (rc != 0) {
3144 		SetPageError(page);
3145 		mapping_set_error(page->mapping, rc);
3146 	} else {
3147 		SetPageUptodate(page);
3148 	}
3149 	end_page_writeback(page);
3150 	put_page(page);
3151 	free_xid(xid);
3152 	return rc;
3153 }
3154 
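/*
 * Complete a buffered write begun by write_begin: if the folio is now fully
 * up to date it is simply marked dirty for later writeback, otherwise the
 * copied data is written to the server synchronously.  i_size and the block
 * estimate are updated if the write extended the file.
 */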
3155 static int cifs_write_end(struct file *file, struct address_space *mapping,
3156 			loff_t pos, unsigned len, unsigned copied,
3157 			struct page *page, void *fsdata)
3158 {
3159 	int rc;
3160 	struct inode *inode = mapping->host;
3161 	struct cifsFileInfo *cfile = file->private_data;
3162 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3163 	struct folio *folio = page_folio(page);
3164 	__u32 pid;
3165 
3166 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3167 		pid = cfile->pid;
3168 	else
3169 		pid = current->tgid;
3170 
3171 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3172 		 page, pos, copied);
3173 
3174 	if (folio_test_checked(folio)) {
3175 		if (copied == len)
3176 			folio_mark_uptodate(folio);
3177 		folio_clear_checked(folio);
3178 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3179 		folio_mark_uptodate(folio);
3180 
3181 	if (!folio_test_uptodate(folio)) {
3182 		char *page_data;
3183 		unsigned offset = pos & (PAGE_SIZE - 1);
3184 		unsigned int xid;
3185 
3186 		xid = get_xid();
3187 		/* this is probably better than directly calling
3188 		   cifs_partialpagewrite since in this function the file handle
3189 		   is known, which we might as well leverage */
3190 		/* BB check if anything else missing out of ppw
3191 		   such as updating last write time */
3192 		page_data = kmap(page);
3193 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3194 		/* if (rc < 0) should we set writebehind rc? */
3195 		kunmap(page);
3196 
3197 		free_xid(xid);
3198 	} else {
3199 		rc = copied;
3200 		pos += copied;
3201 		set_page_dirty(page);
3202 	}
3203 
3204 	if (rc > 0) {
3205 		spin_lock(&inode->i_lock);
3206 		if (pos > inode->i_size) {
3207 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3208 
3209 			i_size_write(inode, pos);
3210 			/*
3211 			 * Estimate new allocation size based on the amount written.
3212 			 * This will be updated from server on close (and on queryinfo)
3213 			 */
3214 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3215 						inode->i_blocks + additional_blocks);
3216 		}
3217 		spin_unlock(&inode->i_lock);
3218 	}
3219 
3220 	unlock_page(page);
3221 	put_page(page);
3222 	/* Indication to update ctime and mtime as close is deferred */
3223 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3224 
3225 	return rc;
3226 }
3227 
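/*
 * fsync handler used for strict cache mode: flush dirty pages, invalidate
 * the pagecache if we no longer hold a read lease/oplock, then ask the
 * server to flush the file unless CIFS_MOUNT_NOSSYNC is set.
 */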
3228 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3229 		      int datasync)
3230 {
3231 	unsigned int xid;
3232 	int rc = 0;
3233 	struct cifs_tcon *tcon;
3234 	struct TCP_Server_Info *server;
3235 	struct cifsFileInfo *smbfile = file->private_data;
3236 	struct inode *inode = file_inode(file);
3237 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3238 
3239 	rc = file_write_and_wait_range(file, start, end);
3240 	if (rc) {
3241 		trace_cifs_fsync_err(inode->i_ino, rc);
3242 		return rc;
3243 	}
3244 
3245 	xid = get_xid();
3246 
3247 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3248 		 file, datasync);
3249 
3250 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3251 		rc = cifs_zap_mapping(inode);
3252 		if (rc) {
3253 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3254 			rc = 0; /* don't care about it in fsync */
3255 		}
3256 	}
3257 
3258 	tcon = tlink_tcon(smbfile->tlink);
3259 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3260 		server = tcon->ses->server;
3261 		if (server->ops->flush == NULL) {
3262 			rc = -ENOSYS;
3263 			goto strict_fsync_exit;
3264 		}
3265 
3266 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3267 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3268 			if (smbfile) {
3269 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3270 				cifsFileInfo_put(smbfile);
3271 			} else
3272 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3273 		} else
3274 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3275 	}
3276 
3277 strict_fsync_exit:
3278 	free_xid(xid);
3279 	return rc;
3280 }
3281 
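/*
 * Default fsync handler: like cifs_strict_fsync() but without invalidating
 * the pagecache - flush dirty pages and then ask the server to flush the
 * file unless CIFS_MOUNT_NOSSYNC is set.
 */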
3282 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3283 {
3284 	unsigned int xid;
3285 	int rc = 0;
3286 	struct cifs_tcon *tcon;
3287 	struct TCP_Server_Info *server;
3288 	struct cifsFileInfo *smbfile = file->private_data;
3289 	struct inode *inode = file_inode(file);
3290 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3291 
3292 	rc = file_write_and_wait_range(file, start, end);
3293 	if (rc) {
3294 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3295 		return rc;
3296 	}
3297 
3298 	xid = get_xid();
3299 
3300 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3301 		 file, datasync);
3302 
3303 	tcon = tlink_tcon(smbfile->tlink);
3304 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3305 		server = tcon->ses->server;
3306 		if (server->ops->flush == NULL) {
3307 			rc = -ENOSYS;
3308 			goto fsync_exit;
3309 		}
3310 
3311 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3312 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3313 			if (smbfile) {
3314 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3315 				cifsFileInfo_put(smbfile);
3316 			} else
3317 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3318 		} else
3319 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3320 	}
3321 
3322 fsync_exit:
3323 	free_xid(xid);
3324 	return rc;
3325 }
3326 
3327 /*
3328  * As file closes, flush all cached write data for this inode checking
3329  * for write behind errors.
3330  */
3331 int cifs_flush(struct file *file, fl_owner_t id)
3332 {
3333 	struct inode *inode = file_inode(file);
3334 	int rc = 0;
3335 
3336 	if (file->f_mode & FMODE_WRITE)
3337 		rc = filemap_write_and_wait(inode->i_mapping);
3338 
3339 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3340 	if (rc) {
3341 		/* get more nuanced writeback errors */
3342 		rc = filemap_check_wb_err(file->f_mapping, 0);
3343 		trace_cifs_flush_err(inode->i_ino, rc);
3344 	}
3345 	return rc;
3346 }
3347 
3348 static void
3349 cifs_uncached_writedata_release(struct kref *refcount)
3350 {
3351 	struct cifs_writedata *wdata = container_of(refcount,
3352 					struct cifs_writedata, refcount);
3353 
3354 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3355 	cifs_writedata_release(refcount);
3356 }
3357 
3358 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3359 
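/*
 * Completion work for an uncached (O_DIRECT/aio) write: update the cached
 * EOF and i_size, signal completion and let the issuing context collect the
 * result.
 */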
3360 static void
3361 cifs_uncached_writev_complete(struct work_struct *work)
3362 {
3363 	struct cifs_writedata *wdata = container_of(work,
3364 					struct cifs_writedata, work);
3365 	struct inode *inode = d_inode(wdata->cfile->dentry);
3366 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3367 
3368 	spin_lock(&inode->i_lock);
3369 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3370 	if (cifsi->server_eof > inode->i_size)
3371 		i_size_write(inode, cifsi->server_eof);
3372 	spin_unlock(&inode->i_lock);
3373 
3374 	complete(&wdata->done);
3375 	collect_uncached_write_data(wdata->ctx);
3376 	/* the below call can possibly free the last ref to aio ctx */
3377 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3378 }
3379 
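/*
 * Resend a previously-issued uncached write in one piece, waiting until
 * enough credits are available to cover the whole wdata and reopening the
 * file handle if it has been invalidated.
 */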
3380 static int
3381 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3382 	struct cifs_aio_ctx *ctx)
3383 {
3384 	unsigned int wsize;
3385 	struct cifs_credits credits;
3386 	int rc;
3387 	struct TCP_Server_Info *server = wdata->server;
3388 
3389 	do {
3390 		if (wdata->cfile->invalidHandle) {
3391 			rc = cifs_reopen_file(wdata->cfile, false);
3392 			if (rc == -EAGAIN)
3393 				continue;
3394 			else if (rc)
3395 				break;
3396 		}
3397 
3398 
3399 		/*
3400 		 * Wait for credits to resend this wdata.
3401 		 * Note: we are attempting to resend the whole wdata rather than
3402 		 * in segments
3403 		 */
3404 		do {
3405 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3406 						&wsize, &credits);
3407 			if (rc)
3408 				goto fail;
3409 
3410 			if (wsize < wdata->bytes) {
3411 				add_credits_and_wake_if(server, &credits, 0);
3412 				msleep(1000);
3413 			}
3414 		} while (wsize < wdata->bytes);
3415 		wdata->credits = credits;
3416 
3417 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3418 
3419 		if (!rc) {
3420 			if (wdata->cfile->invalidHandle)
3421 				rc = -EAGAIN;
3422 			else {
3423 				wdata->replay = true;
3424 #ifdef CONFIG_CIFS_SMB_DIRECT
3425 				if (wdata->mr) {
3426 					wdata->mr->need_invalidate = true;
3427 					smbd_deregister_mr(wdata->mr);
3428 					wdata->mr = NULL;
3429 				}
3430 #endif
3431 				rc = server->ops->async_writev(wdata,
3432 					cifs_uncached_writedata_release);
3433 			}
3434 		}
3435 
3436 		/* If the write was successfully sent, we are done */
3437 		if (!rc) {
3438 			list_add_tail(&wdata->list, wdata_list);
3439 			return 0;
3440 		}
3441 
3442 		/* Roll back credits and retry if needed */
3443 		add_credits_and_wake_if(server, &wdata->credits, 0);
3444 	} while (rc == -EAGAIN);
3445 
3446 fail:
3447 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3448 	return rc;
3449 }
3450 
3451 /*
3452  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3453  * size and maximum number of segments.
3454  */
3455 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3456 				     size_t max_segs, unsigned int *_nsegs)
3457 {
3458 	const struct bio_vec *bvecs = iter->bvec;
3459 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3460 	size_t len, span = 0, n = iter->count;
3461 	size_t skip = iter->iov_offset;
3462 
3463 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3464 		return 0;
3465 
3466 	while (n && ix < nbv && skip) {
3467 		len = bvecs[ix].bv_len;
3468 		if (skip < len)
3469 			break;
3470 		skip -= len;
3471 		n -= len;
3472 		ix++;
3473 	}
3474 
3475 	while (n && ix < nbv) {
3476 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3477 		span += len;
3478 		max_size -= len;
3479 		nsegs++;
3480 		ix++;
3481 		if (max_size == 0 || nsegs >= max_segs)
3482 			break;
3483 		skip = 0;
3484 		n -= len;
3485 	}
3486 
3487 	*_nsegs = nsegs;
3488 	return span;
3489 }
3490 
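/*
 * Carve the source iterator into chunks bounded by the negotiated wsize
 * (and, for smbdirect, the maximum number of segments), allocate a wdata
 * for each chunk and issue the asynchronous writes, queueing them on
 * wdata_list for the caller to collect.
 */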
3491 static int
3492 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3493 		     struct cifsFileInfo *open_file,
3494 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3495 		     struct cifs_aio_ctx *ctx)
3496 {
3497 	int rc = 0;
3498 	size_t cur_len, max_len;
3499 	struct cifs_writedata *wdata;
3500 	pid_t pid;
3501 	struct TCP_Server_Info *server;
3502 	unsigned int xid, max_segs = INT_MAX;
3503 
3504 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3505 		pid = open_file->pid;
3506 	else
3507 		pid = current->tgid;
3508 
3509 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3510 	xid = get_xid();
3511 
3512 #ifdef CONFIG_CIFS_SMB_DIRECT
3513 	if (server->smbd_conn)
3514 		max_segs = server->smbd_conn->max_frmr_depth;
3515 #endif
3516 
3517 	do {
3518 		struct cifs_credits credits_on_stack;
3519 		struct cifs_credits *credits = &credits_on_stack;
3520 		unsigned int wsize, nsegs = 0;
3521 
3522 		if (signal_pending(current)) {
3523 			rc = -EINTR;
3524 			break;
3525 		}
3526 
3527 		if (open_file->invalidHandle) {
3528 			rc = cifs_reopen_file(open_file, false);
3529 			if (rc == -EAGAIN)
3530 				continue;
3531 			else if (rc)
3532 				break;
3533 		}
3534 
3535 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3536 						   &wsize, credits);
3537 		if (rc)
3538 			break;
3539 
3540 		max_len = min_t(const size_t, len, wsize);
3541 		if (!max_len) {
3542 			rc = -EAGAIN;
3543 			add_credits_and_wake_if(server, credits, 0);
3544 			break;
3545 		}
3546 
3547 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3548 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3549 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3550 		if (cur_len == 0) {
3551 			rc = -EIO;
3552 			add_credits_and_wake_if(server, credits, 0);
3553 			break;
3554 		}
3555 
3556 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3557 		if (!wdata) {
3558 			rc = -ENOMEM;
3559 			add_credits_and_wake_if(server, credits, 0);
3560 			break;
3561 		}
3562 
3563 		wdata->sync_mode = WB_SYNC_ALL;
3564 		wdata->offset	= (__u64)fpos;
3565 		wdata->cfile	= cifsFileInfo_get(open_file);
3566 		wdata->server	= server;
3567 		wdata->pid	= pid;
3568 		wdata->bytes	= cur_len;
3569 		wdata->credits	= credits_on_stack;
3570 		wdata->iter	= *from;
3571 		wdata->ctx	= ctx;
3572 		kref_get(&ctx->refcount);
3573 
3574 		iov_iter_truncate(&wdata->iter, cur_len);
3575 
3576 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3577 
3578 		if (!rc) {
3579 			if (wdata->cfile->invalidHandle)
3580 				rc = -EAGAIN;
3581 			else
3582 				rc = server->ops->async_writev(wdata,
3583 					cifs_uncached_writedata_release);
3584 		}
3585 
3586 		if (rc) {
3587 			add_credits_and_wake_if(server, &wdata->credits, 0);
3588 			kref_put(&wdata->refcount,
3589 				 cifs_uncached_writedata_release);
3590 			if (rc == -EAGAIN)
3591 				continue;
3592 			break;
3593 		}
3594 
3595 		list_add_tail(&wdata->list, wdata_list);
3596 		iov_iter_advance(from, cur_len);
3597 		fpos += cur_len;
3598 		len -= cur_len;
3599 	} while (len > 0);
3600 
3601 	free_xid(xid);
3602 	return rc;
3603 }
3604 
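/*
 * Gather the results of the writes issued for an uncached write request in
 * order of increasing offset, resending any that failed with a retryable
 * error, and then complete the aio context (or wake the synchronous waiter).
 */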
3605 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3606 {
3607 	struct cifs_writedata *wdata, *tmp;
3608 	struct cifs_tcon *tcon;
3609 	struct cifs_sb_info *cifs_sb;
3610 	struct dentry *dentry = ctx->cfile->dentry;
3611 	ssize_t rc;
3612 
3613 	tcon = tlink_tcon(ctx->cfile->tlink);
3614 	cifs_sb = CIFS_SB(dentry->d_sb);
3615 
3616 	mutex_lock(&ctx->aio_mutex);
3617 
3618 	if (list_empty(&ctx->list)) {
3619 		mutex_unlock(&ctx->aio_mutex);
3620 		return;
3621 	}
3622 
3623 	rc = ctx->rc;
3624 	/*
3625 	 * Wait for and collect replies for any successful sends in order of
3626 	 * increasing offset. Once an error is hit, then return without waiting
3627 	 * for any more replies.
3628 	 */
3629 restart_loop:
3630 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3631 		if (!rc) {
3632 			if (!try_wait_for_completion(&wdata->done)) {
3633 				mutex_unlock(&ctx->aio_mutex);
3634 				return;
3635 			}
3636 
3637 			if (wdata->result)
3638 				rc = wdata->result;
3639 			else
3640 				ctx->total_len += wdata->bytes;
3641 
3642 			/* resend call if it's a retryable error */
3643 			if (rc == -EAGAIN) {
3644 				struct list_head tmp_list;
3645 				struct iov_iter tmp_from = ctx->iter;
3646 
3647 				INIT_LIST_HEAD(&tmp_list);
3648 				list_del_init(&wdata->list);
3649 
3650 				if (ctx->direct_io)
3651 					rc = cifs_resend_wdata(
3652 						wdata, &tmp_list, ctx);
3653 				else {
3654 					iov_iter_advance(&tmp_from,
3655 						 wdata->offset - ctx->pos);
3656 
3657 					rc = cifs_write_from_iter(wdata->offset,
3658 						wdata->bytes, &tmp_from,
3659 						ctx->cfile, cifs_sb, &tmp_list,
3660 						ctx);
3661 
3662 					kref_put(&wdata->refcount,
3663 						cifs_uncached_writedata_release);
3664 				}
3665 
3666 				list_splice(&tmp_list, &ctx->list);
3667 				goto restart_loop;
3668 			}
3669 		}
3670 		list_del_init(&wdata->list);
3671 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3672 	}
3673 
3674 	cifs_stats_bytes_written(tcon, ctx->total_len);
3675 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3676 
3677 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3678 
3679 	mutex_unlock(&ctx->aio_mutex);
3680 
3681 	if (ctx->iocb && ctx->iocb->ki_complete)
3682 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3683 	else
3684 		complete(&ctx->done);
3685 }
3686 
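/*
 * Common implementation for uncached and direct writes: wrap the source
 * iterator in a cifs_aio_ctx (extracting or duplicating user/kernel buffers
 * so they remain usable from a worker thread), dispatch the writes via
 * cifs_write_from_iter(), then either return -EIOCBQUEUED for an async iocb
 * or wait for completion and return the number of bytes written.
 */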
3687 static ssize_t __cifs_writev(
3688 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3689 {
3690 	struct file *file = iocb->ki_filp;
3691 	ssize_t total_written = 0;
3692 	struct cifsFileInfo *cfile;
3693 	struct cifs_tcon *tcon;
3694 	struct cifs_sb_info *cifs_sb;
3695 	struct cifs_aio_ctx *ctx;
3696 	int rc;
3697 
3698 	rc = generic_write_checks(iocb, from);
3699 	if (rc <= 0)
3700 		return rc;
3701 
3702 	cifs_sb = CIFS_FILE_SB(file);
3703 	cfile = file->private_data;
3704 	tcon = tlink_tcon(cfile->tlink);
3705 
3706 	if (!tcon->ses->server->ops->async_writev)
3707 		return -ENOSYS;
3708 
3709 	ctx = cifs_aio_ctx_alloc();
3710 	if (!ctx)
3711 		return -ENOMEM;
3712 
3713 	ctx->cfile = cifsFileInfo_get(cfile);
3714 
3715 	if (!is_sync_kiocb(iocb))
3716 		ctx->iocb = iocb;
3717 
3718 	ctx->pos = iocb->ki_pos;
3719 	ctx->direct_io = direct;
3720 	ctx->nr_pinned_pages = 0;
3721 
3722 	if (user_backed_iter(from)) {
3723 		/*
3724 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3725 		 * they contain references to the calling process's virtual
3726 		 * memory layout which won't be available in an async worker
3727 		 * thread.  This also takes a pin on every folio involved.
3728 		 */
3729 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3730 					     &ctx->iter, 0);
3731 		if (rc < 0) {
3732 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3733 			return rc;
3734 		}
3735 
3736 		ctx->nr_pinned_pages = rc;
3737 		ctx->bv = (void *)ctx->iter.bvec;
3738 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3739 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3740 		   !is_sync_kiocb(iocb)) {
3741 		/*
3742 		 * If the op is asynchronous, we need to copy the list attached
3743 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3744 		 * will be pinned by the caller; in any case, we may or may not
3745 		 * be able to pin the pages, so we don't try.
3746 		 */
3747 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3748 		if (!ctx->bv) {
3749 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3750 			return -ENOMEM;
3751 		}
3752 	} else {
3753 		/*
3754 		 * Otherwise, we just pass the iterator down as-is and rely on
3755 		 * the caller to make sure the pages referred to by the
3756 		 * iterator don't evaporate.
3757 		 */
3758 		ctx->iter = *from;
3759 	}
3760 
3761 	ctx->len = iov_iter_count(&ctx->iter);
3762 
3763 	/* grab a lock here because the write response handlers can access ctx */
3764 	mutex_lock(&ctx->aio_mutex);
3765 
3766 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3767 				  cfile, cifs_sb, &ctx->list, ctx);
3768 
3769 	/*
3770 	 * If at least one write was successfully sent, then discard any rc
3771 	 * value from the later writes. If the remaining writes succeed, then
3772 	 * we'll end up returning whatever was written. If one fails, then
3773 	 * we'll get a new rc value from that.
3774 	 */
3775 	if (!list_empty(&ctx->list))
3776 		rc = 0;
3777 
3778 	mutex_unlock(&ctx->aio_mutex);
3779 
3780 	if (rc) {
3781 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3782 		return rc;
3783 	}
3784 
3785 	if (!is_sync_kiocb(iocb)) {
3786 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3787 		return -EIOCBQUEUED;
3788 	}
3789 
3790 	rc = wait_for_completion_killable(&ctx->done);
3791 	if (rc) {
3792 		mutex_lock(&ctx->aio_mutex);
3793 		ctx->rc = rc = -EINTR;
3794 		total_written = ctx->total_len;
3795 		mutex_unlock(&ctx->aio_mutex);
3796 	} else {
3797 		rc = ctx->rc;
3798 		total_written = ctx->total_len;
3799 	}
3800 
3801 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802 
3803 	if (unlikely(!total_written))
3804 		return rc;
3805 
3806 	iocb->ki_pos += total_written;
3807 	return total_written;
3808 }
3809 
3810 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3811 {
3812 	struct file *file = iocb->ki_filp;
3813 
3814 	cifs_revalidate_mapping(file->f_inode);
3815 	return __cifs_writev(iocb, from, true);
3816 }
3817 
3818 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3819 {
3820 	return __cifs_writev(iocb, from, false);
3821 }
3822 
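/*
 * Write through the page cache for an oplocked file.  lock_sem is held
 * across the check for conflicting byte-range locks so that a mandatory
 * brlock cannot be inserted while the write is in progress.
 */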
3823 static ssize_t
3824 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3825 {
3826 	struct file *file = iocb->ki_filp;
3827 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3828 	struct inode *inode = file->f_mapping->host;
3829 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3830 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3831 	ssize_t rc;
3832 
3833 	inode_lock(inode);
3834 	/*
3835 	 * We need to hold the sem to be sure nobody modifies lock list
3836 	 * We need to hold the sem to be sure nobody modifies the lock list
3837 	 */
3838 	down_read(&cinode->lock_sem);
3839 
3840 	rc = generic_write_checks(iocb, from);
3841 	if (rc <= 0)
3842 		goto out;
3843 
3844 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3845 				     server->vals->exclusive_lock_type, 0,
3846 				     NULL, CIFS_WRITE_OP))
3847 		rc = __generic_file_write_iter(iocb, from);
3848 	else
3849 		rc = -EACCES;
3850 out:
3851 	up_read(&cinode->lock_sem);
3852 	inode_unlock(inode);
3853 
3854 	if (rc > 0)
3855 		rc = generic_write_sync(iocb, rc);
3856 	return rc;
3857 }
3858 
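/*
 * Strict-cache write entry point: write through the page cache when we hold
 * a write oplock, otherwise send the data directly to the server; if we only
 * have read caching, zap the now-stale page cache and drop the oplock
 * afterwards.
 */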
3859 ssize_t
3860 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3861 {
3862 	struct inode *inode = file_inode(iocb->ki_filp);
3863 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3864 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3865 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3866 						iocb->ki_filp->private_data;
3867 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3868 	ssize_t written;
3869 
3870 	written = cifs_get_writer(cinode);
3871 	if (written)
3872 		return written;
3873 
3874 	if (CIFS_CACHE_WRITE(cinode)) {
3875 		if (cap_unix(tcon->ses) &&
3876 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3877 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3878 			written = generic_file_write_iter(iocb, from);
3879 			goto out;
3880 		}
3881 		written = cifs_writev(iocb, from);
3882 		goto out;
3883 	}
3884 	/*
3885 	 * For non-oplocked files in strict cache mode we need to write the data
3886 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3887 	 * affected pages because it may cause an error with mandatory locks on
3888 	 * these pages but not on the region from pos to pos+len-1.
3889 	 */
3890 	written = cifs_user_writev(iocb, from);
3891 	if (CIFS_CACHE_READ(cinode)) {
3892 		/*
3893 		 * We have read level caching and we have just sent a write
3894 		 * request to the server thus making data in the cache stale.
3895 		 * Zap the cache and set oplock/lease level to NONE to avoid
3896 		 * reading stale data from the cache. All subsequent read
3897 		 * operations will read new data from the server.
3898 		 */
3899 		cifs_zap_mapping(inode);
3900 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3901 			 inode);
3902 		cinode->oplock = 0;
3903 	}
3904 out:
3905 	cifs_put_writer(cinode);
3906 	return written;
3907 }
3908 
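/*
 * Allocate a cifs_readdata and initialise its refcount, list head,
 * completion and work item.
 */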
3909 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3910 {
3911 	struct cifs_readdata *rdata;
3912 
3913 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3914 	if (rdata) {
3915 		kref_init(&rdata->refcount);
3916 		INIT_LIST_HEAD(&rdata->list);
3917 		init_completion(&rdata->done);
3918 		INIT_WORK(&rdata->work, complete);
3919 	}
3920 
3921 	return rdata;
3922 }
3923 
3924 void
3925 cifs_readdata_release(struct kref *refcount)
3926 {
3927 	struct cifs_readdata *rdata = container_of(refcount,
3928 					struct cifs_readdata, refcount);
3929 
3930 	if (rdata->ctx)
3931 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3932 #ifdef CONFIG_CIFS_SMB_DIRECT
3933 	if (rdata->mr) {
3934 		smbd_deregister_mr(rdata->mr);
3935 		rdata->mr = NULL;
3936 	}
3937 #endif
3938 	if (rdata->cfile)
3939 		cifsFileInfo_put(rdata->cfile);
3940 
3941 	kfree(rdata);
3942 }
3943 
3944 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3945 
3946 static void
3947 cifs_uncached_readv_complete(struct work_struct *work)
3948 {
3949 	struct cifs_readdata *rdata = container_of(work,
3950 						struct cifs_readdata, work);
3951 
3952 	complete(&rdata->done);
3953 	collect_uncached_read_data(rdata->ctx);
3954 	/* the below call can possibly free the last ref to aio ctx */
3955 	kref_put(&rdata->refcount, cifs_readdata_release);
3956 }
3957 
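/*
 * Resend a whole rdata after a retryable failure: reopen the file handle if
 * necessary, wait until enough credits are available to cover the full read,
 * drop any SMB-Direct memory registration and reissue the async read.  On
 * success the rdata is added back to the pending list.
 */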
3958 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3959 			struct list_head *rdata_list,
3960 			struct cifs_aio_ctx *ctx)
3961 {
3962 	unsigned int rsize;
3963 	struct cifs_credits credits;
3964 	int rc;
3965 	struct TCP_Server_Info *server;
3966 
3967 	/* XXX: should we pick a new channel here? */
3968 	server = rdata->server;
3969 
3970 	do {
3971 		if (rdata->cfile->invalidHandle) {
3972 			rc = cifs_reopen_file(rdata->cfile, true);
3973 			if (rc == -EAGAIN)
3974 				continue;
3975 			else if (rc)
3976 				break;
3977 		}
3978 
3979 		/*
3980 		 * Wait for credits to resend this rdata.
3981 		 * Note: we are attempting to resend the whole rdata not in
3982 		 * Note: we are attempting to resend the whole rdata rather
3983 		 * than in segments
3984 		do {
3985 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3986 						&rsize, &credits);
3987 
3988 			if (rc)
3989 				goto fail;
3990 
3991 			if (rsize < rdata->bytes) {
3992 				add_credits_and_wake_if(server, &credits, 0);
3993 				msleep(1000);
3994 			}
3995 		} while (rsize < rdata->bytes);
3996 		rdata->credits = credits;
3997 
3998 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3999 		if (!rc) {
4000 			if (rdata->cfile->invalidHandle)
4001 				rc = -EAGAIN;
4002 			else {
4003 #ifdef CONFIG_CIFS_SMB_DIRECT
4004 				if (rdata->mr) {
4005 					rdata->mr->need_invalidate = true;
4006 					smbd_deregister_mr(rdata->mr);
4007 					rdata->mr = NULL;
4008 				}
4009 #endif
4010 				rc = server->ops->async_readv(rdata);
4011 			}
4012 		}
4013 
4014 		/* If the read was successfully sent, we are done */
4015 		if (!rc) {
4016 			/* Add to aio pending list */
4017 			list_add_tail(&rdata->list, rdata_list);
4018 			return 0;
4019 		}
4020 
4021 		/* Roll back credits and retry if needed */
4022 		add_credits_and_wake_if(server, &rdata->credits, 0);
4023 	} while (rc == -EAGAIN);
4024 
4025 fail:
4026 	kref_put(&rdata->refcount, cifs_readdata_release);
4027 	return rc;
4028 }
4029 
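/*
 * Split an uncached/direct read into rsize-sized asynchronous reads: for
 * each chunk, wait for credits, carve a segment-limited subset out of the
 * context iterator, allocate an rdata and issue ->async_readv(), queueing
 * the rdata on @rdata_list for later collection.
 */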
4030 static int
4031 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4032 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4033 		     struct cifs_aio_ctx *ctx)
4034 {
4035 	struct cifs_readdata *rdata;
4036 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4037 	struct cifs_credits credits_on_stack;
4038 	struct cifs_credits *credits = &credits_on_stack;
4039 	size_t cur_len, max_len;
4040 	int rc;
4041 	pid_t pid;
4042 	struct TCP_Server_Info *server;
4043 
4044 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4045 
4046 #ifdef CONFIG_CIFS_SMB_DIRECT
4047 	if (server->smbd_conn)
4048 		max_segs = server->smbd_conn->max_frmr_depth;
4049 #endif
4050 
4051 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4052 		pid = open_file->pid;
4053 	else
4054 		pid = current->tgid;
4055 
4056 	do {
4057 		if (open_file->invalidHandle) {
4058 			rc = cifs_reopen_file(open_file, true);
4059 			if (rc == -EAGAIN)
4060 				continue;
4061 			else if (rc)
4062 				break;
4063 		}
4064 
4065 		if (cifs_sb->ctx->rsize == 0)
4066 			cifs_sb->ctx->rsize =
4067 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4068 							     cifs_sb->ctx);
4069 
4070 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4071 						   &rsize, credits);
4072 		if (rc)
4073 			break;
4074 
4075 		max_len = min_t(size_t, len, rsize);
4076 
4077 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4078 						 max_segs, &nsegs);
4079 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4080 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4081 		if (cur_len == 0) {
4082 			rc = -EIO;
4083 			add_credits_and_wake_if(server, credits, 0);
4084 			break;
4085 		}
4086 
4087 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4088 		if (!rdata) {
4089 			add_credits_and_wake_if(server, credits, 0);
4090 			rc = -ENOMEM;
4091 			break;
4092 		}
4093 
4094 		rdata->server	= server;
4095 		rdata->cfile	= cifsFileInfo_get(open_file);
4096 		rdata->offset	= fpos;
4097 		rdata->bytes	= cur_len;
4098 		rdata->pid	= pid;
4099 		rdata->credits	= credits_on_stack;
4100 		rdata->ctx	= ctx;
4101 		kref_get(&ctx->refcount);
4102 
4103 		rdata->iter	= ctx->iter;
4104 		iov_iter_truncate(&rdata->iter, cur_len);
4105 
4106 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4107 
4108 		if (!rc) {
4109 			if (rdata->cfile->invalidHandle)
4110 				rc = -EAGAIN;
4111 			else
4112 				rc = server->ops->async_readv(rdata);
4113 		}
4114 
4115 		if (rc) {
4116 			add_credits_and_wake_if(server, &rdata->credits, 0);
4117 			kref_put(&rdata->refcount, cifs_readdata_release);
4118 			if (rc == -EAGAIN)
4119 				continue;
4120 			break;
4121 		}
4122 
4123 		list_add_tail(&rdata->list, rdata_list);
4124 		iov_iter_advance(&ctx->iter, cur_len);
4125 		fpos += cur_len;
4126 		len -= cur_len;
4127 	} while (len > 0);
4128 
4129 	return rc;
4130 }
4131 
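/*
 * Read-side counterpart of collect_uncached_write_data(): collect the queued
 * rdata completions in offset order, resend requests that failed with
 * -EAGAIN, accumulate the bytes received and then complete the iocb (or wake
 * the synchronous waiter).
 */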
4132 static void
4133 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4134 {
4135 	struct cifs_readdata *rdata, *tmp;
4136 	struct cifs_sb_info *cifs_sb;
4137 	int rc;
4138 
4139 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4140 
4141 	mutex_lock(&ctx->aio_mutex);
4142 
4143 	if (list_empty(&ctx->list)) {
4144 		mutex_unlock(&ctx->aio_mutex);
4145 		return;
4146 	}
4147 
4148 	rc = ctx->rc;
4149 	/* the loop below should proceed in the order of increasing offsets */
4150 again:
4151 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4152 		if (!rc) {
4153 			if (!try_wait_for_completion(&rdata->done)) {
4154 				mutex_unlock(&ctx->aio_mutex);
4155 				return;
4156 			}
4157 
4158 			if (rdata->result == -EAGAIN) {
4159 				/* resend call if it's a retryable error */
4160 				struct list_head tmp_list;
4161 				unsigned int got_bytes = rdata->got_bytes;
4162 
4163 				list_del_init(&rdata->list);
4164 				INIT_LIST_HEAD(&tmp_list);
4165 
4166 				if (ctx->direct_io) {
4167 					/*
4168 					 * Re-use rdata as this is a
4169 					 * direct I/O request
4170 					 */
4171 					rc = cifs_resend_rdata(
4172 						rdata,
4173 						&tmp_list, ctx);
4174 				} else {
4175 					rc = cifs_send_async_read(
4176 						rdata->offset + got_bytes,
4177 						rdata->bytes - got_bytes,
4178 						rdata->cfile, cifs_sb,
4179 						&tmp_list, ctx);
4180 
4181 					kref_put(&rdata->refcount,
4182 						cifs_readdata_release);
4183 				}
4184 
4185 				list_splice(&tmp_list, &ctx->list);
4186 
4187 				goto again;
4188 			} else if (rdata->result)
4189 				rc = rdata->result;
4190 
4191 			/* if there was a short read -- discard anything left */
4192 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4193 				rc = -ENODATA;
4194 
4195 			ctx->total_len += rdata->got_bytes;
4196 		}
4197 		list_del_init(&rdata->list);
4198 		kref_put(&rdata->refcount, cifs_readdata_release);
4199 	}
4200 
4201 	/* mask nodata case */
4202 	if (rc == -ENODATA)
4203 		rc = 0;
4204 
4205 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4206 
4207 	mutex_unlock(&ctx->aio_mutex);
4208 
4209 	if (ctx->iocb && ctx->iocb->ki_complete)
4210 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4211 	else
4212 		complete(&ctx->done);
4213 }
4214 
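/*
 * Common implementation for uncached and direct reads; mirrors
 * __cifs_writev().  Set up a cifs_aio_ctx around the destination iterator,
 * flush the page cache range for direct I/O, dispatch the reads via
 * cifs_send_async_read() and either return -EIOCBQUEUED or wait for
 * completion and return the number of bytes read.
 */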
4215 static ssize_t __cifs_readv(
4216 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4217 {
4218 	size_t len;
4219 	struct file *file = iocb->ki_filp;
4220 	struct cifs_sb_info *cifs_sb;
4221 	struct cifsFileInfo *cfile;
4222 	struct cifs_tcon *tcon;
4223 	ssize_t rc, total_read = 0;
4224 	loff_t offset = iocb->ki_pos;
4225 	struct cifs_aio_ctx *ctx;
4226 
4227 	len = iov_iter_count(to);
4228 	if (!len)
4229 		return 0;
4230 
4231 	cifs_sb = CIFS_FILE_SB(file);
4232 	cfile = file->private_data;
4233 	tcon = tlink_tcon(cfile->tlink);
4234 
4235 	if (!tcon->ses->server->ops->async_readv)
4236 		return -ENOSYS;
4237 
4238 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4239 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4240 
4241 	ctx = cifs_aio_ctx_alloc();
4242 	if (!ctx)
4243 		return -ENOMEM;
4244 
4245 	ctx->pos	= offset;
4246 	ctx->direct_io	= direct;
4247 	ctx->len	= len;
4248 	ctx->cfile	= cifsFileInfo_get(cfile);
4249 	ctx->nr_pinned_pages = 0;
4250 
4251 	if (!is_sync_kiocb(iocb))
4252 		ctx->iocb = iocb;
4253 
4254 	if (user_backed_iter(to)) {
4255 		/*
4256 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4257 		 * they contain references to the calling process's virtual
4258 		 * memory layout which won't be available in an async worker
4259 		 * thread.  This also takes a pin on every folio involved.
4260 		 */
4261 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4262 					     &ctx->iter, 0);
4263 		if (rc < 0) {
4264 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4265 			return rc;
4266 		}
4267 
4268 		ctx->nr_pinned_pages = rc;
4269 		ctx->bv = (void *)ctx->iter.bvec;
4270 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4271 		ctx->should_dirty = true;
4272 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4273 		   !is_sync_kiocb(iocb)) {
4274 		/*
4275 		 * If the op is asynchronous, we need to copy the list attached
4276 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4277 		 * will be retained by the caller; in any case, we may or may
4278 		 * not be able to pin the pages, so we don't try.
4279 		 */
4280 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4281 		if (!ctx->bv) {
4282 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4283 			return -ENOMEM;
4284 		}
4285 	} else {
4286 		/*
4287 		 * Otherwise, we just pass the iterator down as-is and rely on
4288 		 * the caller to make sure the pages referred to by the
4289 		 * iterator don't evaporate.
4290 		 */
4291 		ctx->iter = *to;
4292 	}
4293 
4294 	if (direct) {
4295 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4296 						  offset, offset + len - 1);
4297 		if (rc) {
4298 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4299 			return -EAGAIN;
4300 		}
4301 	}
4302 
4303 	/* grab a lock here because the read response handlers can access ctx */
4304 	mutex_lock(&ctx->aio_mutex);
4305 
4306 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4307 
4308 	/* if at least one read request was sent successfully, then reset rc */
4309 	if (!list_empty(&ctx->list))
4310 		rc = 0;
4311 
4312 	mutex_unlock(&ctx->aio_mutex);
4313 
4314 	if (rc) {
4315 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4316 		return rc;
4317 	}
4318 
4319 	if (!is_sync_kiocb(iocb)) {
4320 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4321 		return -EIOCBQUEUED;
4322 	}
4323 
4324 	rc = wait_for_completion_killable(&ctx->done);
4325 	if (rc) {
4326 		mutex_lock(&ctx->aio_mutex);
4327 		ctx->rc = rc = -EINTR;
4328 		total_read = ctx->total_len;
4329 		mutex_unlock(&ctx->aio_mutex);
4330 	} else {
4331 		rc = ctx->rc;
4332 		total_read = ctx->total_len;
4333 	}
4334 
4335 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336 
4337 	if (total_read) {
4338 		iocb->ki_pos += total_read;
4339 		return total_read;
4340 	}
4341 	return rc;
4342 }
4343 
4344 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4345 {
4346 	return __cifs_readv(iocb, to, true);
4347 }
4348 
4349 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4350 {
4351 	return __cifs_readv(iocb, to, false);
4352 }
4353 
4354 ssize_t
4355 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4356 {
4357 	struct inode *inode = file_inode(iocb->ki_filp);
4358 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4359 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4360 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4361 						iocb->ki_filp->private_data;
4362 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4363 	int rc = -EACCES;
4364 
4365 	/*
4366 	 * In strict cache mode we need to read from the server every time
4367 	 * if we don't have a level II oplock because the server can delay the
4368 	 * mtime change - so we can't decide whether to invalidate the inode.
4369 	 * Reading pages can also fail if there are mandatory locks on pages
4370 	 * affected by this read but not on the region from pos to
4371 	 * pos+len-1.
4372 	 */
4373 	if (!CIFS_CACHE_READ(cinode))
4374 		return cifs_user_readv(iocb, to);
4375 
4376 	if (cap_unix(tcon->ses) &&
4377 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4378 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4379 		return generic_file_read_iter(iocb, to);
4380 
4381 	/*
4382 	 * We need to hold the sem to be sure nobody modifies lock list
4383 	 * We need to hold the sem to be sure nobody modifies the lock list
4384 	 */
4385 	down_read(&cinode->lock_sem);
4386 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4387 				     tcon->ses->server->vals->shared_lock_type,
4388 				     0, NULL, CIFS_READ_OP))
4389 		rc = generic_file_read_iter(iocb, to);
4390 	up_read(&cinode->lock_sem);
4391 	return rc;
4392 }
4393 
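/*
 * Synchronous read helper used by cifs_readpage_worker(): issue
 * ->sync_read() calls of at most rsize bytes until read_size bytes have been
 * read or an error or EOF is hit.
 */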
4394 static ssize_t
4395 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4396 {
4397 	int rc = -EACCES;
4398 	unsigned int bytes_read = 0;
4399 	unsigned int total_read;
4400 	unsigned int current_read_size;
4401 	unsigned int rsize;
4402 	struct cifs_sb_info *cifs_sb;
4403 	struct cifs_tcon *tcon;
4404 	struct TCP_Server_Info *server;
4405 	unsigned int xid;
4406 	char *cur_offset;
4407 	struct cifsFileInfo *open_file;
4408 	struct cifs_io_parms io_parms = {0};
4409 	int buf_type = CIFS_NO_BUFFER;
4410 	__u32 pid;
4411 
4412 	xid = get_xid();
4413 	cifs_sb = CIFS_FILE_SB(file);
4414 
4415 	/* FIXME: set up handlers for larger reads and/or convert to async */
4416 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4417 
4418 	if (file->private_data == NULL) {
4419 		rc = -EBADF;
4420 		free_xid(xid);
4421 		return rc;
4422 	}
4423 	open_file = file->private_data;
4424 	tcon = tlink_tcon(open_file->tlink);
4425 	server = cifs_pick_channel(tcon->ses);
4426 
4427 	if (!server->ops->sync_read) {
4428 		free_xid(xid);
4429 		return -ENOSYS;
4430 	}
4431 
4432 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4433 		pid = open_file->pid;
4434 	else
4435 		pid = current->tgid;
4436 
4437 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4438 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4439 
4440 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4441 	     total_read += bytes_read, cur_offset += bytes_read) {
4442 		do {
4443 			current_read_size = min_t(uint, read_size - total_read,
4444 						  rsize);
4445 			/*
4446 			 * For Windows ME and 9x we do not want to request more
4447 			 * than it negotiated since it will refuse the read
4448 			 * in that case.
4449 			 */
4450 			if (!(tcon->ses->capabilities &
4451 				tcon->ses->server->vals->cap_large_files)) {
4452 				current_read_size = min_t(uint,
4453 					current_read_size, CIFSMaxBufSize);
4454 			}
4455 			if (open_file->invalidHandle) {
4456 				rc = cifs_reopen_file(open_file, true);
4457 				if (rc != 0)
4458 					break;
4459 			}
4460 			io_parms.pid = pid;
4461 			io_parms.tcon = tcon;
4462 			io_parms.offset = *offset;
4463 			io_parms.length = current_read_size;
4464 			io_parms.server = server;
4465 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4466 						    &bytes_read, &cur_offset,
4467 						    &buf_type);
4468 		} while (rc == -EAGAIN);
4469 
4470 		if (rc || (bytes_read == 0)) {
4471 			if (total_read) {
4472 				break;
4473 			} else {
4474 				free_xid(xid);
4475 				return rc;
4476 			}
4477 		} else {
4478 			cifs_stats_bytes_read(tcon, total_read);
4479 			*offset += bytes_read;
4480 		}
4481 	}
4482 	free_xid(xid);
4483 	return total_read;
4484 }
4485 
4486 /*
4487  * If the page is mmap'ed into a process' page tables, then we need to make
4488  * sure that it doesn't change while being written back.
4489  */
4490 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4491 {
4492 	struct folio *folio = page_folio(vmf->page);
4493 
4494 	/* Wait for the folio to be written to the cache before we allow it to
4495 	 * be modified.  We then assume the entire folio will need writing back.
4496 	 */
4497 #ifdef CONFIG_CIFS_FSCACHE
4498 	if (folio_test_fscache(folio) &&
4499 	    folio_wait_fscache_killable(folio) < 0)
4500 		return VM_FAULT_RETRY;
4501 #endif
4502 
4503 	folio_wait_writeback(folio);
4504 
4505 	if (folio_lock_killable(folio) < 0)
4506 		return VM_FAULT_RETRY;
4507 	return VM_FAULT_LOCKED;
4508 }
4509 
4510 static const struct vm_operations_struct cifs_file_vm_ops = {
4511 	.fault = filemap_fault,
4512 	.map_pages = filemap_map_pages,
4513 	.page_mkwrite = cifs_page_mkwrite,
4514 };
4515 
4516 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4517 {
4518 	int xid, rc = 0;
4519 	struct inode *inode = file_inode(file);
4520 
4521 	xid = get_xid();
4522 
4523 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4524 		rc = cifs_zap_mapping(inode);
4525 	if (!rc)
4526 		rc = generic_file_mmap(file, vma);
4527 	if (!rc)
4528 		vma->vm_ops = &cifs_file_vm_ops;
4529 
4530 	free_xid(xid);
4531 	return rc;
4532 }
4533 
4534 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4535 {
4536 	int rc, xid;
4537 
4538 	xid = get_xid();
4539 
4540 	rc = cifs_revalidate_file(file);
4541 	if (rc)
4542 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4543 			 rc);
4544 	if (!rc)
4545 		rc = generic_file_mmap(file, vma);
4546 	if (!rc)
4547 		vma->vm_ops = &cifs_file_vm_ops;
4548 
4549 	free_xid(xid);
4550 	return rc;
4551 }
4552 
4553 /*
4554  * Unlock a bunch of folios in the pagecache.
4555  */
4556 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4557 {
4558 	struct folio *folio;
4559 	XA_STATE(xas, &mapping->i_pages, first);
4560 
4561 	rcu_read_lock();
4562 	xas_for_each(&xas, folio, last) {
4563 		folio_unlock(folio);
4564 	}
4565 	rcu_read_unlock();
4566 }
4567 
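/*
 * Completion of an asynchronous readahead read: copy the data to fscache if
 * the read succeeded, zero any shortfall, then mark the affected folios
 * uptodate and unlock them.
 */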
4568 static void cifs_readahead_complete(struct work_struct *work)
4569 {
4570 	struct cifs_readdata *rdata = container_of(work,
4571 						   struct cifs_readdata, work);
4572 	struct folio *folio;
4573 	pgoff_t last;
4574 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4575 
4576 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4577 
4578 	if (good)
4579 		cifs_readahead_to_fscache(rdata->mapping->host,
4580 					  rdata->offset, rdata->bytes);
4581 
4582 	if (iov_iter_count(&rdata->iter) > 0)
4583 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4584 
4585 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4586 
4587 	rcu_read_lock();
4588 	xas_for_each(&xas, folio, last) {
4589 		if (good) {
4590 			flush_dcache_folio(folio);
4591 			folio_mark_uptodate(folio);
4592 		}
4593 		folio_unlock(folio);
4594 	}
4595 	rcu_read_unlock();
4596 
4597 	kref_put(&rdata->refcount, cifs_readdata_release);
4598 }
4599 
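/*
 * Issue asynchronous reads for a readahead request, dequeuing folios from
 * the ractl in rsize-sized batches; ranges already present in fscache are
 * read from the cache instead of from the server.
 */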
4600 static void cifs_readahead(struct readahead_control *ractl)
4601 {
4602 	struct cifsFileInfo *open_file = ractl->file->private_data;
4603 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4604 	struct TCP_Server_Info *server;
4605 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4606 	unsigned int ra_pages;
4607 	pgoff_t next_cached = ULONG_MAX, ra_index;
4608 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4609 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4610 	bool check_cache = caching;
4611 	pid_t pid;
4612 	int rc = 0;
4613 
4614 	/* Note that readahead_count() lags behind our dequeuing of pages from
4615 	 * the ractl, so we have to keep track for ourselves.
4616 	 */
4617 	ra_pages = readahead_count(ractl);
4618 	ra_index = readahead_index(ractl);
4619 
4620 	xid = get_xid();
4621 
4622 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4623 		pid = open_file->pid;
4624 	else
4625 		pid = current->tgid;
4626 
4627 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4628 
4629 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4630 		 __func__, ractl->file, ractl->mapping, ra_pages);
4631 
4632 	/*
4633 	 * Chop the readahead request up into rsize-sized read requests.
4634 	 */
4635 	while ((nr_pages = ra_pages)) {
4636 		unsigned int i, rsize;
4637 		struct cifs_readdata *rdata;
4638 		struct cifs_credits credits_on_stack;
4639 		struct cifs_credits *credits = &credits_on_stack;
4640 		struct folio *folio;
4641 		pgoff_t fsize;
4642 
4643 		/*
4644 		 * Find out if we have anything cached in the range of
4645 		 * interest, and if so, where the next chunk of cached data is.
4646 		 */
4647 		if (caching) {
4648 			if (check_cache) {
4649 				rc = cifs_fscache_query_occupancy(
4650 					ractl->mapping->host, ra_index, nr_pages,
4651 					&next_cached, &cache_nr_pages);
4652 				if (rc < 0)
4653 					caching = false;
4654 				check_cache = false;
4655 			}
4656 
4657 			if (ra_index == next_cached) {
4658 				/*
4659 				 * TODO: Send a whole batch of pages to be read
4660 				 * by the cache.
4661 				 */
4662 				folio = readahead_folio(ractl);
4663 				fsize = folio_nr_pages(folio);
4664 				ra_pages -= fsize;
4665 				ra_index += fsize;
4666 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4667 							       &folio->page) < 0) {
4668 					/*
4669 					 * TODO: Deal with cache read failure
4670 					 * here, but for the moment, delegate
4671 					 * that to readpage.
4672 					 */
4673 					caching = false;
4674 				}
4675 				folio_unlock(folio);
4676 				next_cached += fsize;
4677 				cache_nr_pages -= fsize;
4678 				if (cache_nr_pages == 0)
4679 					check_cache = true;
4680 				continue;
4681 			}
4682 		}
4683 
4684 		if (open_file->invalidHandle) {
4685 			rc = cifs_reopen_file(open_file, true);
4686 			if (rc) {
4687 				if (rc == -EAGAIN)
4688 					continue;
4689 				break;
4690 			}
4691 		}
4692 
4693 		if (cifs_sb->ctx->rsize == 0)
4694 			cifs_sb->ctx->rsize =
4695 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4696 							     cifs_sb->ctx);
4697 
4698 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4699 						   &rsize, credits);
4700 		if (rc)
4701 			break;
4702 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4703 		if (next_cached != ULONG_MAX)
4704 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4705 
4706 		/*
4707 		 * Give up immediately if rsize is too small to read an entire
4708 		 * page. The VFS will fall back to readpage. We should never
4709 		 * reach this point however since we set ra_pages to 0 when the
4710 		 * rsize is smaller than a cache page.
4711 		 */
4712 		if (unlikely(!nr_pages)) {
4713 			add_credits_and_wake_if(server, credits, 0);
4714 			break;
4715 		}
4716 
4717 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4718 		if (!rdata) {
4719 			/* best to give up if we're out of mem */
4720 			add_credits_and_wake_if(server, credits, 0);
4721 			break;
4722 		}
4723 
4724 		rdata->offset	= ra_index * PAGE_SIZE;
4725 		rdata->bytes	= nr_pages * PAGE_SIZE;
4726 		rdata->cfile	= cifsFileInfo_get(open_file);
4727 		rdata->server	= server;
4728 		rdata->mapping	= ractl->mapping;
4729 		rdata->pid	= pid;
4730 		rdata->credits	= credits_on_stack;
4731 
4732 		for (i = 0; i < nr_pages; i++) {
4733 			if (!readahead_folio(ractl))
4734 				WARN_ON(1);
4735 		}
4736 		ra_pages -= nr_pages;
4737 		ra_index += nr_pages;
4738 
4739 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4740 				rdata->offset, rdata->bytes);
4741 
4742 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4743 		if (!rc) {
4744 			if (rdata->cfile->invalidHandle)
4745 				rc = -EAGAIN;
4746 			else
4747 				rc = server->ops->async_readv(rdata);
4748 		}
4749 
4750 		if (rc) {
4751 			add_credits_and_wake_if(server, &rdata->credits, 0);
4752 			cifs_unlock_folios(rdata->mapping,
4753 					   rdata->offset / PAGE_SIZE,
4754 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4755 			/* Fall back to readpage in error/reconnect cases */
4756 			kref_put(&rdata->refcount, cifs_readdata_release);
4757 			break;
4758 		}
4759 
4760 		kref_put(&rdata->refcount, cifs_readdata_release);
4761 	}
4762 
4763 	free_xid(xid);
4764 }
4765 
4766 /*
4767  * cifs_readpage_worker must be called with the page pinned
4768  */
4769 static int cifs_readpage_worker(struct file *file, struct page *page,
4770 	loff_t *poffset)
4771 {
4772 	struct inode *inode = file_inode(file);
4773 	struct timespec64 atime, mtime;
4774 	char *read_data;
4775 	int rc;
4776 
4777 	/* Is the page cached? */
4778 	rc = cifs_readpage_from_fscache(inode, page);
4779 	if (rc == 0)
4780 		goto read_complete;
4781 
4782 	read_data = kmap(page);
4783 	/* for reads over a certain size we could initiate async read ahead */
4784 
4785 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4786 
4787 	if (rc < 0)
4788 		goto io_error;
4789 	else
4790 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4791 
4792 	/* we do not want atime to be less than mtime, it broke some apps */
4793 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4794 	mtime = inode_get_mtime(inode);
4795 	if (timespec64_compare(&atime, &mtime) < 0)
4796 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4797 
4798 	if (PAGE_SIZE > rc)
4799 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4800 
4801 	flush_dcache_page(page);
4802 	SetPageUptodate(page);
4803 	rc = 0;
4804 
4805 io_error:
4806 	kunmap(page);
4807 
4808 read_complete:
4809 	unlock_page(page);
4810 	return rc;
4811 }
4812 
4813 static int cifs_read_folio(struct file *file, struct folio *folio)
4814 {
4815 	struct page *page = &folio->page;
4816 	loff_t offset = page_file_offset(page);
4817 	int rc = -EACCES;
4818 	unsigned int xid;
4819 
4820 	xid = get_xid();
4821 
4822 	if (file->private_data == NULL) {
4823 		rc = -EBADF;
4824 		free_xid(xid);
4825 		return rc;
4826 	}
4827 
4828 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4829 		 page, (int)offset, (int)offset);
4830 
4831 	rc = cifs_readpage_worker(file, page, &offset);
4832 
4833 	free_xid(xid);
4834 	return rc;
4835 }
4836 
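/* Return 1 if the inode has at least one file handle open for writing. */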
4837 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4838 {
4839 	struct cifsFileInfo *open_file;
4840 
4841 	spin_lock(&cifs_inode->open_file_lock);
4842 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4843 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4844 			spin_unlock(&cifs_inode->open_file_lock);
4845 			return 1;
4846 		}
4847 	}
4848 	spin_unlock(&cifs_inode->open_file_lock);
4849 	return 0;
4850 }
4851 
4852 /* We do not want to update the file size from the server for inodes
4853    open for write, to avoid races with writepage extending the file.
4854    In the future we could consider allowing a refresh of the inode
4855    only on increases in the file size, but this is tricky to do
4856    without racing with writebehind page caching in the current
4857    Linux kernel design. */
4858 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4859 			    bool from_readdir)
4860 {
4861 	if (!cifsInode)
4862 		return true;
4863 
4864 	if (is_inode_writable(cifsInode) ||
4865 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4866 		/* This inode is open for write at least once */
4867 		struct cifs_sb_info *cifs_sb;
4868 
4869 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4870 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4871 			/* since there is no page cache to corrupt on directio
4872 			   we can change the size safely */
4873 			return true;
4874 		}
4875 
4876 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4877 			return true;
4878 
4879 		return false;
4880 	} else
4881 		return true;
4882 }
4883 
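/*
 * Get (and possibly fill) the page cache page that will receive a buffered
 * write.  The read from the server is skipped when the page is already
 * uptodate, the write covers the whole page, or - under a read oplock - the
 * page lies beyond EOF or the write covers all of its existing data.
 */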
4884 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4885 			loff_t pos, unsigned len,
4886 			struct page **pagep, void **fsdata)
4887 {
4888 	int oncethru = 0;
4889 	pgoff_t index = pos >> PAGE_SHIFT;
4890 	loff_t offset = pos & (PAGE_SIZE - 1);
4891 	loff_t page_start = pos & PAGE_MASK;
4892 	loff_t i_size;
4893 	struct page *page;
4894 	int rc = 0;
4895 
4896 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4897 
4898 start:
4899 	page = grab_cache_page_write_begin(mapping, index);
4900 	if (!page) {
4901 		rc = -ENOMEM;
4902 		goto out;
4903 	}
4904 
4905 	if (PageUptodate(page))
4906 		goto out;
4907 
4908 	/*
4909 	 * If we write a full page it will be up to date, no need to read from
4910 	 * the server. If the write is short, we'll end up doing a sync write
4911 	 * instead.
4912 	 */
4913 	if (len == PAGE_SIZE)
4914 		goto out;
4915 
4916 	/*
4917 	 * optimize away the read when we have an oplock, and we're not
4918 	 * expecting to use any of the data we'd be reading in. That
4919 	 * is, when the page lies beyond the EOF, or straddles the EOF
4920 	 * and the write will cover all of the existing data.
4921 	 */
4922 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4923 		i_size = i_size_read(mapping->host);
4924 		if (page_start >= i_size ||
4925 		    (offset == 0 && (pos + len) >= i_size)) {
4926 			zero_user_segments(page, 0, offset,
4927 					   offset + len,
4928 					   PAGE_SIZE);
4929 			/*
4930 			 * PageChecked means that the parts of the page
4931 			 * to which we're not writing are considered up
4932 			 * to date. Once the data is copied to the
4933 			 * page, it can be set uptodate.
4934 			 */
4935 			SetPageChecked(page);
4936 			goto out;
4937 		}
4938 	}
4939 
4940 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4941 		/*
4942 		 * might as well read a page, it is fast enough. If we get
4943 		 * an error, we don't need to return it. cifs_write_end will
4944 		 * do a sync write instead since PG_uptodate isn't set.
4945 		 */
4946 		cifs_readpage_worker(file, page, &page_start);
4947 		put_page(page);
4948 		oncethru = 1;
4949 		goto start;
4950 	} else {
4951 		/* we could try using another file handle if there is one -
4952 		   but how would we lock it to prevent close of that handle
4953 		   racing with this read? In any case
4954 		   this will be written out by write_end so is fine */
4955 		   this will be written out by write_end so it is fine */
4956 out:
4957 	*pagep = page;
4958 	return rc;
4959 }
4960 
4961 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4962 {
4963 	if (folio_test_private(folio))
4964 		return 0;
4965 	if (folio_test_fscache(folio)) {
4966 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4967 			return false;
4968 		folio_wait_fscache(folio);
4969 	}
4970 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4971 	return true;
4972 }
4973 
4974 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4975 				 size_t length)
4976 {
4977 	folio_wait_fscache(folio);
4978 }
4979 
4980 static int cifs_launder_folio(struct folio *folio)
4981 {
4982 	int rc = 0;
4983 	loff_t range_start = folio_pos(folio);
4984 	loff_t range_end = range_start + folio_size(folio);
4985 	struct writeback_control wbc = {
4986 		.sync_mode = WB_SYNC_ALL,
4987 		.nr_to_write = 0,
4988 		.range_start = range_start,
4989 		.range_end = range_end,
4990 	};
4991 
4992 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4993 
4994 	if (folio_clear_dirty_for_io(folio))
4995 		rc = cifs_writepage_locked(&folio->page, &wbc);
4996 
4997 	folio_wait_fscache(folio);
4998 	return rc;
4999 }
5000 
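/*
 * Work item run when the server breaks an oplock or lease: downgrade the
 * cached oplock level, flush (and possibly invalidate) the page cache, push
 * any cached byte-range locks to the server and acknowledge the break unless
 * the handle has already been closed.
 */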
5001 void cifs_oplock_break(struct work_struct *work)
5002 {
5003 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5004 						  oplock_break);
5005 	struct inode *inode = d_inode(cfile->dentry);
5006 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5007 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5008 	struct cifs_tcon *tcon;
5009 	struct TCP_Server_Info *server;
5010 	struct tcon_link *tlink;
5011 	int rc = 0;
5012 	bool purge_cache = false, oplock_break_cancelled;
5013 	__u64 persistent_fid, volatile_fid;
5014 	__u16 net_fid;
5015 
5016 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5017 			TASK_UNINTERRUPTIBLE);
5018 
5019 	tlink = cifs_sb_tlink(cifs_sb);
5020 	if (IS_ERR(tlink))
5021 		goto out;
5022 	tcon = tlink_tcon(tlink);
5023 	server = tcon->ses->server;
5024 
5025 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5026 				      cfile->oplock_epoch, &purge_cache);
5027 
5028 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5029 						cifs_has_mand_locks(cinode)) {
5030 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5031 			 inode);
5032 		cinode->oplock = 0;
5033 	}
5034 
5035 	if (inode && S_ISREG(inode->i_mode)) {
5036 		if (CIFS_CACHE_READ(cinode))
5037 			break_lease(inode, O_RDONLY);
5038 		else
5039 			break_lease(inode, O_WRONLY);
5040 		rc = filemap_fdatawrite(inode->i_mapping);
5041 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5042 			rc = filemap_fdatawait(inode->i_mapping);
5043 			mapping_set_error(inode->i_mapping, rc);
5044 			cifs_zap_mapping(inode);
5045 		}
5046 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5047 		if (CIFS_CACHE_WRITE(cinode))
5048 			goto oplock_break_ack;
5049 	}
5050 
5051 	rc = cifs_push_locks(cfile);
5052 	if (rc)
5053 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5054 
5055 oplock_break_ack:
5056 	/*
5057 	 * When an oplock break is received and there are no active file
5058 	 * handles, only cached ones, schedule the deferred close immediately
5059 	 * so that a new open will not use the cached handle.
5060 	 */
5061 
5062 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5063 		cifs_close_deferred_file(cinode);
5064 
5065 	persistent_fid = cfile->fid.persistent_fid;
5066 	volatile_fid = cfile->fid.volatile_fid;
5067 	net_fid = cfile->fid.netfid;
5068 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5069 
5070 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5071 	/*
5072 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5073 	 * an acknowledgment to be sent when the file has already been closed.
5074 	 */
5075 	spin_lock(&cinode->open_file_lock);
5076 	/* check list empty since can race with kill_sb calling tree disconnect */
5077 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5078 		spin_unlock(&cinode->open_file_lock);
5079 		rc = server->ops->oplock_response(tcon, persistent_fid,
5080 						  volatile_fid, net_fid, cinode);
5081 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5082 	} else
5083 		spin_unlock(&cinode->open_file_lock);
5084 
5085 	cifs_put_tlink(tlink);
5086 out:
5087 	cifs_done_oplock_break(cinode);
5088 }
5089 
5090 /*
5091  * The presence of cifs_direct_io() in the address space ops vector
5092  * allows open() O_DIRECT flags which would have failed otherwise.
5093  *
5094  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5095  * so this method should never be called.
5096  *
5097  * Direct IO is not yet supported in the cached mode.
5098  */
5099 static ssize_t
5100 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5101 {
5102 	/*
5103 	 * FIXME
5104 	 * Eventually need to support direct IO for non forcedirectio mounts
5105 	 */
5106 	return -EINVAL;
5107 }
5108 
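/*
 * Prepare a cifs file for use as swap space: reject swapfiles with holes,
 * report the extent span and flag the open file as a swapfile.
 */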
5109 static int cifs_swap_activate(struct swap_info_struct *sis,
5110 			      struct file *swap_file, sector_t *span)
5111 {
5112 	struct cifsFileInfo *cfile = swap_file->private_data;
5113 	struct inode *inode = swap_file->f_mapping->host;
5114 	unsigned long blocks;
5115 	long long isize;
5116 
5117 	cifs_dbg(FYI, "swap activate\n");
5118 
5119 	if (!swap_file->f_mapping->a_ops->swap_rw)
5120 		/* Cannot support swap */
5121 		return -EINVAL;
5122 
5123 	spin_lock(&inode->i_lock);
5124 	blocks = inode->i_blocks;
5125 	isize = inode->i_size;
5126 	spin_unlock(&inode->i_lock);
5127 	if (blocks*512 < isize) {
5128 		pr_warn("swap activate: swapfile has holes\n");
5129 		return -EINVAL;
5130 	}
5131 	*span = sis->pages;
5132 
5133 	pr_warn_once("Swap support over SMB3 is experimental\n");
5134 
5135 	/*
5136 	 * TODO: consider adding ACL (or documenting how) to prevent other
5137 	 * users (on this or other systems) from reading it
5138 	 */
5139 
5140 
5141 	/* TODO: add sk_set_memalloc(inet) or similar */
5142 
5143 	if (cfile)
5144 		cfile->swapfile = true;
5145 	/*
5146 	 * TODO: Since file already open, we can't open with DENY_ALL here
5147 	 * but we could add a call to grab a byte range lock to prevent others
5148 	 * from reading or writing the file
5149 	 */
5150 
5151 	sis->flags |= SWP_FS_OPS;
5152 	return add_swap_extent(sis, 0, sis->max, 0);
5153 }
5154 
5155 static void cifs_swap_deactivate(struct file *file)
5156 {
5157 	struct cifsFileInfo *cfile = file->private_data;
5158 
5159 	cifs_dbg(FYI, "swap deactivate\n");
5160 
5161 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5162 
5163 	if (cfile)
5164 		cfile->swapfile = false;
5165 
5166 	/* do we need to unpin (or unlock) the file? */
5167 }
5168 
5169 /*
5170  * Mark a page as having been made dirty and thus needing writeback.  We also
5171  * need to pin the cache object to write back to.
5172  */
5173 #ifdef CONFIG_CIFS_FSCACHE
5174 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5175 {
5176 	return fscache_dirty_folio(mapping, folio,
5177 					cifs_inode_cookie(mapping->host));
5178 }
5179 #else
5180 #define cifs_dirty_folio filemap_dirty_folio
5181 #endif
5182 
5183 const struct address_space_operations cifs_addr_ops = {
5184 	.read_folio = cifs_read_folio,
5185 	.readahead = cifs_readahead,
5186 	.writepages = cifs_writepages,
5187 	.write_begin = cifs_write_begin,
5188 	.write_end = cifs_write_end,
5189 	.dirty_folio = cifs_dirty_folio,
5190 	.release_folio = cifs_release_folio,
5191 	.direct_IO = cifs_direct_io,
5192 	.invalidate_folio = cifs_invalidate_folio,
5193 	.launder_folio = cifs_launder_folio,
5194 	.migrate_folio = filemap_migrate_folio,
5195 	/*
5196 	 * TODO: investigate whether adding an is_dirty_writeback helper
5197 	 * would be useful here
5198 	 */
5199 	.swap_activate = cifs_swap_activate,
5200 	.swap_deactivate = cifs_swap_deactivate,
5201 };
5202 
5203 /*
5204  * cifs_readahead requires the server to support a buffer large enough to
5205  * contain the header plus one complete page of data.  Otherwise, we need
5206  * to leave cifs_readahead out of the address space operations.
5207  */
5208 const struct address_space_operations cifs_addr_ops_smallbuf = {
5209 	.read_folio = cifs_read_folio,
5210 	.writepages = cifs_writepages,
5211 	.write_begin = cifs_write_begin,
5212 	.write_end = cifs_write_end,
5213 	.dirty_folio = cifs_dirty_folio,
5214 	.release_folio = cifs_release_folio,
5215 	.invalidate_folio = cifs_invalidate_folio,
5216 	.launder_folio = cifs_launder_folio,
5217 	.migrate_folio = filemap_migrate_folio,
5218 };
5219