xref: /openbmc/linux/fs/smb/client/file.c (revision 91cdeb0d)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on tree connections as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
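/*
 * Map the open(2) access mode in @flags to the SMB desired-access bits.
 * Note: when @rdwr_for_fscache is 1, a write-only open is widened to
 * GENERIC_READ | GENERIC_WRITE so the local fscache can read data back
 * to fill in around partial-page writes.
 */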
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request;
217 		   it can cause an unnecessary access-denied error on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that there is no direct match among the POSIX flags for the
374  *	FILE_SUPERSEDE disposition (ie create whether or not the file exists).
375  *	O_CREAT | O_TRUNC is similar, but it truncates the existing file
376  *	rather than replacing it with a new file as FILE_SUPERSEDE does
377  *	(FILE_SUPERSEDE also uses the attributes / metadata passed in on the open call).
378  *
379  *	O_SYNC is a reasonable match to the CIFS writethrough flag, and the
380  *	read/write flags match reasonably.  O_LARGEFILE is irrelevant because
381  *	largefile support is always used by this client.  Flags O_APPEND,
382  *	O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and O_NONBLOCK need
383  *	further investigation.
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
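	/*
	 * Open retry: if the access widened for fscache (rdwr_for_fscache == 1)
	 * is denied, retry below with the caller's original access and, on
	 * success, invalidate the cache contents for this inode (see the
	 * rdwr_for_fscache == 2 handling after the open).
	 */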
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info, passing in the fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
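/*
 * Return true if any open handle on this inode currently holds byte-range
 * locks.  Callers use this to downgrade a read oplock to None, since cached
 * reads could otherwise bypass mandatory brlocks.
 */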
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
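
/*
 * Take @sem for writing by polling down_write_trylock() every 10ms instead
 * of sleeping in down_write().  A polling writer is never queued on the
 * rwsem, so readers are not blocked behind a waiting writer while we spin.
 */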
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->status_file_deleted = false;
505 	cfile->invalidHandle = false;
506 	cfile->deferred_close_scheduled = false;
507 	cfile->tlink = cifs_get_tlink(tlink);
508 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
509 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
510 	INIT_WORK(&cfile->serverclose, serverclose_work);
511 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
512 	mutex_init(&cfile->fh_mutex);
513 	spin_lock_init(&cfile->file_info_lock);
514 
515 	cifs_sb_active(inode->i_sb);
516 
517 	/*
518 	 * If the server returned a read oplock and we have mandatory brlocks,
519 	 * set oplock level to None.
520 	 */
521 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
522 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
523 		oplock = 0;
524 	}
525 
526 	cifs_down_write(&cinode->lock_sem);
527 	list_add(&fdlocks->llist, &cinode->llist);
528 	up_write(&cinode->lock_sem);
529 
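	/*
	 * An oplock/lease break may have raced with this open; if the
	 * pending_open entry was updated with a new level while the open was
	 * in flight, prefer that level over the one the open returned.
	 */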
530 	spin_lock(&tcon->open_file_lock);
531 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
532 		oplock = fid->pending_open->oplock;
533 	list_del(&fid->pending_open->olist);
534 
535 	fid->purge_cache = false;
536 	server->ops->set_fid(cfile, fid, oplock);
537 
538 	list_add(&cfile->tlist, &tcon->openFileList);
539 	atomic_inc(&tcon->num_local_opens);
540 
541 	/* if a readable file instance, put it first in the list */
542 	spin_lock(&cinode->open_file_lock);
543 	if (file->f_mode & FMODE_READ)
544 		list_add(&cfile->flist, &cinode->openFileList);
545 	else
546 		list_add_tail(&cfile->flist, &cinode->openFileList);
547 	spin_unlock(&cinode->open_file_lock);
548 	spin_unlock(&tcon->open_file_lock);
549 
550 	if (fid->purge_cache)
551 		cifs_zap_mapping(inode);
552 
553 	file->private_data = cfile;
554 	return cfile;
555 }
556 
557 struct cifsFileInfo *
558 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
559 {
560 	spin_lock(&cifs_file->file_info_lock);
561 	cifsFileInfo_get_locked(cifs_file);
562 	spin_unlock(&cifs_file->file_info_lock);
563 	return cifs_file;
564 }
565 
566 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
567 {
568 	struct inode *inode = d_inode(cifs_file->dentry);
569 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
570 	struct cifsLockInfo *li, *tmp;
571 	struct super_block *sb = inode->i_sb;
572 
573 	/*
574 	 * Delete any outstanding lock records. We'll lose them when the file
575 	 * is closed anyway.
576 	 */
577 	cifs_down_write(&cifsi->lock_sem);
578 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
579 		list_del(&li->llist);
580 		cifs_del_lock_waiters(li);
581 		kfree(li);
582 	}
583 	list_del(&cifs_file->llist->llist);
584 	kfree(cifs_file->llist);
585 	up_write(&cifsi->lock_sem);
586 
587 	cifs_put_tlink(cifs_file->tlink);
588 	dput(cifs_file->dentry);
589 	cifs_sb_deactive(sb);
590 	kfree(cifs_file->symlink_target);
591 	kfree(cifs_file);
592 }
593 
594 static void cifsFileInfo_put_work(struct work_struct *work)
595 {
596 	struct cifsFileInfo *cifs_file = container_of(work,
597 			struct cifsFileInfo, put);
598 
599 	cifsFileInfo_put_final(cifs_file);
600 }
601 
602 void serverclose_work(struct work_struct *work)
603 {
604 	struct cifsFileInfo *cifs_file = container_of(work,
605 			struct cifsFileInfo, serverclose);
606 
607 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
608 
609 	struct TCP_Server_Info *server = tcon->ses->server;
610 	int rc = 0;
611 	int retries = 0;
612 	int MAX_RETRIES = 4;
613 
614 	do {
615 		if (server->ops->close_getattr)
616 			rc = server->ops->close_getattr(0, tcon, cifs_file);
617 		else if (server->ops->close)
618 			rc = server->ops->close(0, tcon, &cifs_file->fid);
619 
620 		if (rc == -EBUSY || rc == -EAGAIN) {
621 			retries++;
622 			msleep(250);
623 		}
624 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));
626 
627 	if (retries == MAX_RETRIES)
628 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
629 
630 	if (cifs_file->offload)
631 		queue_work(fileinfo_put_wq, &cifs_file->put);
632 	else
633 		cifsFileInfo_put_final(cifs_file);
634 }
635 
636 /**
637  * cifsFileInfo_put - release a reference of file priv data
638  *
639  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
640  *
641  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
642  */
643 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
644 {
645 	_cifsFileInfo_put(cifs_file, true, true);
646 }
647 
648 /**
649  * _cifsFileInfo_put - release a reference of file priv data
650  *
651  * This may involve closing the filehandle @cifs_file out on the
652  * server. Must be called without holding tcon->open_file_lock,
653  * cinode->open_file_lock and cifs_file->file_info_lock.
654  *
655  * If @wait_for_oplock_handler is true and we are releasing the last
656  * reference, wait for any running oplock break handler of the file
657  * and cancel any pending one.
658  *
659  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
660  * @wait_oplock_handler: must be false if called from oplock_break_handler
661  * @offload:	if true, offload the final put to a workqueue; callers pass false on close and oplock breaks
662  *
663  */
664 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
665 		       bool wait_oplock_handler, bool offload)
666 {
667 	struct inode *inode = d_inode(cifs_file->dentry);
668 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
669 	struct TCP_Server_Info *server = tcon->ses->server;
670 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
671 	struct super_block *sb = inode->i_sb;
672 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
673 	struct cifs_fid fid = {};
674 	struct cifs_pending_open open;
675 	bool oplock_break_cancelled;
676 	bool serverclose_offloaded = false;
677 
678 	spin_lock(&tcon->open_file_lock);
679 	spin_lock(&cifsi->open_file_lock);
680 	spin_lock(&cifs_file->file_info_lock);
681 
682 	cifs_file->offload = offload;
683 	if (--cifs_file->count > 0) {
684 		spin_unlock(&cifs_file->file_info_lock);
685 		spin_unlock(&cifsi->open_file_lock);
686 		spin_unlock(&tcon->open_file_lock);
687 		return;
688 	}
689 	spin_unlock(&cifs_file->file_info_lock);
690 
691 	if (server->ops->get_lease_key)
692 		server->ops->get_lease_key(inode, &fid);
693 
694 	/* store open in pending opens to make sure we don't miss lease break */
695 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
696 
697 	/* remove it from the lists */
698 	list_del(&cifs_file->flist);
699 	list_del(&cifs_file->tlist);
700 	atomic_dec(&tcon->num_local_opens);
701 
702 	if (list_empty(&cifsi->openFileList)) {
703 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
704 			 d_inode(cifs_file->dentry));
705 		/*
706 		 * In strict cache mode we need to invalidate the mapping on the
707 		 * last close because it may cause an error when we open this
708 		 * file again and get at least a level II oplock.
709 		 */
710 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
711 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
712 		cifs_set_oplock_level(cifsi, 0);
713 	}
714 
715 	spin_unlock(&cifsi->open_file_lock);
716 	spin_unlock(&tcon->open_file_lock);
717 
718 	oplock_break_cancelled = wait_oplock_handler ?
719 		cancel_work_sync(&cifs_file->oplock_break) : false;
720 
721 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
722 		struct TCP_Server_Info *server = tcon->ses->server;
723 		unsigned int xid;
724 		int rc = 0;
725 
726 		xid = get_xid();
727 		if (server->ops->close_getattr)
728 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
729 		else if (server->ops->close)
730 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
731 		_free_xid(xid);
732 
733 		if (rc == -EBUSY || rc == -EAGAIN) {
734 			// Server close failed, hence offloading it as an async op
735 			queue_work(serverclose_wq, &cifs_file->serverclose);
736 			serverclose_offloaded = true;
737 		}
738 	}
739 
740 	if (oplock_break_cancelled)
741 		cifs_done_oplock_break(cifsi);
742 
743 	cifs_del_pending_open(&open);
744 
745 	// If serverclose has been offloaded to the wq (on failure), it will
746 	// handle offloading the put as well. If serverclose was not offloaded,
747 	// we need to handle offloading the put here.
748 	if (!serverclose_offloaded) {
749 		if (offload)
750 			queue_work(fileinfo_put_wq, &cifs_file->put);
751 		else
752 			cifsFileInfo_put_final(cifs_file);
753 	}
754 }
755 
756 int cifs_open(struct inode *inode, struct file *file)
757 
758 {
759 	int rc = -EACCES;
760 	unsigned int xid;
761 	__u32 oplock;
762 	struct cifs_sb_info *cifs_sb;
763 	struct TCP_Server_Info *server;
764 	struct cifs_tcon *tcon;
765 	struct tcon_link *tlink;
766 	struct cifsFileInfo *cfile = NULL;
767 	void *page;
768 	const char *full_path;
769 	bool posix_open_ok = false;
770 	struct cifs_fid fid = {};
771 	struct cifs_pending_open open;
772 	struct cifs_open_info_data data = {};
773 
774 	xid = get_xid();
775 
776 	cifs_sb = CIFS_SB(inode->i_sb);
777 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
778 		free_xid(xid);
779 		return -EIO;
780 	}
781 
782 	tlink = cifs_sb_tlink(cifs_sb);
783 	if (IS_ERR(tlink)) {
784 		free_xid(xid);
785 		return PTR_ERR(tlink);
786 	}
787 	tcon = tlink_tcon(tlink);
788 	server = tcon->ses->server;
789 
790 	page = alloc_dentry_path();
791 	full_path = build_path_from_dentry(file_dentry(file), page);
792 	if (IS_ERR(full_path)) {
793 		rc = PTR_ERR(full_path);
794 		goto out;
795 	}
796 
797 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
798 		 inode, file->f_flags, full_path);
799 
800 	if (file->f_flags & O_DIRECT &&
801 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
802 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
803 			file->f_op = &cifs_file_direct_nobrl_ops;
804 		else
805 			file->f_op = &cifs_file_direct_ops;
806 	}
807 
808 	/* Get the cached handle as SMB2 close is deferred */
809 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
810 	if (rc == 0) {
811 		if (file->f_flags == cfile->f_flags) {
812 			file->private_data = cfile;
813 			spin_lock(&CIFS_I(inode)->deferred_lock);
814 			cifs_del_deferred_close(cfile);
815 			spin_unlock(&CIFS_I(inode)->deferred_lock);
816 			goto use_cache;
817 		} else {
818 			_cifsFileInfo_put(cfile, true, false);
819 		}
820 	}
821 
822 	if (server->oplocks)
823 		oplock = REQ_OPLOCK;
824 	else
825 		oplock = 0;
826 
827 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
828 	if (!tcon->broken_posix_open && tcon->unix_ext &&
829 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
830 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
831 		/* can not refresh inode info since size could be stale */
832 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
833 				cifs_sb->ctx->file_mode /* ignored */,
834 				file->f_flags, &oplock, &fid.netfid, xid);
835 		if (rc == 0) {
836 			cifs_dbg(FYI, "posix open succeeded\n");
837 			posix_open_ok = true;
838 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
839 			if (tcon->ses->serverNOS)
840 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
841 					 tcon->ses->ip_addr,
842 					 tcon->ses->serverNOS);
843 			tcon->broken_posix_open = true;
844 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
845 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
846 			goto out;
847 		/*
848 		 * Else fall through and retry the open the old way on network
849 		 * i/o or DFS errors.
850 		 */
851 	}
852 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
853 
854 	if (server->ops->get_lease_key)
855 		server->ops->get_lease_key(inode, &fid);
856 
857 	cifs_add_pending_open(&fid, tlink, &open);
858 
859 	if (!posix_open_ok) {
860 		if (server->ops->get_lease_key)
861 			server->ops->get_lease_key(inode, &fid);
862 
863 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
864 				  xid, &data);
865 		if (rc) {
866 			cifs_del_pending_open(&open);
867 			goto out;
868 		}
869 	}
870 
871 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
872 	if (cfile == NULL) {
873 		if (server->ops->close)
874 			server->ops->close(xid, tcon, &fid);
875 		cifs_del_pending_open(&open);
876 		rc = -ENOMEM;
877 		goto out;
878 	}
879 
880 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
881 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
882 		/*
883 		 * Time to set the mode, which we could not set earlier due to
884 		 * problems creating new read-only files.
885 		 */
886 		struct cifs_unix_set_info_args args = {
887 			.mode	= inode->i_mode,
888 			.uid	= INVALID_UID, /* no change */
889 			.gid	= INVALID_GID, /* no change */
890 			.ctime	= NO_CHANGE_64,
891 			.atime	= NO_CHANGE_64,
892 			.mtime	= NO_CHANGE_64,
893 			.device	= 0,
894 		};
895 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
896 				       cfile->pid);
897 	}
898 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
899 
900 use_cache:
901 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
902 			   file->f_mode & FMODE_WRITE);
903 	if (!(file->f_flags & O_DIRECT))
904 		goto out;
905 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
906 		goto out;
907 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
908 
909 out:
910 	free_dentry_path(page);
911 	free_xid(xid);
912 	cifs_put_tlink(tlink);
913 	cifs_free_open_info(&data);
914 	return rc;
915 }
916 
917 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
918 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
919 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
920 
921 /*
922  * Try to reacquire byte range locks that were released when session
923  * to server was lost.
924  */
925 static int
926 cifs_relock_file(struct cifsFileInfo *cfile)
927 {
928 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
929 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
930 	int rc = 0;
931 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
932 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
933 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
934 
935 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
936 	if (cinode->can_cache_brlcks) {
937 		/* can cache locks - no need to relock */
938 		up_read(&cinode->lock_sem);
939 		return rc;
940 	}
941 
942 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
943 	if (cap_unix(tcon->ses) &&
944 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
945 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
946 		rc = cifs_push_posix_locks(cfile);
947 	else
948 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
949 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
950 
951 	up_read(&cinode->lock_sem);
952 	return rc;
953 }
954 
955 static int
956 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
957 {
958 	int rc = -EACCES;
959 	unsigned int xid;
960 	__u32 oplock;
961 	struct cifs_sb_info *cifs_sb;
962 	struct cifs_tcon *tcon;
963 	struct TCP_Server_Info *server;
964 	struct cifsInodeInfo *cinode;
965 	struct inode *inode;
966 	void *page;
967 	const char *full_path;
968 	int desired_access;
969 	int disposition = FILE_OPEN;
970 	int create_options = CREATE_NOT_DIR;
971 	struct cifs_open_parms oparms;
972 	int rdwr_for_fscache = 0;
973 
974 	xid = get_xid();
975 	mutex_lock(&cfile->fh_mutex);
976 	if (!cfile->invalidHandle) {
977 		mutex_unlock(&cfile->fh_mutex);
978 		free_xid(xid);
979 		return 0;
980 	}
981 
982 	inode = d_inode(cfile->dentry);
983 	cifs_sb = CIFS_SB(inode->i_sb);
984 	tcon = tlink_tcon(cfile->tlink);
985 	server = tcon->ses->server;
986 
987 	/*
988 	 * Cannot grab the rename sem here: various ops, including those that
989 	 * already hold the rename sem, can end up causing writepage to be
990 	 * called, and if the server was down that means we end up here, so we
991 	 * can never tell whether the caller already holds the rename_sem.
992 	 */
993 	page = alloc_dentry_path();
994 	full_path = build_path_from_dentry(cfile->dentry, page);
995 	if (IS_ERR(full_path)) {
996 		mutex_unlock(&cfile->fh_mutex);
997 		free_dentry_path(page);
998 		free_xid(xid);
999 		return PTR_ERR(full_path);
1000 	}
1001 
1002 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1003 		 inode, cfile->f_flags, full_path);
1004 
1005 	if (tcon->ses->server->oplocks)
1006 		oplock = REQ_OPLOCK;
1007 	else
1008 		oplock = 0;
1009 
1010 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1011 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1012 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1013 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1014 		/*
1015 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1016 		 * original open. Must mask them off for a reopen.
1017 		 */
1018 		unsigned int oflags = cfile->f_flags &
1019 						~(O_CREAT | O_EXCL | O_TRUNC);
1020 
1021 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1022 				     cifs_sb->ctx->file_mode /* ignored */,
1023 				     oflags, &oplock, &cfile->fid.netfid, xid);
1024 		if (rc == 0) {
1025 			cifs_dbg(FYI, "posix reopen succeeded\n");
1026 			oparms.reconnect = true;
1027 			goto reopen_success;
1028 		}
1029 		/*
1030 		 * Fall through and retry the open the old way on errors;
1031 		 * especially in the reconnect path it is important to retry hard.
1032 		 */
1033 	}
1034 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1035 
1036 	/* If we're caching, we need to be able to fill in around partial writes. */
1037 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1038 		rdwr_for_fscache = 1;
1039 
1040 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1041 
1042 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1043 	if (cfile->f_flags & O_SYNC)
1044 		create_options |= CREATE_WRITE_THROUGH;
1045 
1046 	if (cfile->f_flags & O_DIRECT)
1047 		create_options |= CREATE_NO_BUFFER;
1048 
1049 	if (server->ops->get_lease_key)
1050 		server->ops->get_lease_key(inode, &cfile->fid);
1051 
1052 retry_open:
1053 	oparms = (struct cifs_open_parms) {
1054 		.tcon = tcon,
1055 		.cifs_sb = cifs_sb,
1056 		.desired_access = desired_access,
1057 		.create_options = cifs_create_options(cifs_sb, create_options),
1058 		.disposition = disposition,
1059 		.path = full_path,
1060 		.fid = &cfile->fid,
1061 		.reconnect = true,
1062 	};
1063 
1064 	/*
1065 	 * Cannot refresh the inode by passing in a file_info buf returned by
1066 	 * ops->open and then calling get_inode_info with that buf, since the
1067 	 * file might have write-behind data that needs to be flushed and the
1068 	 * server's version of the file size can be stale. If we knew for sure
1069 	 * that the inode was not dirty locally, we could do this.
1070 	 */
1071 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1072 	if (rc == -ENOENT && oparms.reconnect == false) {
1073 		/* durable handle timeout is expired - open the file again */
1074 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1075 		/* indicate that we need to relock the file */
1076 		oparms.reconnect = true;
1077 	}
1078 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1079 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1080 		rdwr_for_fscache = 2;
1081 		goto retry_open;
1082 	}
1083 
1084 	if (rc) {
1085 		mutex_unlock(&cfile->fh_mutex);
1086 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1087 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1088 		goto reopen_error_exit;
1089 	}
1090 
1091 	if (rdwr_for_fscache == 2)
1092 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1093 
1094 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1095 reopen_success:
1096 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1097 	cfile->invalidHandle = false;
1098 	mutex_unlock(&cfile->fh_mutex);
1099 	cinode = CIFS_I(inode);
1100 
1101 	if (can_flush) {
1102 		rc = filemap_write_and_wait(inode->i_mapping);
1103 		if (!is_interrupt_error(rc))
1104 			mapping_set_error(inode->i_mapping, rc);
1105 
1106 		if (tcon->posix_extensions) {
1107 			rc = smb311_posix_get_inode_info(&inode, full_path,
1108 							 NULL, inode->i_sb, xid);
1109 		} else if (tcon->unix_ext) {
1110 			rc = cifs_get_inode_info_unix(&inode, full_path,
1111 						      inode->i_sb, xid);
1112 		} else {
1113 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1114 						 inode->i_sb, xid, NULL);
1115 		}
1116 	}
1117 	/*
1118 	 * Else we are already writing data out to the server and could deadlock
1119 	 * if we tried to flush it; and since we do not know whether we have data
1120 	 * that would invalidate the current end of file on the server, we cannot
1121 	 * go to the server to get the new inode info.
1122 	 */
1123 
1124 	/*
1125 	 * If the server returned a read oplock and we have mandatory brlocks,
1126 	 * set oplock level to None.
1127 	 */
1128 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1129 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1130 		oplock = 0;
1131 	}
1132 
1133 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1134 	if (oparms.reconnect)
1135 		cifs_relock_file(cfile);
1136 
1137 reopen_error_exit:
1138 	free_dentry_path(page);
1139 	free_xid(xid);
1140 	return rc;
1141 }
1142 
1143 void smb2_deferred_work_close(struct work_struct *work)
1144 {
1145 	struct cifsFileInfo *cfile = container_of(work,
1146 			struct cifsFileInfo, deferred.work);
1147 
1148 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1149 	cifs_del_deferred_close(cfile);
1150 	cfile->deferred_close_scheduled = false;
1151 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1152 	_cifsFileInfo_put(cfile, true, false);
1153 }
1154 
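/*
 * A close may be deferred only when the mount has a non-zero closetimeo,
 * the handle holds a granted lease caching at least read+handle (RH or RHW),
 * a deferred-close structure was allocated, and no lock operation has marked
 * the inode close-on-lock.
 */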
1155 static bool
1156 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1157 {
1158 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1159 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1160 
1161 	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1162 			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
1163 			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1164 			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1165 
1166 }
1167 
1168 int cifs_close(struct inode *inode, struct file *file)
1169 {
1170 	struct cifsFileInfo *cfile;
1171 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1172 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1173 	struct cifs_deferred_close *dclose;
1174 
1175 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1176 
1177 	if (file->private_data != NULL) {
1178 		cfile = file->private_data;
1179 		file->private_data = NULL;
1180 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1181 		if ((cfile->status_file_deleted == false) &&
1182 		    (smb2_can_defer_close(inode, dclose))) {
1183 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1184 				inode_set_mtime_to_ts(inode,
1185 						      inode_set_ctime_current(inode));
1186 			}
1187 			spin_lock(&cinode->deferred_lock);
1188 			cifs_add_deferred_close(cfile, dclose);
1189 			if (cfile->deferred_close_scheduled &&
1190 			    delayed_work_pending(&cfile->deferred)) {
1191 				/*
1192 				 * If there is no pending work, mod_delayed_work queues new work.
1193 				 * So, increase the ref count to avoid a use-after-free.
1194 				 */
1195 				if (!mod_delayed_work(deferredclose_wq,
1196 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1197 					cifsFileInfo_get(cfile);
1198 			} else {
1199 				/* Deferred close for files */
1200 				queue_delayed_work(deferredclose_wq,
1201 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1202 				cfile->deferred_close_scheduled = true;
1203 				spin_unlock(&cinode->deferred_lock);
1204 				return 0;
1205 			}
1206 			spin_unlock(&cinode->deferred_lock);
1207 			_cifsFileInfo_put(cfile, true, false);
1208 		} else {
1209 			_cifsFileInfo_put(cfile, true, false);
1210 			kfree(dclose);
1211 		}
1212 	}
1213 
1214 	/* return code from the ->release op is always ignored */
1215 	return 0;
1216 }
1217 
1218 void
1219 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1220 {
1221 	struct cifsFileInfo *open_file, *tmp;
1222 	struct list_head tmp_list;
1223 
1224 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1225 		return;
1226 
1227 	tcon->need_reopen_files = false;
1228 
1229 	cifs_dbg(FYI, "Reopen persistent handles\n");
1230 	INIT_LIST_HEAD(&tmp_list);
1231 
1232 	/* list all files open on tree connection, reopen resilient handles  */
1233 	spin_lock(&tcon->open_file_lock);
1234 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1235 		if (!open_file->invalidHandle)
1236 			continue;
1237 		cifsFileInfo_get(open_file);
1238 		list_add_tail(&open_file->rlist, &tmp_list);
1239 	}
1240 	spin_unlock(&tcon->open_file_lock);
1241 
1242 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1243 		if (cifs_reopen_file(open_file, false /* do not flush */))
1244 			tcon->need_reopen_files = true;
1245 		list_del_init(&open_file->rlist);
1246 		cifsFileInfo_put(open_file);
1247 	}
1248 }
1249 
1250 int cifs_closedir(struct inode *inode, struct file *file)
1251 {
1252 	int rc = 0;
1253 	unsigned int xid;
1254 	struct cifsFileInfo *cfile = file->private_data;
1255 	struct cifs_tcon *tcon;
1256 	struct TCP_Server_Info *server;
1257 	char *buf;
1258 
1259 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1260 
1261 	if (cfile == NULL)
1262 		return rc;
1263 
1264 	xid = get_xid();
1265 	tcon = tlink_tcon(cfile->tlink);
1266 	server = tcon->ses->server;
1267 
1268 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1269 	spin_lock(&cfile->file_info_lock);
1270 	if (server->ops->dir_needs_close(cfile)) {
1271 		cfile->invalidHandle = true;
1272 		spin_unlock(&cfile->file_info_lock);
1273 		if (server->ops->close_dir)
1274 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1275 		else
1276 			rc = -ENOSYS;
1277 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1278 		/* not much we can do if it fails anyway, ignore rc */
1279 		rc = 0;
1280 	} else
1281 		spin_unlock(&cfile->file_info_lock);
1282 
1283 	buf = cfile->srch_inf.ntwrk_buf_start;
1284 	if (buf) {
1285 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1286 		cfile->srch_inf.ntwrk_buf_start = NULL;
1287 		if (cfile->srch_inf.smallBuf)
1288 			cifs_small_buf_release(buf);
1289 		else
1290 			cifs_buf_release(buf);
1291 	}
1292 
1293 	cifs_put_tlink(cfile->tlink);
1294 	kfree(file->private_data);
1295 	file->private_data = NULL;
1296 	/* BB can we lock the filestruct while this is going on? */
1297 	free_xid(xid);
1298 	return rc;
1299 }
1300 
1301 static struct cifsLockInfo *
1302 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1303 {
1304 	struct cifsLockInfo *lock =
1305 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1306 	if (!lock)
1307 		return lock;
1308 	lock->offset = offset;
1309 	lock->length = length;
1310 	lock->type = type;
1311 	lock->pid = current->tgid;
1312 	lock->flags = flags;
1313 	INIT_LIST_HEAD(&lock->blist);
1314 	init_waitqueue_head(&lock->block_q);
1315 	return lock;
1316 }
1317 
1318 void
1319 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1320 {
1321 	struct cifsLockInfo *li, *tmp;
1322 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1323 		list_del_init(&li->blist);
1324 		wake_up(&li->block_q);
1325 	}
1326 }
1327 
1328 #define CIFS_LOCK_OP	0
1329 #define CIFS_READ_OP	1
1330 #define CIFS_WRITE_OP	2
1331 
1332 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1333 static bool
1334 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1335 			    __u64 length, __u8 type, __u16 flags,
1336 			    struct cifsFileInfo *cfile,
1337 			    struct cifsLockInfo **conf_lock, int rw_check)
1338 {
1339 	struct cifsLockInfo *li;
1340 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1341 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1342 
1343 	list_for_each_entry(li, &fdlocks->locks, llist) {
1344 		if (offset + length <= li->offset ||
1345 		    offset >= li->offset + li->length)
1346 			continue;
1347 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1348 		    server->ops->compare_fids(cfile, cur_cfile)) {
1349 			/* shared lock prevents write op through the same fid */
1350 			if (!(li->type & server->vals->shared_lock_type) ||
1351 			    rw_check != CIFS_WRITE_OP)
1352 				continue;
1353 		}
1354 		if ((type & server->vals->shared_lock_type) &&
1355 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1356 		     current->tgid == li->pid) || type == li->type))
1357 			continue;
1358 		if (rw_check == CIFS_LOCK_OP &&
1359 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1360 		    server->ops->compare_fids(cfile, cur_cfile))
1361 			continue;
1362 		if (conf_lock)
1363 			*conf_lock = li;
1364 		return true;
1365 	}
1366 	return false;
1367 }
1368 
1369 bool
1370 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1371 			__u8 type, __u16 flags,
1372 			struct cifsLockInfo **conf_lock, int rw_check)
1373 {
1374 	bool rc = false;
1375 	struct cifs_fid_locks *cur;
1376 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1377 
1378 	list_for_each_entry(cur, &cinode->llist, llist) {
1379 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1380 						 flags, cfile, conf_lock,
1381 						 rw_check);
1382 		if (rc)
1383 			break;
1384 	}
1385 
1386 	return rc;
1387 }
1388 
1389 /*
1390  * Check if there is another lock that prevents us from setting the lock
1391  * (mandatory style). If such a lock exists, update the flock structure with
1392  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1393  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1394  * send a request to the server, or 1 otherwise.
1395  */
1396 static int
1397 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1398 	       __u8 type, struct file_lock *flock)
1399 {
1400 	int rc = 0;
1401 	struct cifsLockInfo *conf_lock;
1402 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1403 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1404 	bool exist;
1405 
1406 	down_read(&cinode->lock_sem);
1407 
1408 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1409 					flock->fl_flags, &conf_lock,
1410 					CIFS_LOCK_OP);
1411 	if (exist) {
1412 		flock->fl_start = conf_lock->offset;
1413 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1414 		flock->fl_pid = conf_lock->pid;
1415 		if (conf_lock->type & server->vals->shared_lock_type)
1416 			flock->fl_type = F_RDLCK;
1417 		else
1418 			flock->fl_type = F_WRLCK;
1419 	} else if (!cinode->can_cache_brlcks)
1420 		rc = 1;
1421 	else
1422 		flock->fl_type = F_UNLCK;
1423 
1424 	up_read(&cinode->lock_sem);
1425 	return rc;
1426 }
1427 
1428 static void
1429 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1430 {
1431 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1432 	cifs_down_write(&cinode->lock_sem);
1433 	list_add_tail(&lock->llist, &cfile->llist->locks);
1434 	up_write(&cinode->lock_sem);
1435 }
1436 
1437 /*
1438  * Set the byte-range lock (mandatory style). Returns:
1439  * 1) 0, if we set the lock and don't need to request to the server;
1440  * 2) 1, if no locks prevent us but we need to request to the server;
1441  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1442  */
1443 static int
1444 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1445 		 bool wait)
1446 {
1447 	struct cifsLockInfo *conf_lock;
1448 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1449 	bool exist;
1450 	int rc = 0;
1451 
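	/*
	 * If a conflicting lock is found and @wait is true, hang this lock on
	 * the conflicting lock's block list and sleep until cifs_del_lock_waiters()
	 * unhooks us (the wait condition checks that our blist entry is empty
	 * again), then retry the whole check.
	 */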
1452 try_again:
1453 	exist = false;
1454 	cifs_down_write(&cinode->lock_sem);
1455 
1456 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1457 					lock->type, lock->flags, &conf_lock,
1458 					CIFS_LOCK_OP);
1459 	if (!exist && cinode->can_cache_brlcks) {
1460 		list_add_tail(&lock->llist, &cfile->llist->locks);
1461 		up_write(&cinode->lock_sem);
1462 		return rc;
1463 	}
1464 
1465 	if (!exist)
1466 		rc = 1;
1467 	else if (!wait)
1468 		rc = -EACCES;
1469 	else {
1470 		list_add_tail(&lock->blist, &conf_lock->blist);
1471 		up_write(&cinode->lock_sem);
1472 		rc = wait_event_interruptible(lock->block_q,
1473 					(lock->blist.prev == &lock->blist) &&
1474 					(lock->blist.next == &lock->blist));
1475 		if (!rc)
1476 			goto try_again;
1477 		cifs_down_write(&cinode->lock_sem);
1478 		list_del_init(&lock->blist);
1479 	}
1480 
1481 	up_write(&cinode->lock_sem);
1482 	return rc;
1483 }
1484 
1485 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1486 /*
1487  * Check if there is another lock that prevents us from setting the lock
1488  * (posix style). If such a lock exists, update the flock structure with its
1489  * properties. Otherwise, set the flock type to F_UNLCK if we can cache
1490  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1491  * send a request to the server, or 1 otherwise.
1492  */
1493 static int
1494 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1495 {
1496 	int rc = 0;
1497 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1498 	unsigned char saved_type = flock->fl_type;
1499 
1500 	if ((flock->fl_flags & FL_POSIX) == 0)
1501 		return 1;
1502 
1503 	down_read(&cinode->lock_sem);
1504 	posix_test_lock(file, flock);
1505 
1506 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1507 		flock->fl_type = saved_type;
1508 		rc = 1;
1509 	}
1510 
1511 	up_read(&cinode->lock_sem);
1512 	return rc;
1513 }
1514 
1515 /*
1516  * Set the byte-range lock (posix style). Returns:
1517  * 1) <0, if the error occurs while setting the lock;
1518  * 2) 0, if we set the lock and don't need to request to the server;
1519  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1520  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1521  */
1522 static int
1523 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1524 {
1525 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1526 	int rc = FILE_LOCK_DEFERRED + 1;
1527 
1528 	if ((flock->fl_flags & FL_POSIX) == 0)
1529 		return rc;
1530 
1531 	cifs_down_write(&cinode->lock_sem);
1532 	if (!cinode->can_cache_brlcks) {
1533 		up_write(&cinode->lock_sem);
1534 		return rc;
1535 	}
1536 
1537 	rc = posix_lock_file(file, flock, NULL);
1538 	up_write(&cinode->lock_sem);
1539 	return rc;
1540 }
1541 
1542 int
1543 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1544 {
1545 	unsigned int xid;
1546 	int rc = 0, stored_rc;
1547 	struct cifsLockInfo *li, *tmp;
1548 	struct cifs_tcon *tcon;
1549 	unsigned int num, max_num, max_buf;
1550 	LOCKING_ANDX_RANGE *buf, *cur;
1551 	static const int types[] = {
1552 		LOCKING_ANDX_LARGE_FILES,
1553 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1554 	};
1555 	int i;
1556 
1557 	xid = get_xid();
1558 	tcon = tlink_tcon(cfile->tlink);
1559 
1560 	/*
1561 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1562 	 * and check it before using.
1563 	 */
1564 	max_buf = tcon->ses->server->maxBuf;
1565 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1566 		free_xid(xid);
1567 		return -EINVAL;
1568 	}
1569 
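	/*
	 * Cap the lock buffer at one page and work out how many
	 * LOCKING_ANDX_RANGE entries fit after the SMB header; the cached
	 * locks are then pushed to the server in batches of at most max_num.
	 */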
1570 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1571 		     PAGE_SIZE);
1572 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1573 			PAGE_SIZE);
1574 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1575 						sizeof(LOCKING_ANDX_RANGE);
1576 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1577 	if (!buf) {
1578 		free_xid(xid);
1579 		return -ENOMEM;
1580 	}
1581 
1582 	for (i = 0; i < 2; i++) {
1583 		cur = buf;
1584 		num = 0;
1585 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1586 			if (li->type != types[i])
1587 				continue;
1588 			cur->Pid = cpu_to_le16(li->pid);
1589 			cur->LengthLow = cpu_to_le32((u32)li->length);
1590 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1591 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1592 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1593 			if (++num == max_num) {
1594 				stored_rc = cifs_lockv(xid, tcon,
1595 						       cfile->fid.netfid,
1596 						       (__u8)li->type, 0, num,
1597 						       buf);
1598 				if (stored_rc)
1599 					rc = stored_rc;
1600 				cur = buf;
1601 				num = 0;
1602 			} else
1603 				cur++;
1604 		}
1605 
1606 		if (num) {
1607 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1608 					       (__u8)types[i], 0, num, buf);
1609 			if (stored_rc)
1610 				rc = stored_rc;
1611 		}
1612 	}
1613 
1614 	kfree(buf);
1615 	free_xid(xid);
1616 	return rc;
1617 }
1618 
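/*
 * POSIX lock requests on the wire carry only a 32-bit "pid" to identify the
 * lock owner, so hash the fl_owner_t pointer and mix in the random
 * cifs_lock_secret rather than exposing a raw kernel pointer.
 */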
1619 static __u32
1620 hash_lockowner(fl_owner_t owner)
1621 {
1622 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1623 }
1624 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1625 
1626 struct lock_to_push {
1627 	struct list_head llist;
1628 	__u64 offset;
1629 	__u64 length;
1630 	__u32 pid;
1631 	__u16 netfid;
1632 	__u8 type;
1633 };
1634 
1635 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1636 static int
1637 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1638 {
1639 	struct inode *inode = d_inode(cfile->dentry);
1640 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1641 	struct file_lock *flock;
1642 	struct file_lock_context *flctx = locks_inode_context(inode);
1643 	unsigned int count = 0, i;
1644 	int rc = 0, xid, type;
1645 	struct list_head locks_to_send, *el;
1646 	struct lock_to_push *lck, *tmp;
1647 	__u64 length;
1648 
1649 	xid = get_xid();
1650 
1651 	if (!flctx)
1652 		goto out;
1653 
1654 	spin_lock(&flctx->flc_lock);
1655 	list_for_each(el, &flctx->flc_posix) {
1656 		count++;
1657 	}
1658 	spin_unlock(&flctx->flc_lock);
1659 
1660 	INIT_LIST_HEAD(&locks_to_send);
1661 
1662 	/*
1663 	 * Allocating count locks is enough because no FL_POSIX locks can be
1664 	 * added to the list while we are holding cinode->lock_sem, which
1665 	 * protects locking operations on this inode.
1666 	 */
1667 	for (i = 0; i < count; i++) {
1668 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1669 		if (!lck) {
1670 			rc = -ENOMEM;
1671 			goto err_out;
1672 		}
1673 		list_add_tail(&lck->llist, &locks_to_send);
1674 	}
1675 
1676 	el = locks_to_send.next;
1677 	spin_lock(&flctx->flc_lock);
1678 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1679 		if (el == &locks_to_send) {
1680 			/*
1681 			 * The list ended. We don't have enough allocated
1682 			 * structures - something is really wrong.
1683 			 */
1684 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1685 			break;
1686 		}
1687 		length = cifs_flock_len(flock);
1688 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1689 			type = CIFS_RDLCK;
1690 		else
1691 			type = CIFS_WRLCK;
1692 		lck = list_entry(el, struct lock_to_push, llist);
1693 		lck->pid = hash_lockowner(flock->fl_owner);
1694 		lck->netfid = cfile->fid.netfid;
1695 		lck->length = length;
1696 		lck->type = type;
1697 		lck->offset = flock->fl_start;
1698 	}
1699 	spin_unlock(&flctx->flc_lock);
1700 
1701 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1702 		int stored_rc;
1703 
1704 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1705 					     lck->offset, lck->length, NULL,
1706 					     lck->type, 0);
1707 		if (stored_rc)
1708 			rc = stored_rc;
1709 		list_del(&lck->llist);
1710 		kfree(lck);
1711 	}
1712 
1713 out:
1714 	free_xid(xid);
1715 	return rc;
1716 err_out:
1717 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1718 		list_del(&lck->llist);
1719 		kfree(lck);
1720 	}
1721 	goto out;
1722 }
1723 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1724 
1725 static int
1726 cifs_push_locks(struct cifsFileInfo *cfile)
1727 {
1728 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1729 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1730 	int rc = 0;
1731 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1732 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1733 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1734 
1735 	/* we are going to update can_cache_brlcks here - need a write access */
1736 	cifs_down_write(&cinode->lock_sem);
1737 	if (!cinode->can_cache_brlcks) {
1738 		up_write(&cinode->lock_sem);
1739 		return rc;
1740 	}
1741 
1742 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1743 	if (cap_unix(tcon->ses) &&
1744 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1745 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1746 		rc = cifs_push_posix_locks(cfile);
1747 	else
1748 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1749 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1750 
1751 	cinode->can_cache_brlcks = false;
1752 	up_write(&cinode->lock_sem);
1753 	return rc;
1754 }
1755 
1756 static void
1757 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1758 		bool *wait_flag, struct TCP_Server_Info *server)
1759 {
1760 	if (flock->fl_flags & FL_POSIX)
1761 		cifs_dbg(FYI, "Posix\n");
1762 	if (flock->fl_flags & FL_FLOCK)
1763 		cifs_dbg(FYI, "Flock\n");
1764 	if (flock->fl_flags & FL_SLEEP) {
1765 		cifs_dbg(FYI, "Blocking lock\n");
1766 		*wait_flag = true;
1767 	}
1768 	if (flock->fl_flags & FL_ACCESS)
1769 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1770 	if (flock->fl_flags & FL_LEASE)
1771 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1772 	if (flock->fl_flags &
1773 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1774 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1775 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1776 
1777 	*type = server->vals->large_lock_type;
1778 	if (flock->fl_type == F_WRLCK) {
1779 		cifs_dbg(FYI, "F_WRLCK\n");
1780 		*type |= server->vals->exclusive_lock_type;
1781 		*lock = 1;
1782 	} else if (flock->fl_type == F_UNLCK) {
1783 		cifs_dbg(FYI, "F_UNLCK\n");
1784 		*type |= server->vals->unlock_lock_type;
1785 		*unlock = 1;
1786 		/* Check if unlock includes more than one lock range */
1787 	} else if (flock->fl_type == F_RDLCK) {
1788 		cifs_dbg(FYI, "F_RDLCK\n");
1789 		*type |= server->vals->shared_lock_type;
1790 		*lock = 1;
1791 	} else if (flock->fl_type == F_EXLCK) {
1792 		cifs_dbg(FYI, "F_EXLCK\n");
1793 		*type |= server->vals->exclusive_lock_type;
1794 		*lock = 1;
1795 	} else if (flock->fl_type == F_SHLCK) {
1796 		cifs_dbg(FYI, "F_SHLCK\n");
1797 		*type |= server->vals->shared_lock_type;
1798 		*lock = 1;
1799 	} else
1800 		cifs_dbg(FYI, "Unknown type of lock\n");
1801 }
1802 
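/*
 * Handle F_GETLK: test whether the requested range could be locked.  The
 * range is checked against locally cached locks first and then probed on the
 * server with a non-blocking lock that is released again immediately;
 * flock->fl_type is updated to describe any conflicting lock that was found.
 */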
1803 static int
1804 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1805 	   bool wait_flag, bool posix_lck, unsigned int xid)
1806 {
1807 	int rc = 0;
1808 	__u64 length = cifs_flock_len(flock);
1809 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1810 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1811 	struct TCP_Server_Info *server = tcon->ses->server;
1812 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1813 	__u16 netfid = cfile->fid.netfid;
1814 
1815 	if (posix_lck) {
1816 		int posix_lock_type;
1817 
1818 		rc = cifs_posix_lock_test(file, flock);
1819 		if (!rc)
1820 			return rc;
1821 
1822 		if (type & server->vals->shared_lock_type)
1823 			posix_lock_type = CIFS_RDLCK;
1824 		else
1825 			posix_lock_type = CIFS_WRLCK;
1826 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1827 				      hash_lockowner(flock->fl_owner),
1828 				      flock->fl_start, length, flock,
1829 				      posix_lock_type, wait_flag);
1830 		return rc;
1831 	}
1832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1833 
1834 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1835 	if (!rc)
1836 		return rc;
1837 
1838 	/* BB we could chain these into one lock request BB */
1839 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1840 				    1, 0, false);
1841 	if (rc == 0) {
1842 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1843 					    type, 0, 1, false);
1844 		flock->fl_type = F_UNLCK;
1845 		if (rc != 0)
1846 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1847 				 rc);
1848 		return 0;
1849 	}
1850 
1851 	if (type & server->vals->shared_lock_type) {
1852 		flock->fl_type = F_WRLCK;
1853 		return 0;
1854 	}
1855 
1856 	type &= ~server->vals->exclusive_lock_type;
1857 
1858 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1859 				    type | server->vals->shared_lock_type,
1860 				    1, 0, false);
1861 	if (rc == 0) {
1862 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1863 			type | server->vals->shared_lock_type, 0, 1, false);
1864 		flock->fl_type = F_RDLCK;
1865 		if (rc != 0)
1866 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1867 				 rc);
1868 	} else
1869 		flock->fl_type = F_WRLCK;
1870 
1871 	return 0;
1872 }
1873 
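/* Move every entry from one lock list to another. */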
1874 void
1875 cifs_move_llist(struct list_head *source, struct list_head *dest)
1876 {
1877 	struct list_head *li, *tmp;
1878 	list_for_each_safe(li, tmp, source)
1879 		list_move(li, dest);
1880 }
1881 
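/* Free every lock on the list after removing any waiters queued on it. */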
1882 void
1883 cifs_free_llist(struct list_head *llist)
1884 {
1885 	struct cifsLockInfo *li, *tmp;
1886 	list_for_each_entry_safe(li, tmp, llist, llist) {
1887 		cifs_del_lock_waiters(li);
1888 		list_del(&li->llist);
1889 		kfree(li);
1890 	}
1891 }
1892 
1893 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
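/*
 * Remove the cached locks that fall inside the unlock range from the file's
 * lock list and, unless lock caching is still enabled, send the matching
 * LOCKING_ANDX unlock requests to the server in batches limited by the
 * server's maximum buffer size.
 */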
1894 int
1895 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1896 		  unsigned int xid)
1897 {
1898 	int rc = 0, stored_rc;
1899 	static const int types[] = {
1900 		LOCKING_ANDX_LARGE_FILES,
1901 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1902 	};
1903 	unsigned int i;
1904 	unsigned int max_num, num, max_buf;
1905 	LOCKING_ANDX_RANGE *buf, *cur;
1906 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1907 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1908 	struct cifsLockInfo *li, *tmp;
1909 	__u64 length = cifs_flock_len(flock);
1910 	struct list_head tmp_llist;
1911 
1912 	INIT_LIST_HEAD(&tmp_llist);
1913 
1914 	/*
1915 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1916 	 * and check it before using.
1917 	 */
1918 	max_buf = tcon->ses->server->maxBuf;
1919 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1920 		return -EINVAL;
1921 
1922 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1923 		     PAGE_SIZE);
1924 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1925 			PAGE_SIZE);
1926 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1927 						sizeof(LOCKING_ANDX_RANGE);
1928 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1929 	if (!buf)
1930 		return -ENOMEM;
1931 
1932 	cifs_down_write(&cinode->lock_sem);
1933 	for (i = 0; i < 2; i++) {
1934 		cur = buf;
1935 		num = 0;
1936 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1937 			if (flock->fl_start > li->offset ||
1938 			    (flock->fl_start + length) <
1939 			    (li->offset + li->length))
1940 				continue;
1941 			if (current->tgid != li->pid)
1942 				continue;
1943 			if (types[i] != li->type)
1944 				continue;
1945 			if (cinode->can_cache_brlcks) {
1946 				/*
1947 				 * We can cache brlock requests - simply remove
1948 				 * a lock from the file's list.
1949 				 */
1950 				list_del(&li->llist);
1951 				cifs_del_lock_waiters(li);
1952 				kfree(li);
1953 				continue;
1954 			}
1955 			cur->Pid = cpu_to_le16(li->pid);
1956 			cur->LengthLow = cpu_to_le32((u32)li->length);
1957 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1958 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1959 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1960 			/*
1961 			 * We need to save a lock here to let us add it again to
1962 			 * the file's list if the unlock range request fails on
1963 			 * the server.
1964 			 */
1965 			list_move(&li->llist, &tmp_llist);
1966 			if (++num == max_num) {
1967 				stored_rc = cifs_lockv(xid, tcon,
1968 						       cfile->fid.netfid,
1969 						       li->type, num, 0, buf);
1970 				if (stored_rc) {
1971 					/*
1972 					 * We failed on the unlock range
1973 					 * request - add all locks from the tmp
1974 					 * list to the head of the file's list.
1975 					 */
1976 					cifs_move_llist(&tmp_llist,
1977 							&cfile->llist->locks);
1978 					rc = stored_rc;
1979 				} else
1980 					/*
1981 					 * The unlock range request succeeded -
1982 					 * free the tmp list.
1983 					 */
1984 					cifs_free_llist(&tmp_llist);
1985 				cur = buf;
1986 				num = 0;
1987 			} else
1988 				cur++;
1989 		}
1990 		if (num) {
1991 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1992 					       types[i], num, 0, buf);
1993 			if (stored_rc) {
1994 				cifs_move_llist(&tmp_llist,
1995 						&cfile->llist->locks);
1996 				rc = stored_rc;
1997 			} else
1998 				cifs_free_llist(&tmp_llist);
1999 		}
2000 	}
2001 
2002 	up_write(&cinode->lock_sem);
2003 	kfree(buf);
2004 	return rc;
2005 }
2006 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2007 
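/*
 * Apply a lock or unlock request: use POSIX (CIFS Unix Extensions) locks when
 * available, otherwise record the lock locally and send a mandatory
 * byte-range lock/unlock to the server, and finally update the local VFS lock
 * state for POSIX and flock requests.
 */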
2008 static int
2009 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2010 	   bool wait_flag, bool posix_lck, int lock, int unlock,
2011 	   unsigned int xid)
2012 {
2013 	int rc = 0;
2014 	__u64 length = cifs_flock_len(flock);
2015 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2016 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2017 	struct TCP_Server_Info *server = tcon->ses->server;
2018 	struct inode *inode = d_inode(cfile->dentry);
2019 
2020 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2021 	if (posix_lck) {
2022 		int posix_lock_type;
2023 
2024 		rc = cifs_posix_lock_set(file, flock);
2025 		if (rc <= FILE_LOCK_DEFERRED)
2026 			return rc;
2027 
2028 		if (type & server->vals->shared_lock_type)
2029 			posix_lock_type = CIFS_RDLCK;
2030 		else
2031 			posix_lock_type = CIFS_WRLCK;
2032 
2033 		if (unlock == 1)
2034 			posix_lock_type = CIFS_UNLCK;
2035 
2036 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2037 				      hash_lockowner(flock->fl_owner),
2038 				      flock->fl_start, length,
2039 				      NULL, posix_lock_type, wait_flag);
2040 		goto out;
2041 	}
2042 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2043 	if (lock) {
2044 		struct cifsLockInfo *lock;
2045 
2046 		lock = cifs_lock_init(flock->fl_start, length, type,
2047 				      flock->fl_flags);
2048 		if (!lock)
2049 			return -ENOMEM;
2050 
2051 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2052 		if (rc < 0) {
2053 			kfree(lock);
2054 			return rc;
2055 		}
2056 		if (!rc)
2057 			goto out;
2058 
2059 		/*
2060 		 * Windows 7 server can delay breaking lease from read to None
2061 		 * if we set a byte-range lock on a file - break it explicitly
2062 		 * before sending the lock to the server to be sure the next
2063 		 * read won't conflict with non-overlapping locks due to
2064 		 * page reading.
2065 		 */
2066 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2067 					CIFS_CACHE_READ(CIFS_I(inode))) {
2068 			cifs_zap_mapping(inode);
2069 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2070 				 inode);
2071 			CIFS_I(inode)->oplock = 0;
2072 		}
2073 
2074 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2075 					    type, 1, 0, wait_flag);
2076 		if (rc) {
2077 			kfree(lock);
2078 			return rc;
2079 		}
2080 
2081 		cifs_lock_add(cfile, lock);
2082 	} else if (unlock)
2083 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2084 
2085 out:
2086 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2087 		/*
2088 		 * If this is a request to remove all locks because we
2089 		 * are closing the file, it doesn't matter if the
2090 		 * unlocking failed as both cifs.ko and the SMB server
2091 		 * remove the lock on file close
2092 		 */
2093 		if (rc) {
2094 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2095 			if (!(flock->fl_flags & FL_CLOSE))
2096 				return rc;
2097 		}
2098 		rc = locks_lock_file_wait(file, flock);
2099 	}
2100 	return rc;
2101 }
2102 
2103 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2104 {
2105 	int rc, xid;
2106 	int lock = 0, unlock = 0;
2107 	bool wait_flag = false;
2108 	bool posix_lck = false;
2109 	struct cifs_sb_info *cifs_sb;
2110 	struct cifs_tcon *tcon;
2111 	struct cifsFileInfo *cfile;
2112 	__u32 type;
2113 
2114 	xid = get_xid();
2115 
2116 	if (!(fl->fl_flags & FL_FLOCK)) {
2117 		rc = -ENOLCK;
2118 		free_xid(xid);
2119 		return rc;
2120 	}
2121 
2122 	cfile = (struct cifsFileInfo *)file->private_data;
2123 	tcon = tlink_tcon(cfile->tlink);
2124 
2125 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2126 			tcon->ses->server);
2127 	cifs_sb = CIFS_FILE_SB(file);
2128 
2129 	if (cap_unix(tcon->ses) &&
2130 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2131 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2132 		posix_lck = true;
2133 
2134 	if (!lock && !unlock) {
2135 		/*
2136 		 * if this is neither a lock nor an unlock request, there is
2137 		 * nothing to do since we do not know what it is
2138 		 */
2139 		rc = -EOPNOTSUPP;
2140 		free_xid(xid);
2141 		return rc;
2142 	}
2143 
2144 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2145 			xid);
2146 	free_xid(xid);
2147 	return rc;
2150 }
2151 
2152 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2153 {
2154 	int rc, xid;
2155 	int lock = 0, unlock = 0;
2156 	bool wait_flag = false;
2157 	bool posix_lck = false;
2158 	struct cifs_sb_info *cifs_sb;
2159 	struct cifs_tcon *tcon;
2160 	struct cifsFileInfo *cfile;
2161 	__u32 type;
2162 
2163 	rc = -EACCES;
2164 	xid = get_xid();
2165 
2166 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2167 		 flock->fl_type, flock->fl_flags, (long long)flock->fl_start,
2168 		 (long long)flock->fl_end);
2169 
2170 	cfile = (struct cifsFileInfo *)file->private_data;
2171 	tcon = tlink_tcon(cfile->tlink);
2172 
2173 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2174 			tcon->ses->server);
2175 	cifs_sb = CIFS_FILE_SB(file);
2176 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2177 
2178 	if (cap_unix(tcon->ses) &&
2179 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2180 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2181 		posix_lck = true;
2182 	/*
2183 	 * BB add code here to normalize offset and length to account for
2184 	 * negative length which we can not accept over the wire.
2185 	 */
2186 	if (IS_GETLK(cmd)) {
2187 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2188 		free_xid(xid);
2189 		return rc;
2190 	}
2191 
2192 	if (!lock && !unlock) {
2193 		/*
2194 		 * if this is neither a lock nor an unlock request, there is
2195 		 * nothing to do since we do not know what it is
2196 		 */
2197 		free_xid(xid);
2198 		return -EOPNOTSUPP;
2199 	}
2200 
2201 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2202 			xid);
2203 	free_xid(xid);
2204 	return rc;
2205 }
2206 
2207 /*
2208  * update the file size (if needed) after a write. Should be called with
2209  * the inode->i_lock held
2210  */
2211 void
2212 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2213 		      unsigned int bytes_written)
2214 {
2215 	loff_t end_of_write = offset + bytes_written;
2216 
2217 	if (end_of_write > cifsi->server_eof)
2218 		cifsi->server_eof = end_of_write;
2219 }
2220 
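/*
 * Synchronously write write_size bytes at *offset through the server's
 * sync_write operation, reopening an invalidated handle and retrying on
 * -EAGAIN.  Updates the cached server EOF and i_size as needed and returns
 * the number of bytes written or a negative error.
 */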
2221 static ssize_t
2222 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2223 	   size_t write_size, loff_t *offset)
2224 {
2225 	int rc = 0;
2226 	unsigned int bytes_written = 0;
2227 	unsigned int total_written;
2228 	struct cifs_tcon *tcon;
2229 	struct TCP_Server_Info *server;
2230 	unsigned int xid;
2231 	struct dentry *dentry = open_file->dentry;
2232 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2233 	struct cifs_io_parms io_parms = {0};
2234 
2235 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2236 		 write_size, *offset, dentry);
2237 
2238 	tcon = tlink_tcon(open_file->tlink);
2239 	server = tcon->ses->server;
2240 
2241 	if (!server->ops->sync_write)
2242 		return -ENOSYS;
2243 
2244 	xid = get_xid();
2245 
2246 	for (total_written = 0; write_size > total_written;
2247 	     total_written += bytes_written) {
2248 		rc = -EAGAIN;
2249 		while (rc == -EAGAIN) {
2250 			struct kvec iov[2];
2251 			unsigned int len;
2252 
2253 			if (open_file->invalidHandle) {
2254 				/* we could deadlock if we called
2255 				   filemap_fdatawait from here so tell
2256 				   reopen_file not to flush data to
2257 				   server now */
2258 				rc = cifs_reopen_file(open_file, false);
2259 				if (rc != 0)
2260 					break;
2261 			}
2262 
2263 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2264 				  (unsigned int)write_size - total_written);
2265 			/* iov[0] is reserved for smb header */
2266 			iov[1].iov_base = (char *)write_data + total_written;
2267 			iov[1].iov_len = len;
2268 			io_parms.pid = pid;
2269 			io_parms.tcon = tcon;
2270 			io_parms.offset = *offset;
2271 			io_parms.length = len;
2272 			rc = server->ops->sync_write(xid, &open_file->fid,
2273 					&io_parms, &bytes_written, iov, 1);
2274 		}
2275 		if (rc || (bytes_written == 0)) {
2276 			if (total_written)
2277 				break;
2278 			else {
2279 				free_xid(xid);
2280 				return rc;
2281 			}
2282 		} else {
2283 			spin_lock(&d_inode(dentry)->i_lock);
2284 			cifs_update_eof(cifsi, *offset, bytes_written);
2285 			spin_unlock(&d_inode(dentry)->i_lock);
2286 			*offset += bytes_written;
2287 		}
2288 	}
2289 
2290 	cifs_stats_bytes_written(tcon, total_written);
2291 
2292 	if (total_written > 0) {
2293 		spin_lock(&d_inode(dentry)->i_lock);
2294 		if (*offset > d_inode(dentry)->i_size) {
2295 			i_size_write(d_inode(dentry), *offset);
2296 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2297 		}
2298 		spin_unlock(&d_inode(dentry)->i_lock);
2299 	}
2300 	mark_inode_dirty_sync(d_inode(dentry));
2301 	free_xid(xid);
2302 	return total_written;
2303 }
2304 
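/*
 * Find an open handle on this inode that was opened for reading (optionally
 * restricted to the current fsuid), take a reference on it and return it, or
 * return NULL if no usable handle exists.
 */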
2305 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2306 					bool fsuid_only)
2307 {
2308 	struct cifsFileInfo *open_file = NULL;
2309 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2310 
2311 	/* only filter by fsuid on multiuser mounts */
2312 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2313 		fsuid_only = false;
2314 
2315 	spin_lock(&cifs_inode->open_file_lock);
2316 	/* we could simply get the first_list_entry since write-only entries
2317 	   are always at the end of the list, but since the first entry might
2318 	   have a close pending, we go through the whole list */
2319 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2320 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2321 			continue;
2322 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2323 			if ((!open_file->invalidHandle)) {
2324 				/* found a good file */
2325 				/* lock it so it will not be closed on us */
2326 				cifsFileInfo_get(open_file);
2327 				spin_unlock(&cifs_inode->open_file_lock);
2328 				return open_file;
2329 			} /* else might as well continue, and look for
2330 			     another, or simply have the caller reopen it
2331 			     again rather than trying to fix this handle */
2332 		} else /* write only file */
2333 			break; /* write only files are last so must be done */
2334 	}
2335 	spin_unlock(&cifs_inode->open_file_lock);
2336 	return NULL;
2337 }
2338 
2339 /* Return -EBADF if no handle is found and general rc otherwise */
2340 int
2341 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2342 		       struct cifsFileInfo **ret_file)
2343 {
2344 	struct cifsFileInfo *open_file, *inv_file = NULL;
2345 	struct cifs_sb_info *cifs_sb;
2346 	bool any_available = false;
2347 	int rc = -EBADF;
2348 	unsigned int refind = 0;
2349 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2350 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2351 	*ret_file = NULL;
2352 
2353 	/*
2354 	 * Having a null inode here (because mapping->host was set to zero by
2355 	 * the VFS or MM) should not happen but we had reports of an oops (due
2356 	 * to it being zero) during stress testcases, so we need to check for it
2357 	 */
2358 
2359 	if (cifs_inode == NULL) {
2360 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2361 		dump_stack();
2362 		return rc;
2363 	}
2364 
2365 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2366 
2367 	/* only filter by fsuid on multiuser mounts */
2368 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2369 		fsuid_only = false;
2370 
2371 	spin_lock(&cifs_inode->open_file_lock);
2372 refind_writable:
2373 	if (refind > MAX_REOPEN_ATT) {
2374 		spin_unlock(&cifs_inode->open_file_lock);
2375 		return rc;
2376 	}
2377 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2378 		if (!any_available && open_file->pid != current->tgid)
2379 			continue;
2380 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2381 			continue;
2382 		if (with_delete && !(open_file->fid.access & DELETE))
2383 			continue;
2384 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2385 			if (!open_file->invalidHandle) {
2386 				/* found a good writable file */
2387 				cifsFileInfo_get(open_file);
2388 				spin_unlock(&cifs_inode->open_file_lock);
2389 				*ret_file = open_file;
2390 				return 0;
2391 			} else {
2392 				if (!inv_file)
2393 					inv_file = open_file;
2394 			}
2395 		}
2396 	}
2397 	/* couldn't find a usable FH with the same pid, try any available */
2398 	if (!any_available) {
2399 		any_available = true;
2400 		goto refind_writable;
2401 	}
2402 
2403 	if (inv_file) {
2404 		any_available = false;
2405 		cifsFileInfo_get(inv_file);
2406 	}
2407 
2408 	spin_unlock(&cifs_inode->open_file_lock);
2409 
2410 	if (inv_file) {
2411 		rc = cifs_reopen_file(inv_file, false);
2412 		if (!rc) {
2413 			*ret_file = inv_file;
2414 			return 0;
2415 		}
2416 
2417 		spin_lock(&cifs_inode->open_file_lock);
2418 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2419 		spin_unlock(&cifs_inode->open_file_lock);
2420 		cifsFileInfo_put(inv_file);
2421 		++refind;
2422 		inv_file = NULL;
2423 		spin_lock(&cifs_inode->open_file_lock);
2424 		goto refind_writable;
2425 	}
2426 
2427 	return rc;
2428 }
2429 
2430 struct cifsFileInfo *
2431 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2432 {
2433 	struct cifsFileInfo *cfile;
2434 	int rc;
2435 
2436 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2437 	if (rc)
2438 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2439 
2440 	return cfile;
2441 }
2442 
2443 int
2444 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2445 		       int flags,
2446 		       struct cifsFileInfo **ret_file)
2447 {
2448 	struct cifsFileInfo *cfile;
2449 	void *page = alloc_dentry_path();
2450 
2451 	*ret_file = NULL;
2452 
2453 	spin_lock(&tcon->open_file_lock);
2454 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2455 		struct cifsInodeInfo *cinode;
2456 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2457 		if (IS_ERR(full_path)) {
2458 			spin_unlock(&tcon->open_file_lock);
2459 			free_dentry_path(page);
2460 			return PTR_ERR(full_path);
2461 		}
2462 		if (strcmp(full_path, name))
2463 			continue;
2464 
2465 		cinode = CIFS_I(d_inode(cfile->dentry));
2466 		spin_unlock(&tcon->open_file_lock);
2467 		free_dentry_path(page);
2468 		return cifs_get_writable_file(cinode, flags, ret_file);
2469 	}
2470 
2471 	spin_unlock(&tcon->open_file_lock);
2472 	free_dentry_path(page);
2473 	return -ENOENT;
2474 }
2475 
2476 int
2477 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2478 		       struct cifsFileInfo **ret_file)
2479 {
2480 	struct cifsFileInfo *cfile;
2481 	void *page = alloc_dentry_path();
2482 
2483 	*ret_file = NULL;
2484 
2485 	spin_lock(&tcon->open_file_lock);
2486 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2487 		struct cifsInodeInfo *cinode;
2488 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2489 		if (IS_ERR(full_path)) {
2490 			spin_unlock(&tcon->open_file_lock);
2491 			free_dentry_path(page);
2492 			return PTR_ERR(full_path);
2493 		}
2494 		if (strcmp(full_path, name))
2495 			continue;
2496 
2497 		cinode = CIFS_I(d_inode(cfile->dentry));
2498 		spin_unlock(&tcon->open_file_lock);
2499 		free_dentry_path(page);
2500 		*ret_file = find_readable_file(cinode, 0);
2501 		return *ret_file ? 0 : -ENOENT;
2502 	}
2503 
2504 	spin_unlock(&tcon->open_file_lock);
2505 	free_dentry_path(page);
2506 	return -ENOENT;
2507 }
2508 
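/*
 * Final kref release for a cifs_writedata: tear down any SMB-Direct memory
 * registration, drop the reference on the file handle and free the structure.
 */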
2509 void
2510 cifs_writedata_release(struct kref *refcount)
2511 {
2512 	struct cifs_writedata *wdata = container_of(refcount,
2513 					struct cifs_writedata, refcount);
2514 #ifdef CONFIG_CIFS_SMB_DIRECT
2515 	if (wdata->mr) {
2516 		smbd_deregister_mr(wdata->mr);
2517 		wdata->mr = NULL;
2518 	}
2519 #endif
2520 
2521 	if (wdata->cfile)
2522 		cifsFileInfo_put(wdata->cfile);
2523 
2524 	kfree(wdata);
2525 }
2526 
2527 /*
2528  * Write failed with a retryable error. Resend the write request. It's also
2529  * possible that the page was redirtied so re-clean the page.
2530  */
2531 static void
2532 cifs_writev_requeue(struct cifs_writedata *wdata)
2533 {
2534 	int rc = 0;
2535 	struct inode *inode = d_inode(wdata->cfile->dentry);
2536 	struct TCP_Server_Info *server;
2537 	unsigned int rest_len = wdata->bytes;
2538 	loff_t fpos = wdata->offset;
2539 
2540 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2541 	do {
2542 		struct cifs_writedata *wdata2;
2543 		unsigned int wsize, cur_len;
2544 
2545 		wsize = server->ops->wp_retry_size(inode);
2546 		if (wsize < rest_len) {
2547 			if (wsize < PAGE_SIZE) {
2548 				rc = -EOPNOTSUPP;
2549 				break;
2550 			}
2551 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2552 		} else {
2553 			cur_len = rest_len;
2554 		}
2555 
2556 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2557 		if (!wdata2) {
2558 			rc = -ENOMEM;
2559 			break;
2560 		}
2561 
2562 		wdata2->sync_mode = wdata->sync_mode;
2563 		wdata2->offset	= fpos;
2564 		wdata2->bytes	= cur_len;
2565 		wdata2->iter	= wdata->iter;
2566 
2567 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2568 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2569 
2570 		if (iov_iter_is_xarray(&wdata2->iter))
2571 			/* Check for pages having been redirtied and clean
2572 			 * them.  We can do this by walking the xarray.  If
2573 			 * it's not an xarray, then it's a DIO and we shouldn't
2574 			 * be mucking around with the page bits.
2575 			 */
2576 			cifs_undirty_folios(inode, fpos, cur_len);
2577 
2578 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2579 					    &wdata2->cfile);
2580 		if (!wdata2->cfile) {
2581 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2582 				 rc);
2583 			if (!is_retryable_error(rc))
2584 				rc = -EBADF;
2585 		} else {
2586 			wdata2->pid = wdata2->cfile->pid;
2587 			rc = server->ops->async_writev(wdata2,
2588 						       cifs_writedata_release);
2589 		}
2590 
2591 		kref_put(&wdata2->refcount, cifs_writedata_release);
2592 		if (rc) {
2593 			if (is_retryable_error(rc))
2594 				continue;
2595 			fpos += cur_len;
2596 			rest_len -= cur_len;
2597 			break;
2598 		}
2599 
2600 		fpos += cur_len;
2601 		rest_len -= cur_len;
2602 	} while (rest_len > 0);
2603 
2604 	/* Clean up remaining pages from the original wdata */
2605 	if (iov_iter_is_xarray(&wdata->iter))
2606 		cifs_pages_write_failed(inode, fpos, rest_len);
2607 
2608 	if (rc != 0 && !is_retryable_error(rc))
2609 		mapping_set_error(inode->i_mapping, rc);
2610 	kref_put(&wdata->refcount, cifs_writedata_release);
2611 }
2612 
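/*
 * Work item run when an asynchronous write completes.  On success the cached
 * EOF and write statistics are updated; an -EAGAIN result under WB_SYNC_ALL
 * requeues the write, otherwise the affected pages are marked written back,
 * redirtied or failed according to the result.
 */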
2613 void
2614 cifs_writev_complete(struct work_struct *work)
2615 {
2616 	struct cifs_writedata *wdata = container_of(work,
2617 						struct cifs_writedata, work);
2618 	struct inode *inode = d_inode(wdata->cfile->dentry);
2619 
2620 	if (wdata->result == 0) {
2621 		spin_lock(&inode->i_lock);
2622 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2623 		spin_unlock(&inode->i_lock);
2624 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2625 					 wdata->bytes);
2626 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2627 		return cifs_writev_requeue(wdata);
2628 
2629 	if (wdata->result == -EAGAIN)
2630 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2631 	else if (wdata->result < 0)
2632 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2633 	else
2634 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2635 
2636 	if (wdata->result != -EAGAIN)
2637 		mapping_set_error(inode->i_mapping, wdata->result);
2638 	kref_put(&wdata->refcount, cifs_writedata_release);
2639 }
2640 
2641 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2642 {
2643 	struct cifs_writedata *wdata;
2644 
2645 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2646 	if (wdata != NULL) {
2647 		kref_init(&wdata->refcount);
2648 		INIT_LIST_HEAD(&wdata->list);
2649 		init_completion(&wdata->done);
2650 		INIT_WORK(&wdata->work, complete);
2651 	}
2652 	return wdata;
2653 }
2654 
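/*
 * Write the byte range [from, to) of a page back to the server through any
 * writable open handle, trimming the range so the write never extends the
 * file.
 */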
2655 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2656 {
2657 	struct address_space *mapping = page->mapping;
2658 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2659 	char *write_data;
2660 	int rc = -EFAULT;
2661 	int bytes_written = 0;
2662 	struct inode *inode;
2663 	struct cifsFileInfo *open_file;
2664 
2665 	if (!mapping || !mapping->host)
2666 		return -EFAULT;
2667 
2668 	inode = page->mapping->host;
2669 
2670 	offset += (loff_t)from;
2671 	write_data = kmap(page);
2672 	write_data += from;
2673 
2674 	if ((to > PAGE_SIZE) || (from > to)) {
2675 		kunmap(page);
2676 		return -EIO;
2677 	}
2678 
2679 	/* racing with truncate? */
2680 	if (offset > mapping->host->i_size) {
2681 		kunmap(page);
2682 		return 0; /* don't care */
2683 	}
2684 
2685 	/* check to make sure that we are not extending the file */
2686 	if (mapping->host->i_size - offset < (loff_t)to)
2687 		to = (unsigned)(mapping->host->i_size - offset);
2688 
2689 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2690 				    &open_file);
2691 	if (!rc) {
2692 		bytes_written = cifs_write(open_file, open_file->pid,
2693 					   write_data, to - from, &offset);
2694 		cifsFileInfo_put(open_file);
2695 		/* Does mm or vfs already set times? */
2696 		simple_inode_init_ts(inode);
2697 		if ((bytes_written > 0) && (offset))
2698 			rc = 0;
2699 		else if (bytes_written < 0)
2700 			rc = bytes_written;
2701 		else
2702 			rc = -EFAULT;
2703 	} else {
2704 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2705 		if (!is_retryable_error(rc))
2706 			rc = -EIO;
2707 	}
2708 
2709 	kunmap(page);
2710 	return rc;
2711 }
2712 
2713 /*
2714  * Extend the region to be written back to include subsequent contiguously
2715  * dirty pages if possible, but don't sleep while doing so.
2716  */
2717 static void cifs_extend_writeback(struct address_space *mapping,
2718 				  struct xa_state *xas,
2719 				  long *_count,
2720 				  loff_t start,
2721 				  int max_pages,
2722 				  loff_t max_len,
2723 				  size_t *_len)
2724 {
2725 	struct folio_batch batch;
2726 	struct folio *folio;
2727 	unsigned int nr_pages;
2728 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2729 	size_t len;
2730 	bool stop = true;
2731 	unsigned int i;
2732 
2733 	folio_batch_init(&batch);
2734 
2735 	do {
2736 		/* Firstly, we gather up a batch of contiguous dirty pages
2737 		 * under the RCU read lock - but we can't clear the dirty flags
2738 		 * there if any of those pages are mapped.
2739 		 */
2740 		rcu_read_lock();
2741 
2742 		xas_for_each(xas, folio, ULONG_MAX) {
2743 			stop = true;
2744 			if (xas_retry(xas, folio))
2745 				continue;
2746 			if (xa_is_value(folio))
2747 				break;
2748 			if (folio->index != index) {
2749 				xas_reset(xas);
2750 				break;
2751 			}
2752 
2753 			if (!folio_try_get_rcu(folio)) {
2754 				xas_reset(xas);
2755 				continue;
2756 			}
2757 			nr_pages = folio_nr_pages(folio);
2758 			if (nr_pages > max_pages) {
2759 				xas_reset(xas);
2760 				break;
2761 			}
2762 
2763 			/* Has the page moved or been split? */
2764 			if (unlikely(folio != xas_reload(xas))) {
2765 				folio_put(folio);
2766 				xas_reset(xas);
2767 				break;
2768 			}
2769 
2770 			if (!folio_trylock(folio)) {
2771 				folio_put(folio);
2772 				xas_reset(xas);
2773 				break;
2774 			}
2775 			if (!folio_test_dirty(folio) ||
2776 			    folio_test_writeback(folio)) {
2777 				folio_unlock(folio);
2778 				folio_put(folio);
2779 				xas_reset(xas);
2780 				break;
2781 			}
2782 
2783 			max_pages -= nr_pages;
2784 			len = folio_size(folio);
2785 			stop = false;
2786 
2787 			index += nr_pages;
2788 			*_count -= nr_pages;
2789 			*_len += len;
2790 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2791 				stop = true;
2792 
2793 			if (!folio_batch_add(&batch, folio))
2794 				break;
2795 			if (stop)
2796 				break;
2797 		}
2798 
2799 		xas_pause(xas);
2800 		rcu_read_unlock();
2801 
2802 		/* Now, if we obtained any pages, we can shift them to being
2803 		 * writable and mark them for caching.
2804 		 */
2805 		if (!folio_batch_count(&batch))
2806 			break;
2807 
2808 		for (i = 0; i < folio_batch_count(&batch); i++) {
2809 			folio = batch.folios[i];
2810 			/* The folio should be locked, dirty and not undergoing
2811 			 * writeback from the loop above.
2812 			 */
2813 			if (!folio_clear_dirty_for_io(folio))
2814 				WARN_ON(1);
2815 			folio_start_writeback(folio);
2816 			folio_unlock(folio);
2817 		}
2818 
2819 		folio_batch_release(&batch);
2820 		cond_resched();
2821 	} while (!stop);
2822 }
2823 
2824 /*
2825  * Write back the locked page and any subsequent non-locked dirty pages.
2826  */
2827 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2828 						 struct writeback_control *wbc,
2829 						 struct xa_state *xas,
2830 						 struct folio *folio,
2831 						 unsigned long long start,
2832 						 unsigned long long end)
2833 {
2834 	struct inode *inode = mapping->host;
2835 	struct TCP_Server_Info *server;
2836 	struct cifs_writedata *wdata;
2837 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2838 	struct cifs_credits credits_on_stack;
2839 	struct cifs_credits *credits = &credits_on_stack;
2840 	struct cifsFileInfo *cfile = NULL;
2841 	unsigned long long i_size = i_size_read(inode), max_len;
2842 	unsigned int xid, wsize;
2843 	size_t len = folio_size(folio);
2844 	long count = wbc->nr_to_write;
2845 	int rc;
2846 
2847 	/* The folio should be locked, dirty and not undergoing writeback. */
2848 	if (!folio_clear_dirty_for_io(folio))
2849 		WARN_ON_ONCE(1);
2850 	folio_start_writeback(folio);
2851 
2852 	count -= folio_nr_pages(folio);
2853 
2854 	xid = get_xid();
2855 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2856 
2857 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2858 	if (rc) {
2859 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2860 		goto err_xid;
2861 	}
2862 
2863 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2864 					   &wsize, credits);
2865 	if (rc != 0)
2866 		goto err_close;
2867 
2868 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2869 	if (!wdata) {
2870 		rc = -ENOMEM;
2871 		goto err_uncredit;
2872 	}
2873 
2874 	wdata->sync_mode = wbc->sync_mode;
2875 	wdata->offset = folio_pos(folio);
2876 	wdata->pid = cfile->pid;
2877 	wdata->credits = credits_on_stack;
2878 	wdata->cfile = cfile;
2879 	wdata->server = server;
2880 	cfile = NULL;
2881 
2882 	/* Find all consecutive lockable dirty pages that have contiguous
2883 	 * written regions, stopping when we find a page that is not
2884 	 * immediately lockable, is not dirty or is missing, or we reach the
2885 	 * end of the range.
2886 	 */
2887 	if (start < i_size) {
2888 		/* Trim the write to the EOF; the extra data is ignored.  Also
2889 		 * put an upper limit on the size of a single storedata op.
2890 		 */
2891 		max_len = wsize;
2892 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2893 		max_len = min_t(unsigned long long, max_len, i_size - start);
2894 
2895 		if (len < max_len) {
2896 			int max_pages = INT_MAX;
2897 
2898 #ifdef CONFIG_CIFS_SMB_DIRECT
2899 			if (server->smbd_conn)
2900 				max_pages = server->smbd_conn->max_frmr_depth;
2901 #endif
2902 			max_pages -= folio_nr_pages(folio);
2903 
2904 			if (max_pages > 0)
2905 				cifs_extend_writeback(mapping, xas, &count, start,
2906 						      max_pages, max_len, &len);
2907 		}
2908 	}
2909 	len = min_t(unsigned long long, len, i_size - start);
2910 
2911 	/* We now have a contiguous set of dirty pages, each with writeback
2912 	 * set; the first page is still locked at this point, but all the rest
2913 	 * have been unlocked.
2914 	 */
2915 	folio_unlock(folio);
2916 	wdata->bytes = len;
2917 
2918 	if (start < i_size) {
2919 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2920 				start, len);
2921 
2922 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2923 		if (rc)
2924 			goto err_wdata;
2925 
2926 		if (wdata->cfile->invalidHandle)
2927 			rc = -EAGAIN;
2928 		else
2929 			rc = wdata->server->ops->async_writev(wdata,
2930 							      cifs_writedata_release);
2931 		if (rc >= 0) {
2932 			kref_put(&wdata->refcount, cifs_writedata_release);
2933 			goto err_close;
2934 		}
2935 	} else {
2936 		/* The dirty region was entirely beyond the EOF. */
2937 		cifs_pages_written_back(inode, start, len);
2938 		rc = 0;
2939 	}
2940 
2941 err_wdata:
2942 	kref_put(&wdata->refcount, cifs_writedata_release);
2943 err_uncredit:
2944 	add_credits_and_wake_if(server, credits, 0);
2945 err_close:
2946 	if (cfile)
2947 		cifsFileInfo_put(cfile);
2948 err_xid:
2949 	free_xid(xid);
2950 	if (rc == 0) {
2951 		wbc->nr_to_write = count;
2952 		rc = len;
2953 	} else if (is_retryable_error(rc)) {
2954 		cifs_pages_write_redirty(inode, start, len);
2955 	} else {
2956 		cifs_pages_write_failed(inode, start, len);
2957 		mapping_set_error(mapping, rc);
2958 	}
2959 	/* Indication to update ctime and mtime as close is deferred */
2960 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2961 	return rc;
2962 }
2963 
2964 /*
2965  * write a region of pages back to the server
2966  */
2967 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2968 				     struct writeback_control *wbc,
2969 				     struct xa_state *xas,
2970 				     unsigned long long *_start,
2971 				     unsigned long long end)
2972 {
2973 	struct folio *folio;
2974 	unsigned long long start = *_start;
2975 	ssize_t ret;
2976 	int skips = 0;
2977 
2978 search_again:
2979 	/* Find the first dirty page. */
2980 	rcu_read_lock();
2981 
2982 	for (;;) {
2983 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2984 		if (xas_retry(xas, folio) || xa_is_value(folio))
2985 			continue;
2986 		if (!folio)
2987 			break;
2988 
2989 		if (!folio_try_get_rcu(folio)) {
2990 			xas_reset(xas);
2991 			continue;
2992 		}
2993 
2994 		if (unlikely(folio != xas_reload(xas))) {
2995 			folio_put(folio);
2996 			xas_reset(xas);
2997 			continue;
2998 		}
2999 
3000 		xas_pause(xas);
3001 		break;
3002 	}
3003 	rcu_read_unlock();
3004 	if (!folio)
3005 		return 0;
3006 
3007 	start = folio_pos(folio); /* May regress with THPs */
3008 
3009 	/* At this point we hold neither the i_pages lock nor the page lock:
3010 	 * the page may be truncated or invalidated (changing page->mapping to
3011 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3012 	 * mapping
3013 	 */
3014 lock_again:
3015 	if (wbc->sync_mode != WB_SYNC_NONE) {
3016 		ret = folio_lock_killable(folio);
3017 		if (ret < 0)
3018 			return ret;
3019 	} else {
3020 		if (!folio_trylock(folio))
3021 			goto search_again;
3022 	}
3023 
3024 	if (folio->mapping != mapping ||
3025 	    !folio_test_dirty(folio)) {
3026 		start += folio_size(folio);
3027 		folio_unlock(folio);
3028 		goto search_again;
3029 	}
3030 
3031 	if (folio_test_writeback(folio) ||
3032 	    folio_test_fscache(folio)) {
3033 		folio_unlock(folio);
3034 		if (wbc->sync_mode != WB_SYNC_NONE) {
3035 			folio_wait_writeback(folio);
3036 #ifdef CONFIG_CIFS_FSCACHE
3037 			folio_wait_fscache(folio);
3038 #endif
3039 			goto lock_again;
3040 		}
3041 
3042 		start += folio_size(folio);
3043 		if (wbc->sync_mode == WB_SYNC_NONE) {
3044 			if (skips >= 5 || need_resched()) {
3045 				ret = 0;
3046 				goto out;
3047 			}
3048 			skips++;
3049 		}
3050 		goto search_again;
3051 	}
3052 
3053 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3054 out:
3055 	if (ret > 0)
3056 		*_start = start + ret;
3057 	return ret;
3058 }
3059 
3060 /*
3061  * Write a region of pages back to the server
3062  */
3063 static int cifs_writepages_region(struct address_space *mapping,
3064 				  struct writeback_control *wbc,
3065 				  unsigned long long *_start,
3066 				  unsigned long long end)
3067 {
3068 	ssize_t ret;
3069 
3070 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3071 
3072 	do {
3073 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3074 		if (ret > 0 && wbc->nr_to_write > 0)
3075 			cond_resched();
3076 	} while (ret > 0 && wbc->nr_to_write > 0);
3077 
3078 	return ret > 0 ? 0 : ret;
3079 }
3080 
3081 /*
3082  * Write some of the pending data back to the server
3083  */
3084 static int cifs_writepages(struct address_space *mapping,
3085 			   struct writeback_control *wbc)
3086 {
3087 	loff_t start, end;
3088 	int ret;
3089 
3090 	/* We have to be careful as we can end up racing with setattr()
3091 	 * truncating the pagecache since the caller doesn't take a lock here
3092 	 * to prevent it.
3093 	 */
3094 
3095 	if (wbc->range_cyclic && mapping->writeback_index) {
3096 		start = mapping->writeback_index * PAGE_SIZE;
3097 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3098 		if (ret < 0)
3099 			goto out;
3100 
3101 		if (wbc->nr_to_write <= 0) {
3102 			mapping->writeback_index = start / PAGE_SIZE;
3103 			goto out;
3104 		}
3105 
3106 		start = 0;
3107 		end = mapping->writeback_index * PAGE_SIZE;
3108 		mapping->writeback_index = 0;
3109 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3110 		if (ret == 0)
3111 			mapping->writeback_index = start / PAGE_SIZE;
3112 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3113 		start = 0;
3114 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3115 		if (wbc->nr_to_write > 0 && ret == 0)
3116 			mapping->writeback_index = start / PAGE_SIZE;
3117 	} else {
3118 		start = wbc->range_start;
3119 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3120 	}
3121 
3122 out:
3123 	return ret;
3124 }
3125 
3126 static int
3127 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3128 {
3129 	int rc;
3130 	unsigned int xid;
3131 
3132 	xid = get_xid();
3133 /* BB add check for wbc flags */
3134 	get_page(page);
3135 	if (!PageUptodate(page))
3136 		cifs_dbg(FYI, "ppw - page not up to date\n");
3137 
3138 	/*
3139 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3140 	 *
3141 	 * A writepage() implementation always needs to do either this,
3142 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3143 	 * the case of a failure.
3144 	 *
3145 	 * Just unlocking the page will cause the radix tree tag-bits
3146 	 * to fail to update with the state of the page correctly.
3147 	 */
3148 	set_page_writeback(page);
3149 retry_write:
3150 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3151 	if (is_retryable_error(rc)) {
3152 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3153 			goto retry_write;
3154 		redirty_page_for_writepage(wbc, page);
3155 	} else if (rc != 0) {
3156 		SetPageError(page);
3157 		mapping_set_error(page->mapping, rc);
3158 	} else {
3159 		SetPageUptodate(page);
3160 	}
3161 	end_page_writeback(page);
3162 	put_page(page);
3163 	free_xid(xid);
3164 	return rc;
3165 }
3166 
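/*
 * write_end for buffered writes: mark the folio uptodate when a full copy
 * landed, fall back to a synchronous cifs_write() when it did not and the
 * folio is still not uptodate, then update i_size/i_blocks and flag the inode
 * for deferred time updates.
 */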
3167 static int cifs_write_end(struct file *file, struct address_space *mapping,
3168 			loff_t pos, unsigned len, unsigned copied,
3169 			struct page *page, void *fsdata)
3170 {
3171 	int rc;
3172 	struct inode *inode = mapping->host;
3173 	struct cifsFileInfo *cfile = file->private_data;
3174 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3175 	struct folio *folio = page_folio(page);
3176 	__u32 pid;
3177 
3178 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3179 		pid = cfile->pid;
3180 	else
3181 		pid = current->tgid;
3182 
3183 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3184 		 page, pos, copied);
3185 
3186 	if (folio_test_checked(folio)) {
3187 		if (copied == len)
3188 			folio_mark_uptodate(folio);
3189 		folio_clear_checked(folio);
3190 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3191 		folio_mark_uptodate(folio);
3192 
3193 	if (!folio_test_uptodate(folio)) {
3194 		char *page_data;
3195 		unsigned offset = pos & (PAGE_SIZE - 1);
3196 		unsigned int xid;
3197 
3198 		xid = get_xid();
3199 		/* this is probably better than directly calling
3200 		   partialpage_write since in this function the file handle is
3201 		   known, which we might as well leverage */
3202 		/* BB check if anything else missing out of ppw
3203 		   such as updating last write time */
3204 		page_data = kmap(page);
3205 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3206 		/* if (rc < 0) should we set writebehind rc? */
3207 		kunmap(page);
3208 
3209 		free_xid(xid);
3210 	} else {
3211 		rc = copied;
3212 		pos += copied;
3213 		set_page_dirty(page);
3214 	}
3215 
3216 	if (rc > 0) {
3217 		spin_lock(&inode->i_lock);
3218 		if (pos > inode->i_size) {
3219 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3220 
3221 			i_size_write(inode, pos);
3222 			/*
3223 			 * Estimate new allocation size based on the amount written.
3224 			 * This will be updated from server on close (and on queryinfo)
3225 			 */
3226 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3227 						inode->i_blocks + additional_blocks);
3228 		}
3229 		spin_unlock(&inode->i_lock);
3230 	}
3231 
3232 	unlock_page(page);
3233 	put_page(page);
3234 	/* Indication to update ctime and mtime as close is deferred */
3235 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3236 
3237 	return rc;
3238 }
3239 
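/*
 * Strict-cache variant of fsync: write out and wait on the dirty pagecache
 * for the range, zap the local page cache if we no longer hold a read lease
 * so later reads go to the server, then ask the server to flush the file
 * unless server-side flushing is disabled by the mount options.
 */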
3240 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3241 		      int datasync)
3242 {
3243 	unsigned int xid;
3244 	int rc = 0;
3245 	struct cifs_tcon *tcon;
3246 	struct TCP_Server_Info *server;
3247 	struct cifsFileInfo *smbfile = file->private_data;
3248 	struct inode *inode = file_inode(file);
3249 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3250 
3251 	rc = file_write_and_wait_range(file, start, end);
3252 	if (rc) {
3253 		trace_cifs_fsync_err(inode->i_ino, rc);
3254 		return rc;
3255 	}
3256 
3257 	xid = get_xid();
3258 
3259 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3260 		 file, datasync);
3261 
3262 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3263 		rc = cifs_zap_mapping(inode);
3264 		if (rc) {
3265 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3266 			rc = 0; /* don't care about it in fsync */
3267 		}
3268 	}
3269 
3270 	tcon = tlink_tcon(smbfile->tlink);
3271 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3272 		server = tcon->ses->server;
3273 		if (server->ops->flush == NULL) {
3274 			rc = -ENOSYS;
3275 			goto strict_fsync_exit;
3276 		}
3277 
3278 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3279 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3280 			if (smbfile) {
3281 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3282 				cifsFileInfo_put(smbfile);
3283 			} else
3284 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3285 		} else
3286 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3287 	}
3288 
3289 strict_fsync_exit:
3290 	free_xid(xid);
3291 	return rc;
3292 }
3293 
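/*
 * fsync: write out and wait on the dirty pagecache for the range, then issue
 * a server-side flush on a writable handle unless disabled by the mount
 * options.
 */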
3294 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3295 {
3296 	unsigned int xid;
3297 	int rc = 0;
3298 	struct cifs_tcon *tcon;
3299 	struct TCP_Server_Info *server;
3300 	struct cifsFileInfo *smbfile = file->private_data;
3301 	struct inode *inode = file_inode(file);
3302 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3303 
3304 	rc = file_write_and_wait_range(file, start, end);
3305 	if (rc) {
3306 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3307 		return rc;
3308 	}
3309 
3310 	xid = get_xid();
3311 
3312 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3313 		 file, datasync);
3314 
3315 	tcon = tlink_tcon(smbfile->tlink);
3316 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3317 		server = tcon->ses->server;
3318 		if (server->ops->flush == NULL) {
3319 			rc = -ENOSYS;
3320 			goto fsync_exit;
3321 		}
3322 
3323 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3324 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3325 			if (smbfile) {
3326 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3327 				cifsFileInfo_put(smbfile);
3328 			} else
3329 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3330 		} else
3331 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3332 	}
3333 
3334 fsync_exit:
3335 	free_xid(xid);
3336 	return rc;
3337 }
3338 
3339 /*
3340  * As file closes, flush all cached write data for this inode checking
3341  * for write behind errors.
3342  */
3343 int cifs_flush(struct file *file, fl_owner_t id)
3344 {
3345 	struct inode *inode = file_inode(file);
3346 	int rc = 0;
3347 
3348 	if (file->f_mode & FMODE_WRITE)
3349 		rc = filemap_write_and_wait(inode->i_mapping);
3350 
3351 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3352 	if (rc) {
3353 		/* get more nuanced writeback errors */
3354 		rc = filemap_check_wb_err(file->f_mapping, 0);
3355 		trace_cifs_flush_err(inode->i_ino, rc);
3356 	}
3357 	return rc;
3358 }
3359 
3360 static void
3361 cifs_uncached_writedata_release(struct kref *refcount)
3362 {
3363 	struct cifs_writedata *wdata = container_of(refcount,
3364 					struct cifs_writedata, refcount);
3365 
3366 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3367 	cifs_writedata_release(refcount);
3368 }
3369 
3370 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3371 
3372 static void
3373 cifs_uncached_writev_complete(struct work_struct *work)
3374 {
3375 	struct cifs_writedata *wdata = container_of(work,
3376 					struct cifs_writedata, work);
3377 	struct inode *inode = d_inode(wdata->cfile->dentry);
3378 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3379 
3380 	spin_lock(&inode->i_lock);
3381 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3382 	if (cifsi->server_eof > inode->i_size)
3383 		i_size_write(inode, cifsi->server_eof);
3384 	spin_unlock(&inode->i_lock);
3385 
3386 	complete(&wdata->done);
3387 	collect_uncached_write_data(wdata->ctx);
3388 	/* the below call can possibly free the last ref to aio ctx */
3389 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3390 }
3391 
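/*
 * Resend an entire wdata after a retryable error: reopen the file if the
 * handle was invalidated, wait until enough credits are available to send the
 * whole request at once, and re-issue the asynchronous write (dropping any
 * stale SMB-Direct memory registration first).
 */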
3392 static int
3393 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3394 	struct cifs_aio_ctx *ctx)
3395 {
3396 	unsigned int wsize;
3397 	struct cifs_credits credits;
3398 	int rc;
3399 	struct TCP_Server_Info *server = wdata->server;
3400 
3401 	do {
3402 		if (wdata->cfile->invalidHandle) {
3403 			rc = cifs_reopen_file(wdata->cfile, false);
3404 			if (rc == -EAGAIN)
3405 				continue;
3406 			else if (rc)
3407 				break;
3408 		}
3409 
3411 		/*
3412 		 * Wait for credits to resend this wdata.
3413 		 * Note: we are attempting to resend the whole wdata, not in
3414 		 * segments.
3415 		 */
3416 		do {
3417 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3418 						&wsize, &credits);
3419 			if (rc)
3420 				goto fail;
3421 
3422 			if (wsize < wdata->bytes) {
3423 				add_credits_and_wake_if(server, &credits, 0);
3424 				msleep(1000);
3425 			}
3426 		} while (wsize < wdata->bytes);
3427 		wdata->credits = credits;
3428 
3429 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3430 
3431 		if (!rc) {
3432 			if (wdata->cfile->invalidHandle)
3433 				rc = -EAGAIN;
3434 			else {
3435 				wdata->replay = true;
3436 #ifdef CONFIG_CIFS_SMB_DIRECT
3437 				if (wdata->mr) {
3438 					wdata->mr->need_invalidate = true;
3439 					smbd_deregister_mr(wdata->mr);
3440 					wdata->mr = NULL;
3441 				}
3442 #endif
3443 				rc = server->ops->async_writev(wdata,
3444 					cifs_uncached_writedata_release);
3445 			}
3446 		}
3447 
3448 		/* If the write was successfully sent, we are done */
3449 		if (!rc) {
3450 			list_add_tail(&wdata->list, wdata_list);
3451 			return 0;
3452 		}
3453 
3454 		/* Roll back credits and retry if needed */
3455 		add_credits_and_wake_if(server, &wdata->credits, 0);
3456 	} while (rc == -EAGAIN);
3457 
3458 fail:
3459 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3460 	return rc;
3461 }
3462 
3463 /*
3464  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3465  * size and maximum number of segments.
3466  */
3467 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3468 				     size_t max_segs, unsigned int *_nsegs)
3469 {
3470 	const struct bio_vec *bvecs = iter->bvec;
3471 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3472 	size_t len, span = 0, n = iter->count;
3473 	size_t skip = iter->iov_offset;
3474 
3475 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3476 		return 0;
3477 
3478 	while (n && ix < nbv && skip) {
3479 		len = bvecs[ix].bv_len;
3480 		if (skip < len)
3481 			break;
3482 		skip -= len;
3483 		n -= len;
3484 		ix++;
3485 	}
3486 
3487 	while (n && ix < nbv) {
3488 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3489 		span += len;
3490 		max_size -= len;
3491 		nsegs++;
3492 		ix++;
3493 		if (max_size == 0 || nsegs >= max_segs)
3494 			break;
3495 		skip = 0;
3496 		n -= len;
3497 	}
3498 
3499 	*_nsegs = nsegs;
3500 	return span;
3501 }
3502 
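/*
 * Split an uncached write into chunks limited by the negotiated wsize (and,
 * for SMB-Direct, by the number of segments), allocate a cifs_writedata for
 * each chunk and issue it as an asynchronous write, queuing the requests on
 * wdata_list for the caller to collect.
 */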
3503 static int
3504 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3505 		     struct cifsFileInfo *open_file,
3506 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3507 		     struct cifs_aio_ctx *ctx)
3508 {
3509 	int rc = 0;
3510 	size_t cur_len, max_len;
3511 	struct cifs_writedata *wdata;
3512 	pid_t pid;
3513 	struct TCP_Server_Info *server;
3514 	unsigned int xid, max_segs = INT_MAX;
3515 
3516 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3517 		pid = open_file->pid;
3518 	else
3519 		pid = current->tgid;
3520 
3521 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3522 	xid = get_xid();
3523 
3524 #ifdef CONFIG_CIFS_SMB_DIRECT
3525 	if (server->smbd_conn)
3526 		max_segs = server->smbd_conn->max_frmr_depth;
3527 #endif
3528 
3529 	do {
3530 		struct cifs_credits credits_on_stack;
3531 		struct cifs_credits *credits = &credits_on_stack;
3532 		unsigned int wsize, nsegs = 0;
3533 
3534 		if (signal_pending(current)) {
3535 			rc = -EINTR;
3536 			break;
3537 		}
3538 
3539 		if (open_file->invalidHandle) {
3540 			rc = cifs_reopen_file(open_file, false);
3541 			if (rc == -EAGAIN)
3542 				continue;
3543 			else if (rc)
3544 				break;
3545 		}
3546 
3547 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3548 						   &wsize, credits);
3549 		if (rc)
3550 			break;
3551 
3552 		max_len = min_t(const size_t, len, wsize);
3553 		if (!max_len) {
3554 			rc = -EAGAIN;
3555 			add_credits_and_wake_if(server, credits, 0);
3556 			break;
3557 		}
3558 
3559 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3560 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3561 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3562 		if (cur_len == 0) {
3563 			rc = -EIO;
3564 			add_credits_and_wake_if(server, credits, 0);
3565 			break;
3566 		}
3567 
3568 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3569 		if (!wdata) {
3570 			rc = -ENOMEM;
3571 			add_credits_and_wake_if(server, credits, 0);
3572 			break;
3573 		}
3574 
3575 		wdata->sync_mode = WB_SYNC_ALL;
3576 		wdata->offset	= (__u64)fpos;
3577 		wdata->cfile	= cifsFileInfo_get(open_file);
3578 		wdata->server	= server;
3579 		wdata->pid	= pid;
3580 		wdata->bytes	= cur_len;
3581 		wdata->credits	= credits_on_stack;
3582 		wdata->iter	= *from;
3583 		wdata->ctx	= ctx;
3584 		kref_get(&ctx->refcount);
3585 
3586 		iov_iter_truncate(&wdata->iter, cur_len);
3587 
3588 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3589 
3590 		if (!rc) {
3591 			if (wdata->cfile->invalidHandle)
3592 				rc = -EAGAIN;
3593 			else
3594 				rc = server->ops->async_writev(wdata,
3595 					cifs_uncached_writedata_release);
3596 		}
3597 
3598 		if (rc) {
3599 			add_credits_and_wake_if(server, &wdata->credits, 0);
3600 			kref_put(&wdata->refcount,
3601 				 cifs_uncached_writedata_release);
3602 			if (rc == -EAGAIN)
3603 				continue;
3604 			break;
3605 		}
3606 
3607 		list_add_tail(&wdata->list, wdata_list);
3608 		iov_iter_advance(from, cur_len);
3609 		fpos += cur_len;
3610 		len -= cur_len;
3611 	} while (len > 0);
3612 
3613 	free_xid(xid);
3614 	return rc;
3615 }
3616 
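/*
 * Collect the results of all outstanding uncached writes for an AIO context
 * in order of increasing offset, resending any request that failed with
 * -EAGAIN, and then report the total written via the iocb or the completion.
 */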
3617 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3618 {
3619 	struct cifs_writedata *wdata, *tmp;
3620 	struct cifs_tcon *tcon;
3621 	struct cifs_sb_info *cifs_sb;
3622 	struct dentry *dentry = ctx->cfile->dentry;
3623 	ssize_t rc;
3624 
3625 	tcon = tlink_tcon(ctx->cfile->tlink);
3626 	cifs_sb = CIFS_SB(dentry->d_sb);
3627 
3628 	mutex_lock(&ctx->aio_mutex);
3629 
3630 	if (list_empty(&ctx->list)) {
3631 		mutex_unlock(&ctx->aio_mutex);
3632 		return;
3633 	}
3634 
3635 	rc = ctx->rc;
3636 	/*
3637 	 * Wait for and collect replies for any successful sends in order of
3638 	 * increasing offset. Once an error is hit, then return without waiting
3639 	 * for any more replies.
3640 	 */
3641 restart_loop:
3642 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3643 		if (!rc) {
3644 			if (!try_wait_for_completion(&wdata->done)) {
3645 				mutex_unlock(&ctx->aio_mutex);
3646 				return;
3647 			}
3648 
3649 			if (wdata->result)
3650 				rc = wdata->result;
3651 			else
3652 				ctx->total_len += wdata->bytes;
3653 
3654 			/* resend call if it's a retryable error */
3655 			if (rc == -EAGAIN) {
3656 				struct list_head tmp_list;
3657 				struct iov_iter tmp_from = ctx->iter;
3658 
3659 				INIT_LIST_HEAD(&tmp_list);
3660 				list_del_init(&wdata->list);
3661 
3662 				if (ctx->direct_io)
3663 					rc = cifs_resend_wdata(
3664 						wdata, &tmp_list, ctx);
3665 				else {
3666 					iov_iter_advance(&tmp_from,
3667 						 wdata->offset - ctx->pos);
3668 
3669 					rc = cifs_write_from_iter(wdata->offset,
3670 						wdata->bytes, &tmp_from,
3671 						ctx->cfile, cifs_sb, &tmp_list,
3672 						ctx);
3673 
3674 					kref_put(&wdata->refcount,
3675 						cifs_uncached_writedata_release);
3676 				}
3677 
3678 				list_splice(&tmp_list, &ctx->list);
3679 				goto restart_loop;
3680 			}
3681 		}
3682 		list_del_init(&wdata->list);
3683 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3684 	}
3685 
3686 	cifs_stats_bytes_written(tcon, ctx->total_len);
3687 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3688 
3689 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3690 
3691 	mutex_unlock(&ctx->aio_mutex);
3692 
3693 	if (ctx->iocb && ctx->iocb->ki_complete)
3694 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3695 	else
3696 		complete(&ctx->done);
3697 }
3698 
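/*
 * Common implementation for uncached and O_DIRECT writes.  The source
 * iterator is captured into the aio context (user-backed iterators are
 * extracted to a pinned BVEC; BVEC/KVEC iterators are duplicated for
 * async requests), the writes are dispatched via cifs_write_from_iter()
 * and, for synchronous kiocbs, we wait for collect_uncached_write_data()
 * to signal completion.
 */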
3699 static ssize_t __cifs_writev(
3700 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3701 {
3702 	struct file *file = iocb->ki_filp;
3703 	ssize_t total_written = 0;
3704 	struct cifsFileInfo *cfile;
3705 	struct cifs_tcon *tcon;
3706 	struct cifs_sb_info *cifs_sb;
3707 	struct cifs_aio_ctx *ctx;
3708 	int rc;
3709 
3710 	rc = generic_write_checks(iocb, from);
3711 	if (rc <= 0)
3712 		return rc;
3713 
3714 	cifs_sb = CIFS_FILE_SB(file);
3715 	cfile = file->private_data;
3716 	tcon = tlink_tcon(cfile->tlink);
3717 
3718 	if (!tcon->ses->server->ops->async_writev)
3719 		return -ENOSYS;
3720 
3721 	ctx = cifs_aio_ctx_alloc();
3722 	if (!ctx)
3723 		return -ENOMEM;
3724 
3725 	ctx->cfile = cifsFileInfo_get(cfile);
3726 
3727 	if (!is_sync_kiocb(iocb))
3728 		ctx->iocb = iocb;
3729 
3730 	ctx->pos = iocb->ki_pos;
3731 	ctx->direct_io = direct;
3732 	ctx->nr_pinned_pages = 0;
3733 
3734 	if (user_backed_iter(from)) {
3735 		/*
3736 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3737 		 * they contain references to the calling process's virtual
3738 		 * memory layout which won't be available in an async worker
3739 		 * thread.  This also takes a pin on every folio involved.
3740 		 */
3741 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3742 					     &ctx->iter, 0);
3743 		if (rc < 0) {
3744 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3745 			return rc;
3746 		}
3747 
3748 		ctx->nr_pinned_pages = rc;
3749 		ctx->bv = (void *)ctx->iter.bvec;
3750 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3751 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3752 		   !is_sync_kiocb(iocb)) {
3753 		/*
3754 		 * If the op is asynchronous, we need to copy the list attached
3755 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3756 		 * will be pinned by the caller; in any case, we may or may not
3757 		 * be able to pin the pages, so we don't try.
3758 		 */
3759 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3760 		if (!ctx->bv) {
3761 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3762 			return -ENOMEM;
3763 		}
3764 	} else {
3765 		/*
3766 		 * Otherwise, we just pass the iterator down as-is and rely on
3767 		 * the caller to make sure the pages referred to by the
3768 		 * iterator don't evaporate.
3769 		 */
3770 		ctx->iter = *from;
3771 	}
3772 
3773 	ctx->len = iov_iter_count(&ctx->iter);
3774 
3775 	/* grab a lock here because response handlers can access ctx */
3776 	mutex_lock(&ctx->aio_mutex);
3777 
3778 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3779 				  cfile, cifs_sb, &ctx->list, ctx);
3780 
3781 	/*
3782 	 * If at least one write was successfully sent, then discard any rc
3783 	 * value from the later writes. If the remaining writes succeed, we'll
3784 	 * end up returning whatever was written; if they fail, we'll get a
3785 	 * new rc value from collecting the replies.
3786 	 */
3787 	if (!list_empty(&ctx->list))
3788 		rc = 0;
3789 
3790 	mutex_unlock(&ctx->aio_mutex);
3791 
3792 	if (rc) {
3793 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3794 		return rc;
3795 	}
3796 
3797 	if (!is_sync_kiocb(iocb)) {
3798 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3799 		return -EIOCBQUEUED;
3800 	}
3801 
3802 	rc = wait_for_completion_killable(&ctx->done);
3803 	if (rc) {
3804 		mutex_lock(&ctx->aio_mutex);
3805 		ctx->rc = rc = -EINTR;
3806 		total_written = ctx->total_len;
3807 		mutex_unlock(&ctx->aio_mutex);
3808 	} else {
3809 		rc = ctx->rc;
3810 		total_written = ctx->total_len;
3811 	}
3812 
3813 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3814 
3815 	if (unlikely(!total_written))
3816 		return rc;
3817 
3818 	iocb->ki_pos += total_written;
3819 	return total_written;
3820 }
3821 
3822 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3823 {
3824 	struct file *file = iocb->ki_filp;
3825 
3826 	cifs_revalidate_mapping(file->f_inode);
3827 	return __cifs_writev(iocb, from, true);
3828 }
3829 
3830 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3831 {
3832 	return __cifs_writev(iocb, from, false);
3833 }
3834 
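/*
 * Write through the page cache for an oplocked file.  lock_sem is held
 * across the copy so that no byte-range lock conflicting with the write
 * can be inserted underneath us.
 */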
3835 static ssize_t
3836 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3837 {
3838 	struct file *file = iocb->ki_filp;
3839 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3840 	struct inode *inode = file->f_mapping->host;
3841 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3842 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3843 	ssize_t rc;
3844 
3845 	inode_lock(inode);
3846 	/*
3847 	 * We need to hold the sem to be sure nobody modifies the lock list
3848 	 * with a brlock that prevents writing.
3849 	 */
3850 	down_read(&cinode->lock_sem);
3851 
3852 	rc = generic_write_checks(iocb, from);
3853 	if (rc <= 0)
3854 		goto out;
3855 
3856 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3857 				     server->vals->exclusive_lock_type, 0,
3858 				     NULL, CIFS_WRITE_OP))
3859 		rc = __generic_file_write_iter(iocb, from);
3860 	else
3861 		rc = -EACCES;
3862 out:
3863 	up_read(&cinode->lock_sem);
3864 	inode_unlock(inode);
3865 
3866 	if (rc > 0)
3867 		rc = generic_write_sync(iocb, rc);
3868 	return rc;
3869 }
3870 
3871 ssize_t
3872 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3873 {
3874 	struct inode *inode = file_inode(iocb->ki_filp);
3875 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3876 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3877 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3878 						iocb->ki_filp->private_data;
3879 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3880 	ssize_t written;
3881 
3882 	written = cifs_get_writer(cinode);
3883 	if (written)
3884 		return written;
3885 
3886 	if (CIFS_CACHE_WRITE(cinode)) {
3887 		if (cap_unix(tcon->ses) &&
3888 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3889 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3890 			written = generic_file_write_iter(iocb, from);
3891 			goto out;
3892 		}
3893 		written = cifs_writev(iocb, from);
3894 		goto out;
3895 	}
3896 	/*
3897 	 * For non-oplocked files in strict cache mode we need to write the data
3898 	 * to the server exactly from pos to pos+len-1 rather than flush all
3899 	 * affected pages because it may cause an error with mandatory locks on
3900 	 * these pages but not on the region from pos to pos+len-1.
3901 	 */
3902 	written = cifs_user_writev(iocb, from);
3903 	if (CIFS_CACHE_READ(cinode)) {
3904 		/*
3905 		 * We have read level caching and we have just sent a write
3906 		 * request to the server thus making data in the cache stale.
3907 		 * Zap the cache and set oplock/lease level to NONE to avoid
3908 		 * reading stale data from the cache. All subsequent read
3909 		 * operations will read new data from the server.
3910 		 */
3911 		cifs_zap_mapping(inode);
3912 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3913 			 inode);
3914 		cinode->oplock = 0;
3915 	}
3916 out:
3917 	cifs_put_writer(cinode);
3918 	return written;
3919 }
3920 
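/*
 * Allocate a descriptor for an async read and initialize its refcount,
 * list linkage, completion and work item; freed by
 * cifs_readdata_release() when the last reference is dropped.
 */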
3921 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3922 {
3923 	struct cifs_readdata *rdata;
3924 
3925 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3926 	if (rdata) {
3927 		kref_init(&rdata->refcount);
3928 		INIT_LIST_HEAD(&rdata->list);
3929 		init_completion(&rdata->done);
3930 		INIT_WORK(&rdata->work, complete);
3931 	}
3932 
3933 	return rdata;
3934 }
3935 
3936 void
3937 cifs_readdata_release(struct kref *refcount)
3938 {
3939 	struct cifs_readdata *rdata = container_of(refcount,
3940 					struct cifs_readdata, refcount);
3941 
3942 	if (rdata->ctx)
3943 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3944 #ifdef CONFIG_CIFS_SMB_DIRECT
3945 	if (rdata->mr) {
3946 		smbd_deregister_mr(rdata->mr);
3947 		rdata->mr = NULL;
3948 	}
3949 #endif
3950 	if (rdata->cfile)
3951 		cifsFileInfo_put(rdata->cfile);
3952 
3953 	kfree(rdata);
3954 }
3955 
3956 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3957 
3958 static void
3959 cifs_uncached_readv_complete(struct work_struct *work)
3960 {
3961 	struct cifs_readdata *rdata = container_of(work,
3962 						struct cifs_readdata, work);
3963 
3964 	complete(&rdata->done);
3965 	collect_uncached_read_data(rdata->ctx);
3966 	/* the below call can possibly free the last ref to aio ctx */
3967 	kref_put(&rdata->refcount, cifs_readdata_release);
3968 }
3969 
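/*
 * Retry a previously failed uncached read.  The whole rdata is resent in
 * one piece: we wait (sleeping between attempts) until enough credits
 * are granted to cover rdata->bytes, reissue the async read and, on
 * success, put the rdata back on the caller's pending list.
 */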
3970 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3971 			struct list_head *rdata_list,
3972 			struct cifs_aio_ctx *ctx)
3973 {
3974 	unsigned int rsize;
3975 	struct cifs_credits credits;
3976 	int rc;
3977 	struct TCP_Server_Info *server;
3978 
3979 	/* XXX: should we pick a new channel here? */
3980 	server = rdata->server;
3981 
3982 	do {
3983 		if (rdata->cfile->invalidHandle) {
3984 			rc = cifs_reopen_file(rdata->cfile, true);
3985 			if (rc == -EAGAIN)
3986 				continue;
3987 			else if (rc)
3988 				break;
3989 		}
3990 
3991 		/*
3992 		 * Wait for credits to resend this rdata.
3993 		 * Note: we are attempting to resend the whole rdata rather
3994 		 * than in segments.
3995 		 */
3996 		do {
3997 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3998 						&rsize, &credits);
3999 
4000 			if (rc)
4001 				goto fail;
4002 
4003 			if (rsize < rdata->bytes) {
4004 				add_credits_and_wake_if(server, &credits, 0);
4005 				msleep(1000);
4006 			}
4007 		} while (rsize < rdata->bytes);
4008 		rdata->credits = credits;
4009 
4010 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4011 		if (!rc) {
4012 			if (rdata->cfile->invalidHandle)
4013 				rc = -EAGAIN;
4014 			else {
4015 #ifdef CONFIG_CIFS_SMB_DIRECT
4016 				if (rdata->mr) {
4017 					rdata->mr->need_invalidate = true;
4018 					smbd_deregister_mr(rdata->mr);
4019 					rdata->mr = NULL;
4020 				}
4021 #endif
4022 				rc = server->ops->async_readv(rdata);
4023 			}
4024 		}
4025 
4026 		/* If the read was successfully sent, we are done */
4027 		if (!rc) {
4028 			/* Add to aio pending list */
4029 			list_add_tail(&rdata->list, rdata_list);
4030 			return 0;
4031 		}
4032 
4033 		/* Roll back credits and retry if needed */
4034 		add_credits_and_wake_if(server, &rdata->credits, 0);
4035 	} while (rc == -EAGAIN);
4036 
4037 fail:
4038 	kref_put(&rdata->refcount, cifs_readdata_release);
4039 	return rc;
4040 }
4041 
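/*
 * Slice an uncached/direct read into rsize-sized, credit-limited chunks
 * and issue an async read for each one, queueing the resulting rdata
 * structures on @rdata_list for collect_uncached_read_data() to reap.
 * cifs_limit_bvec_subset() additionally caps each chunk at the number of
 * iterator segments the transport can handle (max_frmr_depth over
 * smbdirect).
 */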
4042 static int
4043 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4044 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4045 		     struct cifs_aio_ctx *ctx)
4046 {
4047 	struct cifs_readdata *rdata;
4048 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4049 	struct cifs_credits credits_on_stack;
4050 	struct cifs_credits *credits = &credits_on_stack;
4051 	size_t cur_len, max_len;
4052 	int rc;
4053 	pid_t pid;
4054 	struct TCP_Server_Info *server;
4055 
4056 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4057 
4058 #ifdef CONFIG_CIFS_SMB_DIRECT
4059 	if (server->smbd_conn)
4060 		max_segs = server->smbd_conn->max_frmr_depth;
4061 #endif
4062 
4063 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4064 		pid = open_file->pid;
4065 	else
4066 		pid = current->tgid;
4067 
4068 	do {
4069 		if (open_file->invalidHandle) {
4070 			rc = cifs_reopen_file(open_file, true);
4071 			if (rc == -EAGAIN)
4072 				continue;
4073 			else if (rc)
4074 				break;
4075 		}
4076 
4077 		if (cifs_sb->ctx->rsize == 0)
4078 			cifs_sb->ctx->rsize =
4079 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4080 							     cifs_sb->ctx);
4081 
4082 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4083 						   &rsize, credits);
4084 		if (rc)
4085 			break;
4086 
4087 		max_len = min_t(size_t, len, rsize);
4088 
4089 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4090 						 max_segs, &nsegs);
4091 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4092 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4093 		if (cur_len == 0) {
4094 			rc = -EIO;
4095 			add_credits_and_wake_if(server, credits, 0);
4096 			break;
4097 		}
4098 
4099 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4100 		if (!rdata) {
4101 			add_credits_and_wake_if(server, credits, 0);
4102 			rc = -ENOMEM;
4103 			break;
4104 		}
4105 
4106 		rdata->server	= server;
4107 		rdata->cfile	= cifsFileInfo_get(open_file);
4108 		rdata->offset	= fpos;
4109 		rdata->bytes	= cur_len;
4110 		rdata->pid	= pid;
4111 		rdata->credits	= credits_on_stack;
4112 		rdata->ctx	= ctx;
4113 		kref_get(&ctx->refcount);
4114 
4115 		rdata->iter	= ctx->iter;
4116 		iov_iter_truncate(&rdata->iter, cur_len);
4117 
4118 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4119 
4120 		if (!rc) {
4121 			if (rdata->cfile->invalidHandle)
4122 				rc = -EAGAIN;
4123 			else
4124 				rc = server->ops->async_readv(rdata);
4125 		}
4126 
4127 		if (rc) {
4128 			add_credits_and_wake_if(server, &rdata->credits, 0);
4129 			kref_put(&rdata->refcount, cifs_readdata_release);
4130 			if (rc == -EAGAIN)
4131 				continue;
4132 			break;
4133 		}
4134 
4135 		list_add_tail(&rdata->list, rdata_list);
4136 		iov_iter_advance(&ctx->iter, cur_len);
4137 		fpos += cur_len;
4138 		len -= cur_len;
4139 	} while (len > 0);
4140 
4141 	return rc;
4142 }
4143 
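/*
 * Reap completed uncached/direct reads in order of increasing offset.
 * An rdata that finished with -EAGAIN is resent (re-using the rdata for
 * direct I/O, or issuing a fresh cifs_send_async_read() for the bytes
 * not yet received); a short read ends the collection with -ENODATA,
 * which is then masked so the bytes already transferred are returned.
 */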
4144 static void
4145 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4146 {
4147 	struct cifs_readdata *rdata, *tmp;
4148 	struct cifs_sb_info *cifs_sb;
4149 	int rc;
4150 
4151 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4152 
4153 	mutex_lock(&ctx->aio_mutex);
4154 
4155 	if (list_empty(&ctx->list)) {
4156 		mutex_unlock(&ctx->aio_mutex);
4157 		return;
4158 	}
4159 
4160 	rc = ctx->rc;
4161 	/* the loop below should proceed in the order of increasing offsets */
4162 again:
4163 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4164 		if (!rc) {
4165 			if (!try_wait_for_completion(&rdata->done)) {
4166 				mutex_unlock(&ctx->aio_mutex);
4167 				return;
4168 			}
4169 
4170 			if (rdata->result == -EAGAIN) {
4171 				/* resend call if it's a retryable error */
4172 				struct list_head tmp_list;
4173 				unsigned int got_bytes = rdata->got_bytes;
4174 
4175 				list_del_init(&rdata->list);
4176 				INIT_LIST_HEAD(&tmp_list);
4177 
4178 				if (ctx->direct_io) {
4179 					/*
4180 					 * Re-use rdata as this is a
4181 					 * direct I/O
4182 					 */
4183 					rc = cifs_resend_rdata(
4184 						rdata,
4185 						&tmp_list, ctx);
4186 				} else {
4187 					rc = cifs_send_async_read(
4188 						rdata->offset + got_bytes,
4189 						rdata->bytes - got_bytes,
4190 						rdata->cfile, cifs_sb,
4191 						&tmp_list, ctx);
4192 
4193 					kref_put(&rdata->refcount,
4194 						cifs_readdata_release);
4195 				}
4196 
4197 				list_splice(&tmp_list, &ctx->list);
4198 
4199 				goto again;
4200 			} else if (rdata->result)
4201 				rc = rdata->result;
4202 
4203 			/* if there was a short read -- discard anything left */
4204 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4205 				rc = -ENODATA;
4206 
4207 			ctx->total_len += rdata->got_bytes;
4208 		}
4209 		list_del_init(&rdata->list);
4210 		kref_put(&rdata->refcount, cifs_readdata_release);
4211 	}
4212 
4213 	/* mask nodata case */
4214 	if (rc == -ENODATA)
4215 		rc = 0;
4216 
4217 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4218 
4219 	mutex_unlock(&ctx->aio_mutex);
4220 
4221 	if (ctx->iocb && ctx->iocb->ki_complete)
4222 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4223 	else
4224 		complete(&ctx->done);
4225 }
4226 
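/*
 * Common implementation for uncached and O_DIRECT reads, mirroring
 * __cifs_writev(): the destination iterator is captured into the aio
 * context, dirty pagecache over the range is flushed first for direct
 * I/O, the reads are dispatched asynchronously and, for synchronous
 * kiocbs, we wait for collect_uncached_read_data() to finish.
 */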
4227 static ssize_t __cifs_readv(
4228 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4229 {
4230 	size_t len;
4231 	struct file *file = iocb->ki_filp;
4232 	struct cifs_sb_info *cifs_sb;
4233 	struct cifsFileInfo *cfile;
4234 	struct cifs_tcon *tcon;
4235 	ssize_t rc, total_read = 0;
4236 	loff_t offset = iocb->ki_pos;
4237 	struct cifs_aio_ctx *ctx;
4238 
4239 	len = iov_iter_count(to);
4240 	if (!len)
4241 		return 0;
4242 
4243 	cifs_sb = CIFS_FILE_SB(file);
4244 	cfile = file->private_data;
4245 	tcon = tlink_tcon(cfile->tlink);
4246 
4247 	if (!tcon->ses->server->ops->async_readv)
4248 		return -ENOSYS;
4249 
4250 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4251 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4252 
4253 	ctx = cifs_aio_ctx_alloc();
4254 	if (!ctx)
4255 		return -ENOMEM;
4256 
4257 	ctx->pos	= offset;
4258 	ctx->direct_io	= direct;
4259 	ctx->len	= len;
4260 	ctx->cfile	= cifsFileInfo_get(cfile);
4261 	ctx->nr_pinned_pages = 0;
4262 
4263 	if (!is_sync_kiocb(iocb))
4264 		ctx->iocb = iocb;
4265 
4266 	if (user_backed_iter(to)) {
4267 		/*
4268 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4269 		 * they contain references to the calling process's virtual
4270 		 * memory layout which won't be available in an async worker
4271 		 * thread.  This also takes a pin on every folio involved.
4272 		 */
4273 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4274 					     &ctx->iter, 0);
4275 		if (rc < 0) {
4276 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4277 			return rc;
4278 		}
4279 
4280 		ctx->nr_pinned_pages = rc;
4281 		ctx->bv = (void *)ctx->iter.bvec;
4282 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4283 		ctx->should_dirty = true;
4284 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4285 		   !is_sync_kiocb(iocb)) {
4286 		/*
4287 		 * If the op is asynchronous, we need to copy the list attached
4288 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4289 		 * will be retained by the caller; in any case, we may or may
4290 		 * not be able to pin the pages, so we don't try.
4291 		 */
4292 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4293 		if (!ctx->bv) {
4294 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4295 			return -ENOMEM;
4296 		}
4297 	} else {
4298 		/*
4299 		 * Otherwise, we just pass the iterator down as-is and rely on
4300 		 * the caller to make sure the pages referred to by the
4301 		 * iterator don't evaporate.
4302 		 */
4303 		ctx->iter = *to;
4304 	}
4305 
4306 	if (direct) {
4307 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4308 						  offset, offset + len - 1);
4309 		if (rc) {
4310 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4311 			return -EAGAIN;
4312 		}
4313 	}
4314 
4315 	/* grab a lock here because read response handlers can access ctx */
4316 	mutex_lock(&ctx->aio_mutex);
4317 
4318 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4319 
4320 	/* if at least one read request was sent successfully, then reset rc */
4321 	if (!list_empty(&ctx->list))
4322 		rc = 0;
4323 
4324 	mutex_unlock(&ctx->aio_mutex);
4325 
4326 	if (rc) {
4327 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4328 		return rc;
4329 	}
4330 
4331 	if (!is_sync_kiocb(iocb)) {
4332 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4333 		return -EIOCBQUEUED;
4334 	}
4335 
4336 	rc = wait_for_completion_killable(&ctx->done);
4337 	if (rc) {
4338 		mutex_lock(&ctx->aio_mutex);
4339 		ctx->rc = rc = -EINTR;
4340 		total_read = ctx->total_len;
4341 		mutex_unlock(&ctx->aio_mutex);
4342 	} else {
4343 		rc = ctx->rc;
4344 		total_read = ctx->total_len;
4345 	}
4346 
4347 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4348 
4349 	if (total_read) {
4350 		iocb->ki_pos += total_read;
4351 		return total_read;
4352 	}
4353 	return rc;
4354 }
4355 
4356 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4357 {
4358 	return __cifs_readv(iocb, to, true);
4359 }
4360 
4361 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4362 {
4363 	return __cifs_readv(iocb, to, false);
4364 }
4365 
4366 ssize_t
4367 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4368 {
4369 	struct inode *inode = file_inode(iocb->ki_filp);
4370 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4371 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4372 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4373 						iocb->ki_filp->private_data;
4374 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4375 	int rc = -EACCES;
4376 
4377 	/*
4378 	 * In strict cache mode we need to read from the server all the time
4379 	 * if we don't have level II oplock because the server can delay mtime
4380 	 * change - so we can't make a decision about invalidating the inode.
4381 	 * We can also fail when reading pages if there are mandatory locks
4382 	 * on pages affected by this read but not on the region from pos to
4383 	 * pos+len-1.
4384 	 */
4385 	if (!CIFS_CACHE_READ(cinode))
4386 		return cifs_user_readv(iocb, to);
4387 
4388 	if (cap_unix(tcon->ses) &&
4389 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4390 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4391 		return generic_file_read_iter(iocb, to);
4392 
4393 	/*
4394 	 * We need to hold the sem to be sure nobody modifies the lock list
4395 	 * with a brlock that prevents reading.
4396 	 */
4397 	down_read(&cinode->lock_sem);
4398 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4399 				     tcon->ses->server->vals->shared_lock_type,
4400 				     0, NULL, CIFS_READ_OP))
4401 		rc = generic_file_read_iter(iocb, to);
4402 	up_read(&cinode->lock_sem);
4403 	return rc;
4404 }
4405 
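/*
 * Synchronous, uncached read used by cifs_readpage_worker().  The
 * request is broken into rsize-sized pieces (capped at CIFSMaxBufSize)
 * and each piece is retried on -EAGAIN after reopening an invalidated
 * handle.
 */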
4406 static ssize_t
4407 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4408 {
4409 	int rc = -EACCES;
4410 	unsigned int bytes_read = 0;
4411 	unsigned int total_read;
4412 	unsigned int current_read_size;
4413 	unsigned int rsize;
4414 	struct cifs_sb_info *cifs_sb;
4415 	struct cifs_tcon *tcon;
4416 	struct TCP_Server_Info *server;
4417 	unsigned int xid;
4418 	char *cur_offset;
4419 	struct cifsFileInfo *open_file;
4420 	struct cifs_io_parms io_parms = {0};
4421 	int buf_type = CIFS_NO_BUFFER;
4422 	__u32 pid;
4423 
4424 	xid = get_xid();
4425 	cifs_sb = CIFS_FILE_SB(file);
4426 
4427 	/* FIXME: set up handlers for larger reads and/or convert to async */
4428 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4429 
4430 	if (file->private_data == NULL) {
4431 		rc = -EBADF;
4432 		free_xid(xid);
4433 		return rc;
4434 	}
4435 	open_file = file->private_data;
4436 	tcon = tlink_tcon(open_file->tlink);
4437 	server = cifs_pick_channel(tcon->ses);
4438 
4439 	if (!server->ops->sync_read) {
4440 		free_xid(xid);
4441 		return -ENOSYS;
4442 	}
4443 
4444 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4445 		pid = open_file->pid;
4446 	else
4447 		pid = current->tgid;
4448 
4449 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4450 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4451 
4452 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4453 	     total_read += bytes_read, cur_offset += bytes_read) {
4454 		do {
4455 			current_read_size = min_t(uint, read_size - total_read,
4456 						  rsize);
4457 			/*
4458 			 * For Windows ME and 9x we do not want to request more
4459 			 * than was negotiated since the server will refuse the
4460 			 * read otherwise.
4461 			 */
4462 			if (!(tcon->ses->capabilities &
4463 				tcon->ses->server->vals->cap_large_files)) {
4464 				current_read_size = min_t(uint,
4465 					current_read_size, CIFSMaxBufSize);
4466 			}
4467 			if (open_file->invalidHandle) {
4468 				rc = cifs_reopen_file(open_file, true);
4469 				if (rc != 0)
4470 					break;
4471 			}
4472 			io_parms.pid = pid;
4473 			io_parms.tcon = tcon;
4474 			io_parms.offset = *offset;
4475 			io_parms.length = current_read_size;
4476 			io_parms.server = server;
4477 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4478 						    &bytes_read, &cur_offset,
4479 						    &buf_type);
4480 		} while (rc == -EAGAIN);
4481 
4482 		if (rc || (bytes_read == 0)) {
4483 			if (total_read) {
4484 				break;
4485 			} else {
4486 				free_xid(xid);
4487 				return rc;
4488 			}
4489 		} else {
4490 			cifs_stats_bytes_read(tcon, total_read);
4491 			*offset += bytes_read;
4492 		}
4493 	}
4494 	free_xid(xid);
4495 	return total_read;
4496 }
4497 
4498 /*
4499  * If the page is mmap'ed into a process' page tables, then we need to make
4500  * sure that it doesn't change while being written back.
4501  */
4502 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4503 {
4504 	struct folio *folio = page_folio(vmf->page);
4505 
4506 	/* Wait for the folio to be written to the cache before we allow it to
4507 	 * be modified.  We then assume the entire folio will need writing back.
4508 	 */
4509 #ifdef CONFIG_CIFS_FSCACHE
4510 	if (folio_test_fscache(folio) &&
4511 	    folio_wait_fscache_killable(folio) < 0)
4512 		return VM_FAULT_RETRY;
4513 #endif
4514 
4515 	folio_wait_writeback(folio);
4516 
4517 	if (folio_lock_killable(folio) < 0)
4518 		return VM_FAULT_RETRY;
4519 	return VM_FAULT_LOCKED;
4520 }
4521 
4522 static const struct vm_operations_struct cifs_file_vm_ops = {
4523 	.fault = filemap_fault,
4524 	.map_pages = filemap_map_pages,
4525 	.page_mkwrite = cifs_page_mkwrite,
4526 };
4527 
4528 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4529 {
4530 	int xid, rc = 0;
4531 	struct inode *inode = file_inode(file);
4532 
4533 	xid = get_xid();
4534 
4535 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4536 		rc = cifs_zap_mapping(inode);
4537 	if (!rc)
4538 		rc = generic_file_mmap(file, vma);
4539 	if (!rc)
4540 		vma->vm_ops = &cifs_file_vm_ops;
4541 
4542 	free_xid(xid);
4543 	return rc;
4544 }
4545 
4546 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4547 {
4548 	int rc, xid;
4549 
4550 	xid = get_xid();
4551 
4552 	rc = cifs_revalidate_file(file);
4553 	if (rc)
4554 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4555 			 rc);
4556 	if (!rc)
4557 		rc = generic_file_mmap(file, vma);
4558 	if (!rc)
4559 		vma->vm_ops = &cifs_file_vm_ops;
4560 
4561 	free_xid(xid);
4562 	return rc;
4563 }
4564 
4565 /*
4566  * Unlock a bunch of folios in the pagecache.
4567  */
4568 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4569 {
4570 	struct folio *folio;
4571 	XA_STATE(xas, &mapping->i_pages, first);
4572 
4573 	rcu_read_lock();
4574 	xas_for_each(&xas, folio, last) {
4575 		folio_unlock(folio);
4576 	}
4577 	rcu_read_unlock();
4578 }
4579 
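/*
 * Completion worker for a readahead RPC: on success (or a partial
 * -EAGAIN result) the data is copied into fscache and any unfilled tail
 * of the iterator is zeroed; every folio covered by the request is then
 * unlocked, and marked uptodate when the read was good.
 */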
4580 static void cifs_readahead_complete(struct work_struct *work)
4581 {
4582 	struct cifs_readdata *rdata = container_of(work,
4583 						   struct cifs_readdata, work);
4584 	struct folio *folio;
4585 	pgoff_t last;
4586 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4587 
4588 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4589 
4590 	if (good)
4591 		cifs_readahead_to_fscache(rdata->mapping->host,
4592 					  rdata->offset, rdata->bytes);
4593 
4594 	if (iov_iter_count(&rdata->iter) > 0)
4595 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4596 
4597 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4598 
4599 	rcu_read_lock();
4600 	xas_for_each(&xas, folio, last) {
4601 		if (good) {
4602 			flush_dcache_folio(folio);
4603 			folio_mark_uptodate(folio);
4604 		}
4605 		folio_unlock(folio);
4606 	}
4607 	rcu_read_unlock();
4608 
4609 	kref_put(&rdata->refcount, cifs_readdata_release);
4610 }
4611 
4612 static void cifs_readahead(struct readahead_control *ractl)
4613 {
4614 	struct cifsFileInfo *open_file = ractl->file->private_data;
4615 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4616 	struct TCP_Server_Info *server;
4617 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4618 	unsigned int ra_pages;
4619 	pgoff_t next_cached = ULONG_MAX, ra_index;
4620 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4621 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4622 	bool check_cache = caching;
4623 	pid_t pid;
4624 	int rc = 0;
4625 
4626 	/* Note that readahead_count() lags behind our dequeuing of pages from
4627 	 * the ractl, so we have to keep track for ourselves.
4628 	 */
4629 	ra_pages = readahead_count(ractl);
4630 	ra_index = readahead_index(ractl);
4631 
4632 	xid = get_xid();
4633 
4634 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4635 		pid = open_file->pid;
4636 	else
4637 		pid = current->tgid;
4638 
4639 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4640 
4641 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4642 		 __func__, ractl->file, ractl->mapping, ra_pages);
4643 
4644 	/*
4645 	 * Chop the readahead request up into rsize-sized read requests.
4646 	 */
4647 	while ((nr_pages = ra_pages)) {
4648 		unsigned int i, rsize;
4649 		struct cifs_readdata *rdata;
4650 		struct cifs_credits credits_on_stack;
4651 		struct cifs_credits *credits = &credits_on_stack;
4652 		struct folio *folio;
4653 		pgoff_t fsize;
4654 
4655 		/*
4656 		 * Find out if we have anything cached in the range of
4657 		 * interest, and if so, where the next chunk of cached data is.
4658 		 */
4659 		if (caching) {
4660 			if (check_cache) {
4661 				rc = cifs_fscache_query_occupancy(
4662 					ractl->mapping->host, ra_index, nr_pages,
4663 					&next_cached, &cache_nr_pages);
4664 				if (rc < 0)
4665 					caching = false;
4666 				check_cache = false;
4667 			}
4668 
4669 			if (ra_index == next_cached) {
4670 				/*
4671 				 * TODO: Send a whole batch of pages to be read
4672 				 * by the cache.
4673 				 */
4674 				folio = readahead_folio(ractl);
4675 				fsize = folio_nr_pages(folio);
4676 				ra_pages -= fsize;
4677 				ra_index += fsize;
4678 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4679 							       &folio->page) < 0) {
4680 					/*
4681 					 * TODO: Deal with cache read failure
4682 					 * here, but for the moment, delegate
4683 					 * that to readpage.
4684 					 */
4685 					caching = false;
4686 				}
4687 				folio_unlock(folio);
4688 				next_cached += fsize;
4689 				cache_nr_pages -= fsize;
4690 				if (cache_nr_pages == 0)
4691 					check_cache = true;
4692 				continue;
4693 			}
4694 		}
4695 
4696 		if (open_file->invalidHandle) {
4697 			rc = cifs_reopen_file(open_file, true);
4698 			if (rc) {
4699 				if (rc == -EAGAIN)
4700 					continue;
4701 				break;
4702 			}
4703 		}
4704 
4705 		if (cifs_sb->ctx->rsize == 0)
4706 			cifs_sb->ctx->rsize =
4707 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4708 							     cifs_sb->ctx);
4709 
4710 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4711 						   &rsize, credits);
4712 		if (rc)
4713 			break;
4714 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4715 		if (next_cached != ULONG_MAX)
4716 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4717 
4718 		/*
4719 		 * Give up immediately if rsize is too small to read an entire
4720 		 * page. The VFS will fall back to readpage. We should never
4721 		 * reach this point however since we set ra_pages to 0 when the
4722 		 * rsize is smaller than a cache page.
4723 		 */
4724 		if (unlikely(!nr_pages)) {
4725 			add_credits_and_wake_if(server, credits, 0);
4726 			break;
4727 		}
4728 
4729 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4730 		if (!rdata) {
4731 			/* best to give up if we're out of mem */
4732 			add_credits_and_wake_if(server, credits, 0);
4733 			break;
4734 		}
4735 
4736 		rdata->offset	= ra_index * PAGE_SIZE;
4737 		rdata->bytes	= nr_pages * PAGE_SIZE;
4738 		rdata->cfile	= cifsFileInfo_get(open_file);
4739 		rdata->server	= server;
4740 		rdata->mapping	= ractl->mapping;
4741 		rdata->pid	= pid;
4742 		rdata->credits	= credits_on_stack;
4743 
4744 		for (i = 0; i < nr_pages; i++) {
4745 			if (!readahead_folio(ractl))
4746 				WARN_ON(1);
4747 		}
4748 		ra_pages -= nr_pages;
4749 		ra_index += nr_pages;
4750 
4751 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4752 				rdata->offset, rdata->bytes);
4753 
4754 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4755 		if (!rc) {
4756 			if (rdata->cfile->invalidHandle)
4757 				rc = -EAGAIN;
4758 			else
4759 				rc = server->ops->async_readv(rdata);
4760 		}
4761 
4762 		if (rc) {
4763 			add_credits_and_wake_if(server, &rdata->credits, 0);
4764 			cifs_unlock_folios(rdata->mapping,
4765 					   rdata->offset / PAGE_SIZE,
4766 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4767 			/* Fall back to readpage in error/reconnect cases */
4768 			kref_put(&rdata->refcount, cifs_readdata_release);
4769 			break;
4770 		}
4771 
4772 		kref_put(&rdata->refcount, cifs_readdata_release);
4773 	}
4774 
4775 	free_xid(xid);
4776 }
4777 
4778 /*
4779  * cifs_readpage_worker must be called with the page pinned
4780  */
4781 static int cifs_readpage_worker(struct file *file, struct page *page,
4782 	loff_t *poffset)
4783 {
4784 	struct inode *inode = file_inode(file);
4785 	struct timespec64 atime, mtime;
4786 	char *read_data;
4787 	int rc;
4788 
4789 	/* Is the page cached? */
4790 	rc = cifs_readpage_from_fscache(inode, page);
4791 	if (rc == 0)
4792 		goto read_complete;
4793 
4794 	read_data = kmap(page);
4795 	/* for reads over a certain size we could initiate async read ahead */
4796 
4797 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4798 
4799 	if (rc < 0)
4800 		goto io_error;
4801 	else
4802 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4803 
4804 	/* we do not want atime to be less than mtime; it broke some apps */
4805 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4806 	mtime = inode_get_mtime(inode);
4807 	if (timespec64_compare(&atime, &mtime) < 0)
4808 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4809 
4810 	if (PAGE_SIZE > rc)
4811 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4812 
4813 	flush_dcache_page(page);
4814 	SetPageUptodate(page);
4815 	rc = 0;
4816 
4817 io_error:
4818 	kunmap(page);
4819 
4820 read_complete:
4821 	unlock_page(page);
4822 	return rc;
4823 }
4824 
4825 static int cifs_read_folio(struct file *file, struct folio *folio)
4826 {
4827 	struct page *page = &folio->page;
4828 	loff_t offset = page_file_offset(page);
4829 	int rc = -EACCES;
4830 	unsigned int xid;
4831 
4832 	xid = get_xid();
4833 
4834 	if (file->private_data == NULL) {
4835 		rc = -EBADF;
4836 		free_xid(xid);
4837 		return rc;
4838 	}
4839 
4840 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4841 		 page, (int)offset, (int)offset);
4842 
4843 	rc = cifs_readpage_worker(file, page, &offset);
4844 
4845 	free_xid(xid);
4846 	return rc;
4847 }
4848 
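/*
 * Return 1 if any open file handle on the inode allows writing; used by
 * is_size_safe_to_change() below to decide whether a server-reported
 * file size can be trusted.
 */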
4849 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4850 {
4851 	struct cifsFileInfo *open_file;
4852 
4853 	spin_lock(&cifs_inode->open_file_lock);
4854 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4855 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4856 			spin_unlock(&cifs_inode->open_file_lock);
4857 			return 1;
4858 		}
4859 	}
4860 	spin_unlock(&cifs_inode->open_file_lock);
4861 	return 0;
4862 }
4863 
4864 /* We do not want to update the file size from the server for inodes
4865    open for write - to avoid races with writepage extending the file.
4866    In the future we could consider allowing refreshing the inode only
4867    on increases in the file size, but this is tricky to do without
4868    racing with writebehind page caching in the current Linux kernel
4869    design. */
4870 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4871 			    bool from_readdir)
4872 {
4873 	if (!cifsInode)
4874 		return true;
4875 
4876 	if (is_inode_writable(cifsInode) ||
4877 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4878 		/* This inode is open for write at least once */
4879 		struct cifs_sb_info *cifs_sb;
4880 
4881 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4882 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4883 			/* since there is no page cache to corrupt on directio,
4884 			   we can change the size safely */
4885 			return true;
4886 		}
4887 
4888 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4889 			return true;
4890 
4891 		return false;
4892 	} else
4893 		return true;
4894 }
4895 
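/*
 * Prepare a pagecache page for a buffered write.  With a read oplock, a
 * page that lies beyond EOF, or a write that covers all of the page's
 * existing data, avoids the read-modify-write; a full-page write always
 * does.  Otherwise the existing contents are read in once via
 * cifs_readpage_worker() before the copy.
 */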
4896 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4897 			loff_t pos, unsigned len,
4898 			struct page **pagep, void **fsdata)
4899 {
4900 	int oncethru = 0;
4901 	pgoff_t index = pos >> PAGE_SHIFT;
4902 	loff_t offset = pos & (PAGE_SIZE - 1);
4903 	loff_t page_start = pos & PAGE_MASK;
4904 	loff_t i_size;
4905 	struct page *page;
4906 	int rc = 0;
4907 
4908 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4909 
4910 start:
4911 	page = grab_cache_page_write_begin(mapping, index);
4912 	if (!page) {
4913 		rc = -ENOMEM;
4914 		goto out;
4915 	}
4916 
4917 	if (PageUptodate(page))
4918 		goto out;
4919 
4920 	/*
4921 	 * If we write a full page it will be up to date, no need to read from
4922 	 * the server. If the write is short, we'll end up doing a sync write
4923 	 * instead.
4924 	 */
4925 	if (len == PAGE_SIZE)
4926 		goto out;
4927 
4928 	/*
4929 	 * optimize away the read when we have an oplock, and we're not
4930 	 * expecting to use any of the data we'd be reading in. That
4931 	 * is, when the page lies beyond the EOF, or straddles the EOF
4932 	 * and the write will cover all of the existing data.
4933 	 */
4934 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4935 		i_size = i_size_read(mapping->host);
4936 		if (page_start >= i_size ||
4937 		    (offset == 0 && (pos + len) >= i_size)) {
4938 			zero_user_segments(page, 0, offset,
4939 					   offset + len,
4940 					   PAGE_SIZE);
4941 			/*
4942 			 * PageChecked means that the parts of the page
4943 			 * to which we're not writing are considered up
4944 			 * to date. Once the data is copied to the
4945 			 * page, it can be set uptodate.
4946 			 */
4947 			SetPageChecked(page);
4948 			goto out;
4949 		}
4950 	}
4951 
4952 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4953 		/*
4954 		 * might as well read a page, it is fast enough. If we get
4955 		 * an error, we don't need to return it. cifs_write_end will
4956 		 * do a sync write instead since PG_uptodate isn't set.
4957 		 */
4958 		cifs_readpage_worker(file, page, &page_start);
4959 		put_page(page);
4960 		oncethru = 1;
4961 		goto start;
4962 	} else {
4963 		/* we could try using another file handle if there is one -
4964 		   but how would we lock it to prevent a close of that handle
4965 		   racing with this read? In any case this will be written
4966 		   out by write_end so it is fine */
4967 	}
4968 out:
4969 	*pagep = page;
4970 	return rc;
4971 }
4972 
4973 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4974 {
4975 	if (folio_test_private(folio))
4976 		return 0;
4977 	if (folio_test_fscache(folio)) {
4978 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4979 			return false;
4980 		folio_wait_fscache(folio);
4981 	}
4982 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4983 	return true;
4984 }
4985 
4986 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4987 				 size_t length)
4988 {
4989 	folio_wait_fscache(folio);
4990 }
4991 
4992 static int cifs_launder_folio(struct folio *folio)
4993 {
4994 	int rc = 0;
4995 	loff_t range_start = folio_pos(folio);
4996 	loff_t range_end = range_start + folio_size(folio);
4997 	struct writeback_control wbc = {
4998 		.sync_mode = WB_SYNC_ALL,
4999 		.nr_to_write = 0,
5000 		.range_start = range_start,
5001 		.range_end = range_end,
5002 	};
5003 
5004 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5005 
5006 	if (folio_clear_dirty_for_io(folio))
5007 		rc = cifs_writepage_locked(&folio->page, &wbc);
5008 
5009 	folio_wait_fscache(folio);
5010 	return rc;
5011 }
5012 
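/*
 * Work item run when the server breaks our oplock or lease.  The cached
 * level is downgraded, dirty data is flushed (and the cache zapped if
 * read caching was lost), byte-range locks are pushed to the server and,
 * unless the break was cancelled or the file has already been closed, an
 * oplock break acknowledgment is sent back.
 */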
5013 void cifs_oplock_break(struct work_struct *work)
5014 {
5015 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5016 						  oplock_break);
5017 	struct inode *inode = d_inode(cfile->dentry);
5018 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5019 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5020 	struct cifs_tcon *tcon;
5021 	struct TCP_Server_Info *server;
5022 	struct tcon_link *tlink;
5023 	int rc = 0;
5024 	bool purge_cache = false, oplock_break_cancelled;
5025 	__u64 persistent_fid, volatile_fid;
5026 	__u16 net_fid;
5027 
5028 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5029 			TASK_UNINTERRUPTIBLE);
5030 
5031 	tlink = cifs_sb_tlink(cifs_sb);
5032 	if (IS_ERR(tlink))
5033 		goto out;
5034 	tcon = tlink_tcon(tlink);
5035 	server = tcon->ses->server;
5036 
5037 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5038 				      cfile->oplock_epoch, &purge_cache);
5039 
5040 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5041 						cifs_has_mand_locks(cinode)) {
5042 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5043 			 inode);
5044 		cinode->oplock = 0;
5045 	}
5046 
5047 	if (inode && S_ISREG(inode->i_mode)) {
5048 		if (CIFS_CACHE_READ(cinode))
5049 			break_lease(inode, O_RDONLY);
5050 		else
5051 			break_lease(inode, O_WRONLY);
5052 		rc = filemap_fdatawrite(inode->i_mapping);
5053 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5054 			rc = filemap_fdatawait(inode->i_mapping);
5055 			mapping_set_error(inode->i_mapping, rc);
5056 			cifs_zap_mapping(inode);
5057 		}
5058 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5059 		if (CIFS_CACHE_WRITE(cinode))
5060 			goto oplock_break_ack;
5061 	}
5062 
5063 	rc = cifs_push_locks(cfile);
5064 	if (rc)
5065 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5066 
5067 oplock_break_ack:
5068 	/*
5069 	 * When an oplock break is received and there are no active file
5070 	 * handles, only cached ones, schedule the deferred close immediately
5071 	 * so that a new open will not use a cached handle.
5072 	 */
5073 
5074 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5075 		cifs_close_deferred_file(cinode);
5076 
5077 	persistent_fid = cfile->fid.persistent_fid;
5078 	volatile_fid = cfile->fid.volatile_fid;
5079 	net_fid = cfile->fid.netfid;
5080 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5081 
5082 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5083 	/*
5084 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5085 	 * an acknowledgment to be sent when the file has already been closed.
5086 	 */
5087 	spin_lock(&cinode->open_file_lock);
5088 	/* check list empty since can race with kill_sb calling tree disconnect */
5089 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5090 		spin_unlock(&cinode->open_file_lock);
5091 		rc = server->ops->oplock_response(tcon, persistent_fid,
5092 						  volatile_fid, net_fid, cinode);
5093 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5094 	} else
5095 		spin_unlock(&cinode->open_file_lock);
5096 
5097 	cifs_put_tlink(tlink);
5098 out:
5099 	cifs_done_oplock_break(cinode);
5100 }
5101 
5102 /*
5103  * The presence of cifs_direct_io() in the address space ops vector
5104  * allows open() O_DIRECT flags which would have failed otherwise.
5105  *
5106  * In the non-cached mode (mount with cache=none), we shunt off direct
5107  * read and write requests so this method should never be called.
5108  *
5109  * Direct IO is not yet supported in the cached mode.
5110  */
5111 static ssize_t
5112 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5113 {
5114 	/*
5115 	 * FIXME
5116 	 * Eventually need to support direct IO for non forcedirectio mounts
5117 	 */
5118 	return -EINVAL;
5119 }
5120 
5121 static int cifs_swap_activate(struct swap_info_struct *sis,
5122 			      struct file *swap_file, sector_t *span)
5123 {
5124 	struct cifsFileInfo *cfile = swap_file->private_data;
5125 	struct inode *inode = swap_file->f_mapping->host;
5126 	unsigned long blocks;
5127 	long long isize;
5128 
5129 	cifs_dbg(FYI, "swap activate\n");
5130 
5131 	if (!swap_file->f_mapping->a_ops->swap_rw)
5132 		/* Cannot support swap */
5133 		return -EINVAL;
5134 
5135 	spin_lock(&inode->i_lock);
5136 	blocks = inode->i_blocks;
5137 	isize = inode->i_size;
5138 	spin_unlock(&inode->i_lock);
5139 	if (blocks*512 < isize) {
5140 		pr_warn("swap activate: swapfile has holes\n");
5141 		return -EINVAL;
5142 	}
5143 	*span = sis->pages;
5144 
5145 	pr_warn_once("Swap support over SMB3 is experimental\n");
5146 
5147 	/*
5148 	 * TODO: consider adding ACL (or documenting how) to prevent other
5149 	 * users (on this or other systems) from reading it
5150 	 */
5151 
5152 
5153 	/* TODO: add sk_set_memalloc(inet) or similar */
5154 
5155 	if (cfile)
5156 		cfile->swapfile = true;
5157 	/*
5158 	 * TODO: Since file already open, we can't open with DENY_ALL here
5159 	 * but we could add call to grab a byte range lock to prevent others
5160 	 * from reading or writing the file
5161 	 */
5162 
5163 	sis->flags |= SWP_FS_OPS;
5164 	return add_swap_extent(sis, 0, sis->max, 0);
5165 }
5166 
5167 static void cifs_swap_deactivate(struct file *file)
5168 {
5169 	struct cifsFileInfo *cfile = file->private_data;
5170 
5171 	cifs_dbg(FYI, "swap deactivate\n");
5172 
5173 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5174 
5175 	if (cfile)
5176 		cfile->swapfile = false;
5177 
5178 	/* do we need to unpin (or unlock) the file */
5179 }
5180 
5181 /*
5182  * Mark a page as having been made dirty and thus needing writeback.  We also
5183  * need to pin the cache object to write back to.
5184  */
5185 #ifdef CONFIG_CIFS_FSCACHE
5186 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5187 {
5188 	return fscache_dirty_folio(mapping, folio,
5189 					cifs_inode_cookie(mapping->host));
5190 }
5191 #else
5192 #define cifs_dirty_folio filemap_dirty_folio
5193 #endif
5194 
5195 const struct address_space_operations cifs_addr_ops = {
5196 	.read_folio = cifs_read_folio,
5197 	.readahead = cifs_readahead,
5198 	.writepages = cifs_writepages,
5199 	.write_begin = cifs_write_begin,
5200 	.write_end = cifs_write_end,
5201 	.dirty_folio = cifs_dirty_folio,
5202 	.release_folio = cifs_release_folio,
5203 	.direct_IO = cifs_direct_io,
5204 	.invalidate_folio = cifs_invalidate_folio,
5205 	.launder_folio = cifs_launder_folio,
5206 	.migrate_folio = filemap_migrate_folio,
5207 	/*
5208 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5209 	 * helper if needed
5210 	 */
5211 	.swap_activate = cifs_swap_activate,
5212 	.swap_deactivate = cifs_swap_deactivate,
5213 };
5214 
5215 /*
5216  * cifs_readahead requires the server to support a buffer large enough to
5217  * contain the header plus one complete page of data.  Otherwise, we need
5218  * to leave cifs_readahead out of the address space operations.
5219  */
5220 const struct address_space_operations cifs_addr_ops_smallbuf = {
5221 	.read_folio = cifs_read_folio,
5222 	.writepages = cifs_writepages,
5223 	.write_begin = cifs_write_begin,
5224 	.write_end = cifs_write_end,
5225 	.dirty_folio = cifs_dirty_folio,
5226 	.release_folio = cifs_release_folio,
5227 	.invalidate_folio = cifs_invalidate_folio,
5228 	.launder_folio = cifs_launder_folio,
5229 	.migrate_folio = filemap_migrate_folio,
5230 };
5231