xref: /openbmc/linux/fs/smb/client/file.c (revision e45deec3)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request
217 		   can cause unnecessary access denied on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 
352 	if (!server->ops->open)
353 		return -ENOSYS;
354 
355 	desired_access = cifs_convert_flags(f_flags);
356 
357 /*********************************************************************
358  *  open flag mapping table:
359  *
360  *	POSIX Flag            CIFS Disposition
361  *	----------            ----------------
362  *	O_CREAT               FILE_OPEN_IF
363  *	O_CREAT | O_EXCL      FILE_CREATE
364  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
365  *	O_TRUNC               FILE_OVERWRITE
366  *	none of the above     FILE_OPEN
367  *
368  *	Note that there is not a direct match between disposition
369  *	FILE_SUPERSEDE (ie create whether or not file exists although
370  *	O_CREAT | O_TRUNC is similar but truncates the existing
371  *	file rather than creating a new file as FILE_SUPERSEDE does
372  *	(which uses the attributes / metadata passed in on open call)
373  *?
374  *?  O_SYNC is a reasonable match to CIFS writethrough flag
375  *?  and the read write flags match reasonably.  O_LARGEFILE
376  *?  is irrelevant because largefile support is always used
377  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
378  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
379  *********************************************************************/
380 
381 	disposition = cifs_get_disposition(f_flags);
382 
383 	/* BB pass O_SYNC flag through on file attributes .. BB */
384 
385 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
386 	if (f_flags & O_SYNC)
387 		create_options |= CREATE_WRITE_THROUGH;
388 
389 	if (f_flags & O_DIRECT)
390 		create_options |= CREATE_NO_BUFFER;
391 
392 	oparms = (struct cifs_open_parms) {
393 		.tcon = tcon,
394 		.cifs_sb = cifs_sb,
395 		.desired_access = desired_access,
396 		.create_options = cifs_create_options(cifs_sb, create_options),
397 		.disposition = disposition,
398 		.path = full_path,
399 		.fid = fid,
400 	};
401 
402 	rc = server->ops->open(xid, &oparms, oplock, buf);
403 	if (rc)
404 		return rc;
405 
406 	/* TODO: Add support for calling posix query info but with passing in fid */
407 	if (tcon->unix_ext)
408 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
409 					      xid);
410 	else
411 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
412 					 xid, fid);
413 
414 	if (rc) {
415 		server->ops->close(xid, tcon, fid);
416 		if (rc == -ESTALE)
417 			rc = -EOPENSTALE;
418 	}
419 
420 	return rc;
421 }
422 
423 static bool
424 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
425 {
426 	struct cifs_fid_locks *cur;
427 	bool has_locks = false;
428 
429 	down_read(&cinode->lock_sem);
430 	list_for_each_entry(cur, &cinode->llist, llist) {
431 		if (!list_empty(&cur->locks)) {
432 			has_locks = true;
433 			break;
434 		}
435 	}
436 	up_read(&cinode->lock_sem);
437 	return has_locks;
438 }
439 
440 void
441 cifs_down_write(struct rw_semaphore *sem)
442 {
443 	while (!down_write_trylock(sem))
444 		msleep(10);
445 }
446 
447 static void cifsFileInfo_put_work(struct work_struct *work);
448 
449 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
450 				       struct tcon_link *tlink, __u32 oplock,
451 				       const char *symlink_target)
452 {
453 	struct dentry *dentry = file_dentry(file);
454 	struct inode *inode = d_inode(dentry);
455 	struct cifsInodeInfo *cinode = CIFS_I(inode);
456 	struct cifsFileInfo *cfile;
457 	struct cifs_fid_locks *fdlocks;
458 	struct cifs_tcon *tcon = tlink_tcon(tlink);
459 	struct TCP_Server_Info *server = tcon->ses->server;
460 
461 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
462 	if (cfile == NULL)
463 		return cfile;
464 
465 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
466 	if (!fdlocks) {
467 		kfree(cfile);
468 		return NULL;
469 	}
470 
471 	if (symlink_target) {
472 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
473 		if (!cfile->symlink_target) {
474 			kfree(fdlocks);
475 			kfree(cfile);
476 			return NULL;
477 		}
478 	}
479 
480 	INIT_LIST_HEAD(&fdlocks->locks);
481 	fdlocks->cfile = cfile;
482 	cfile->llist = fdlocks;
483 
484 	cfile->count = 1;
485 	cfile->pid = current->tgid;
486 	cfile->uid = current_fsuid();
487 	cfile->dentry = dget(dentry);
488 	cfile->f_flags = file->f_flags;
489 	cfile->invalidHandle = false;
490 	cfile->deferred_close_scheduled = false;
491 	cfile->tlink = cifs_get_tlink(tlink);
492 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
493 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
494 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
495 	mutex_init(&cfile->fh_mutex);
496 	spin_lock_init(&cfile->file_info_lock);
497 
498 	cifs_sb_active(inode->i_sb);
499 
500 	/*
501 	 * If the server returned a read oplock and we have mandatory brlocks,
502 	 * set oplock level to None.
503 	 */
504 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
505 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
506 		oplock = 0;
507 	}
508 
509 	cifs_down_write(&cinode->lock_sem);
510 	list_add(&fdlocks->llist, &cinode->llist);
511 	up_write(&cinode->lock_sem);
512 
513 	spin_lock(&tcon->open_file_lock);
514 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
515 		oplock = fid->pending_open->oplock;
516 	list_del(&fid->pending_open->olist);
517 
518 	fid->purge_cache = false;
519 	server->ops->set_fid(cfile, fid, oplock);
520 
521 	list_add(&cfile->tlist, &tcon->openFileList);
522 	atomic_inc(&tcon->num_local_opens);
523 
524 	/* if readable file instance put first in list*/
525 	spin_lock(&cinode->open_file_lock);
526 	if (file->f_mode & FMODE_READ)
527 		list_add(&cfile->flist, &cinode->openFileList);
528 	else
529 		list_add_tail(&cfile->flist, &cinode->openFileList);
530 	spin_unlock(&cinode->open_file_lock);
531 	spin_unlock(&tcon->open_file_lock);
532 
533 	if (fid->purge_cache)
534 		cifs_zap_mapping(inode);
535 
536 	file->private_data = cfile;
537 	return cfile;
538 }
539 
540 struct cifsFileInfo *
541 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
542 {
543 	spin_lock(&cifs_file->file_info_lock);
544 	cifsFileInfo_get_locked(cifs_file);
545 	spin_unlock(&cifs_file->file_info_lock);
546 	return cifs_file;
547 }
548 
549 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
550 {
551 	struct inode *inode = d_inode(cifs_file->dentry);
552 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
553 	struct cifsLockInfo *li, *tmp;
554 	struct super_block *sb = inode->i_sb;
555 
556 	/*
557 	 * Delete any outstanding lock records. We'll lose them when the file
558 	 * is closed anyway.
559 	 */
560 	cifs_down_write(&cifsi->lock_sem);
561 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
562 		list_del(&li->llist);
563 		cifs_del_lock_waiters(li);
564 		kfree(li);
565 	}
566 	list_del(&cifs_file->llist->llist);
567 	kfree(cifs_file->llist);
568 	up_write(&cifsi->lock_sem);
569 
570 	cifs_put_tlink(cifs_file->tlink);
571 	dput(cifs_file->dentry);
572 	cifs_sb_deactive(sb);
573 	kfree(cifs_file->symlink_target);
574 	kfree(cifs_file);
575 }
576 
577 static void cifsFileInfo_put_work(struct work_struct *work)
578 {
579 	struct cifsFileInfo *cifs_file = container_of(work,
580 			struct cifsFileInfo, put);
581 
582 	cifsFileInfo_put_final(cifs_file);
583 }
584 
585 /**
586  * cifsFileInfo_put - release a reference of file priv data
587  *
588  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
589  *
590  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
591  */
592 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
593 {
594 	_cifsFileInfo_put(cifs_file, true, true);
595 }
596 
597 /**
598  * _cifsFileInfo_put - release a reference of file priv data
599  *
600  * This may involve closing the filehandle @cifs_file out on the
601  * server. Must be called without holding tcon->open_file_lock,
602  * cinode->open_file_lock and cifs_file->file_info_lock.
603  *
604  * If @wait_for_oplock_handler is true and we are releasing the last
605  * reference, wait for any running oplock break handler of the file
606  * and cancel any pending one.
607  *
608  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
609  * @wait_oplock_handler: must be false if called from oplock_break_handler
610  * @offload:	not offloaded on close and oplock breaks
611  *
612  */
613 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
614 		       bool wait_oplock_handler, bool offload)
615 {
616 	struct inode *inode = d_inode(cifs_file->dentry);
617 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
618 	struct TCP_Server_Info *server = tcon->ses->server;
619 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
620 	struct super_block *sb = inode->i_sb;
621 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
622 	struct cifs_fid fid = {};
623 	struct cifs_pending_open open;
624 	bool oplock_break_cancelled;
625 
626 	spin_lock(&tcon->open_file_lock);
627 	spin_lock(&cifsi->open_file_lock);
628 	spin_lock(&cifs_file->file_info_lock);
629 	if (--cifs_file->count > 0) {
630 		spin_unlock(&cifs_file->file_info_lock);
631 		spin_unlock(&cifsi->open_file_lock);
632 		spin_unlock(&tcon->open_file_lock);
633 		return;
634 	}
635 	spin_unlock(&cifs_file->file_info_lock);
636 
637 	if (server->ops->get_lease_key)
638 		server->ops->get_lease_key(inode, &fid);
639 
640 	/* store open in pending opens to make sure we don't miss lease break */
641 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
642 
643 	/* remove it from the lists */
644 	list_del(&cifs_file->flist);
645 	list_del(&cifs_file->tlist);
646 	atomic_dec(&tcon->num_local_opens);
647 
648 	if (list_empty(&cifsi->openFileList)) {
649 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
650 			 d_inode(cifs_file->dentry));
651 		/*
652 		 * In strict cache mode we need invalidate mapping on the last
653 		 * close  because it may cause a error when we open this file
654 		 * again and get at least level II oplock.
655 		 */
656 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
657 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
658 		cifs_set_oplock_level(cifsi, 0);
659 	}
660 
661 	spin_unlock(&cifsi->open_file_lock);
662 	spin_unlock(&tcon->open_file_lock);
663 
664 	oplock_break_cancelled = wait_oplock_handler ?
665 		cancel_work_sync(&cifs_file->oplock_break) : false;
666 
667 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
668 		struct TCP_Server_Info *server = tcon->ses->server;
669 		unsigned int xid;
670 
671 		xid = get_xid();
672 		if (server->ops->close_getattr)
673 			server->ops->close_getattr(xid, tcon, cifs_file);
674 		else if (server->ops->close)
675 			server->ops->close(xid, tcon, &cifs_file->fid);
676 		_free_xid(xid);
677 	}
678 
679 	if (oplock_break_cancelled)
680 		cifs_done_oplock_break(cifsi);
681 
682 	cifs_del_pending_open(&open);
683 
684 	if (offload)
685 		queue_work(fileinfo_put_wq, &cifs_file->put);
686 	else
687 		cifsFileInfo_put_final(cifs_file);
688 }
689 
690 int cifs_open(struct inode *inode, struct file *file)
691 
692 {
693 	int rc = -EACCES;
694 	unsigned int xid;
695 	__u32 oplock;
696 	struct cifs_sb_info *cifs_sb;
697 	struct TCP_Server_Info *server;
698 	struct cifs_tcon *tcon;
699 	struct tcon_link *tlink;
700 	struct cifsFileInfo *cfile = NULL;
701 	void *page;
702 	const char *full_path;
703 	bool posix_open_ok = false;
704 	struct cifs_fid fid = {};
705 	struct cifs_pending_open open;
706 	struct cifs_open_info_data data = {};
707 
708 	xid = get_xid();
709 
710 	cifs_sb = CIFS_SB(inode->i_sb);
711 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
712 		free_xid(xid);
713 		return -EIO;
714 	}
715 
716 	tlink = cifs_sb_tlink(cifs_sb);
717 	if (IS_ERR(tlink)) {
718 		free_xid(xid);
719 		return PTR_ERR(tlink);
720 	}
721 	tcon = tlink_tcon(tlink);
722 	server = tcon->ses->server;
723 
724 	page = alloc_dentry_path();
725 	full_path = build_path_from_dentry(file_dentry(file), page);
726 	if (IS_ERR(full_path)) {
727 		rc = PTR_ERR(full_path);
728 		goto out;
729 	}
730 
731 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
732 		 inode, file->f_flags, full_path);
733 
734 	if (file->f_flags & O_DIRECT &&
735 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
736 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
737 			file->f_op = &cifs_file_direct_nobrl_ops;
738 		else
739 			file->f_op = &cifs_file_direct_ops;
740 	}
741 
742 	/* Get the cached handle as SMB2 close is deferred */
743 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
744 	if (rc == 0) {
745 		if (file->f_flags == cfile->f_flags) {
746 			file->private_data = cfile;
747 			spin_lock(&CIFS_I(inode)->deferred_lock);
748 			cifs_del_deferred_close(cfile);
749 			spin_unlock(&CIFS_I(inode)->deferred_lock);
750 			goto use_cache;
751 		} else {
752 			_cifsFileInfo_put(cfile, true, false);
753 		}
754 	}
755 
756 	if (server->oplocks)
757 		oplock = REQ_OPLOCK;
758 	else
759 		oplock = 0;
760 
761 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
762 	if (!tcon->broken_posix_open && tcon->unix_ext &&
763 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
764 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
765 		/* can not refresh inode info since size could be stale */
766 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
767 				cifs_sb->ctx->file_mode /* ignored */,
768 				file->f_flags, &oplock, &fid.netfid, xid);
769 		if (rc == 0) {
770 			cifs_dbg(FYI, "posix open succeeded\n");
771 			posix_open_ok = true;
772 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
773 			if (tcon->ses->serverNOS)
774 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
775 					 tcon->ses->ip_addr,
776 					 tcon->ses->serverNOS);
777 			tcon->broken_posix_open = true;
778 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
779 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
780 			goto out;
781 		/*
782 		 * Else fallthrough to retry open the old way on network i/o
783 		 * or DFS errors.
784 		 */
785 	}
786 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
787 
788 	if (server->ops->get_lease_key)
789 		server->ops->get_lease_key(inode, &fid);
790 
791 	cifs_add_pending_open(&fid, tlink, &open);
792 
793 	if (!posix_open_ok) {
794 		if (server->ops->get_lease_key)
795 			server->ops->get_lease_key(inode, &fid);
796 
797 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
798 				  xid, &data);
799 		if (rc) {
800 			cifs_del_pending_open(&open);
801 			goto out;
802 		}
803 	}
804 
805 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
806 	if (cfile == NULL) {
807 		if (server->ops->close)
808 			server->ops->close(xid, tcon, &fid);
809 		cifs_del_pending_open(&open);
810 		rc = -ENOMEM;
811 		goto out;
812 	}
813 
814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
815 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
816 		/*
817 		 * Time to set mode which we can not set earlier due to
818 		 * problems creating new read-only files.
819 		 */
820 		struct cifs_unix_set_info_args args = {
821 			.mode	= inode->i_mode,
822 			.uid	= INVALID_UID, /* no change */
823 			.gid	= INVALID_GID, /* no change */
824 			.ctime	= NO_CHANGE_64,
825 			.atime	= NO_CHANGE_64,
826 			.mtime	= NO_CHANGE_64,
827 			.device	= 0,
828 		};
829 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
830 				       cfile->pid);
831 	}
832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
833 
834 use_cache:
835 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
836 			   file->f_mode & FMODE_WRITE);
837 	if (file->f_flags & O_DIRECT &&
838 	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
839 	     file->f_flags & O_APPEND))
840 		cifs_invalidate_cache(file_inode(file),
841 				      FSCACHE_INVAL_DIO_WRITE);
842 
843 out:
844 	free_dentry_path(page);
845 	free_xid(xid);
846 	cifs_put_tlink(tlink);
847 	cifs_free_open_info(&data);
848 	return rc;
849 }
850 
851 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
852 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
853 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
854 
855 /*
856  * Try to reacquire byte range locks that were released when session
857  * to server was lost.
858  */
859 static int
860 cifs_relock_file(struct cifsFileInfo *cfile)
861 {
862 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
863 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
864 	int rc = 0;
865 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
866 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
867 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
868 
869 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
870 	if (cinode->can_cache_brlcks) {
871 		/* can cache locks - no need to relock */
872 		up_read(&cinode->lock_sem);
873 		return rc;
874 	}
875 
876 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
877 	if (cap_unix(tcon->ses) &&
878 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
879 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
880 		rc = cifs_push_posix_locks(cfile);
881 	else
882 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
883 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
884 
885 	up_read(&cinode->lock_sem);
886 	return rc;
887 }
888 
889 static int
890 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
891 {
892 	int rc = -EACCES;
893 	unsigned int xid;
894 	__u32 oplock;
895 	struct cifs_sb_info *cifs_sb;
896 	struct cifs_tcon *tcon;
897 	struct TCP_Server_Info *server;
898 	struct cifsInodeInfo *cinode;
899 	struct inode *inode;
900 	void *page;
901 	const char *full_path;
902 	int desired_access;
903 	int disposition = FILE_OPEN;
904 	int create_options = CREATE_NOT_DIR;
905 	struct cifs_open_parms oparms;
906 
907 	xid = get_xid();
908 	mutex_lock(&cfile->fh_mutex);
909 	if (!cfile->invalidHandle) {
910 		mutex_unlock(&cfile->fh_mutex);
911 		free_xid(xid);
912 		return 0;
913 	}
914 
915 	inode = d_inode(cfile->dentry);
916 	cifs_sb = CIFS_SB(inode->i_sb);
917 	tcon = tlink_tcon(cfile->tlink);
918 	server = tcon->ses->server;
919 
920 	/*
921 	 * Can not grab rename sem here because various ops, including those
922 	 * that already have the rename sem can end up causing writepage to get
923 	 * called and if the server was down that means we end up here, and we
924 	 * can never tell if the caller already has the rename_sem.
925 	 */
926 	page = alloc_dentry_path();
927 	full_path = build_path_from_dentry(cfile->dentry, page);
928 	if (IS_ERR(full_path)) {
929 		mutex_unlock(&cfile->fh_mutex);
930 		free_dentry_path(page);
931 		free_xid(xid);
932 		return PTR_ERR(full_path);
933 	}
934 
935 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
936 		 inode, cfile->f_flags, full_path);
937 
938 	if (tcon->ses->server->oplocks)
939 		oplock = REQ_OPLOCK;
940 	else
941 		oplock = 0;
942 
943 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
944 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
945 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
946 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
947 		/*
948 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
949 		 * original open. Must mask them off for a reopen.
950 		 */
951 		unsigned int oflags = cfile->f_flags &
952 						~(O_CREAT | O_EXCL | O_TRUNC);
953 
954 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
955 				     cifs_sb->ctx->file_mode /* ignored */,
956 				     oflags, &oplock, &cfile->fid.netfid, xid);
957 		if (rc == 0) {
958 			cifs_dbg(FYI, "posix reopen succeeded\n");
959 			oparms.reconnect = true;
960 			goto reopen_success;
961 		}
962 		/*
963 		 * fallthrough to retry open the old way on errors, especially
964 		 * in the reconnect path it is important to retry hard
965 		 */
966 	}
967 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
968 
969 	desired_access = cifs_convert_flags(cfile->f_flags);
970 
971 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
972 	if (cfile->f_flags & O_SYNC)
973 		create_options |= CREATE_WRITE_THROUGH;
974 
975 	if (cfile->f_flags & O_DIRECT)
976 		create_options |= CREATE_NO_BUFFER;
977 
978 	if (server->ops->get_lease_key)
979 		server->ops->get_lease_key(inode, &cfile->fid);
980 
981 	oparms = (struct cifs_open_parms) {
982 		.tcon = tcon,
983 		.cifs_sb = cifs_sb,
984 		.desired_access = desired_access,
985 		.create_options = cifs_create_options(cifs_sb, create_options),
986 		.disposition = disposition,
987 		.path = full_path,
988 		.fid = &cfile->fid,
989 		.reconnect = true,
990 	};
991 
992 	/*
993 	 * Can not refresh inode by passing in file_info buf to be returned by
994 	 * ops->open and then calling get_inode_info with returned buf since
995 	 * file might have write behind data that needs to be flushed and server
996 	 * version of file size can be stale. If we knew for sure that inode was
997 	 * not dirty locally we could do this.
998 	 */
999 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000 	if (rc == -ENOENT && oparms.reconnect == false) {
1001 		/* durable handle timeout is expired - open the file again */
1002 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1003 		/* indicate that we need to relock the file */
1004 		oparms.reconnect = true;
1005 	}
1006 
1007 	if (rc) {
1008 		mutex_unlock(&cfile->fh_mutex);
1009 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1010 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1011 		goto reopen_error_exit;
1012 	}
1013 
1014 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1015 reopen_success:
1016 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1017 	cfile->invalidHandle = false;
1018 	mutex_unlock(&cfile->fh_mutex);
1019 	cinode = CIFS_I(inode);
1020 
1021 	if (can_flush) {
1022 		rc = filemap_write_and_wait(inode->i_mapping);
1023 		if (!is_interrupt_error(rc))
1024 			mapping_set_error(inode->i_mapping, rc);
1025 
1026 		if (tcon->posix_extensions)
1027 			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
1028 		else if (tcon->unix_ext)
1029 			rc = cifs_get_inode_info_unix(&inode, full_path,
1030 						      inode->i_sb, xid);
1031 		else
1032 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1033 						 inode->i_sb, xid, NULL);
1034 	}
1035 	/*
1036 	 * Else we are writing out data to server already and could deadlock if
1037 	 * we tried to flush data, and since we do not know if we have data that
1038 	 * would invalidate the current end of file on the server we can not go
1039 	 * to the server to get the new inode info.
1040 	 */
1041 
1042 	/*
1043 	 * If the server returned a read oplock and we have mandatory brlocks,
1044 	 * set oplock level to None.
1045 	 */
1046 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1047 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1048 		oplock = 0;
1049 	}
1050 
1051 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1052 	if (oparms.reconnect)
1053 		cifs_relock_file(cfile);
1054 
1055 reopen_error_exit:
1056 	free_dentry_path(page);
1057 	free_xid(xid);
1058 	return rc;
1059 }
1060 
1061 void smb2_deferred_work_close(struct work_struct *work)
1062 {
1063 	struct cifsFileInfo *cfile = container_of(work,
1064 			struct cifsFileInfo, deferred.work);
1065 
1066 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1067 	cifs_del_deferred_close(cfile);
1068 	cfile->deferred_close_scheduled = false;
1069 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1070 	_cifsFileInfo_put(cfile, true, false);
1071 }
1072 
1073 int cifs_close(struct inode *inode, struct file *file)
1074 {
1075 	struct cifsFileInfo *cfile;
1076 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1077 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1078 	struct cifs_deferred_close *dclose;
1079 
1080 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1081 
1082 	if (file->private_data != NULL) {
1083 		cfile = file->private_data;
1084 		file->private_data = NULL;
1085 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1086 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1087 		    && cinode->lease_granted &&
1088 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1089 		    dclose) {
1090 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1091 				inode_set_mtime_to_ts(inode,
1092 						      inode_set_ctime_current(inode));
1093 			}
1094 			spin_lock(&cinode->deferred_lock);
1095 			cifs_add_deferred_close(cfile, dclose);
1096 			if (cfile->deferred_close_scheduled &&
1097 			    delayed_work_pending(&cfile->deferred)) {
1098 				/*
1099 				 * If there is no pending work, mod_delayed_work queues new work.
1100 				 * So, Increase the ref count to avoid use-after-free.
1101 				 */
1102 				if (!mod_delayed_work(deferredclose_wq,
1103 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1104 					cifsFileInfo_get(cfile);
1105 			} else {
1106 				/* Deferred close for files */
1107 				queue_delayed_work(deferredclose_wq,
1108 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1109 				cfile->deferred_close_scheduled = true;
1110 				spin_unlock(&cinode->deferred_lock);
1111 				return 0;
1112 			}
1113 			spin_unlock(&cinode->deferred_lock);
1114 			_cifsFileInfo_put(cfile, true, false);
1115 		} else {
1116 			_cifsFileInfo_put(cfile, true, false);
1117 			kfree(dclose);
1118 		}
1119 	}
1120 
1121 	/* return code from the ->release op is always ignored */
1122 	return 0;
1123 }
1124 
1125 void
1126 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1127 {
1128 	struct cifsFileInfo *open_file, *tmp;
1129 	struct list_head tmp_list;
1130 
1131 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1132 		return;
1133 
1134 	tcon->need_reopen_files = false;
1135 
1136 	cifs_dbg(FYI, "Reopen persistent handles\n");
1137 	INIT_LIST_HEAD(&tmp_list);
1138 
1139 	/* list all files open on tree connection, reopen resilient handles  */
1140 	spin_lock(&tcon->open_file_lock);
1141 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1142 		if (!open_file->invalidHandle)
1143 			continue;
1144 		cifsFileInfo_get(open_file);
1145 		list_add_tail(&open_file->rlist, &tmp_list);
1146 	}
1147 	spin_unlock(&tcon->open_file_lock);
1148 
1149 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1150 		if (cifs_reopen_file(open_file, false /* do not flush */))
1151 			tcon->need_reopen_files = true;
1152 		list_del_init(&open_file->rlist);
1153 		cifsFileInfo_put(open_file);
1154 	}
1155 }
1156 
1157 int cifs_closedir(struct inode *inode, struct file *file)
1158 {
1159 	int rc = 0;
1160 	unsigned int xid;
1161 	struct cifsFileInfo *cfile = file->private_data;
1162 	struct cifs_tcon *tcon;
1163 	struct TCP_Server_Info *server;
1164 	char *buf;
1165 
1166 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1167 
1168 	if (cfile == NULL)
1169 		return rc;
1170 
1171 	xid = get_xid();
1172 	tcon = tlink_tcon(cfile->tlink);
1173 	server = tcon->ses->server;
1174 
1175 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1176 	spin_lock(&cfile->file_info_lock);
1177 	if (server->ops->dir_needs_close(cfile)) {
1178 		cfile->invalidHandle = true;
1179 		spin_unlock(&cfile->file_info_lock);
1180 		if (server->ops->close_dir)
1181 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1182 		else
1183 			rc = -ENOSYS;
1184 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1185 		/* not much we can do if it fails anyway, ignore rc */
1186 		rc = 0;
1187 	} else
1188 		spin_unlock(&cfile->file_info_lock);
1189 
1190 	buf = cfile->srch_inf.ntwrk_buf_start;
1191 	if (buf) {
1192 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1193 		cfile->srch_inf.ntwrk_buf_start = NULL;
1194 		if (cfile->srch_inf.smallBuf)
1195 			cifs_small_buf_release(buf);
1196 		else
1197 			cifs_buf_release(buf);
1198 	}
1199 
1200 	cifs_put_tlink(cfile->tlink);
1201 	kfree(file->private_data);
1202 	file->private_data = NULL;
1203 	/* BB can we lock the filestruct while this is going on? */
1204 	free_xid(xid);
1205 	return rc;
1206 }
1207 
1208 static struct cifsLockInfo *
1209 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1210 {
1211 	struct cifsLockInfo *lock =
1212 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1213 	if (!lock)
1214 		return lock;
1215 	lock->offset = offset;
1216 	lock->length = length;
1217 	lock->type = type;
1218 	lock->pid = current->tgid;
1219 	lock->flags = flags;
1220 	INIT_LIST_HEAD(&lock->blist);
1221 	init_waitqueue_head(&lock->block_q);
1222 	return lock;
1223 }
1224 
1225 void
1226 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1227 {
1228 	struct cifsLockInfo *li, *tmp;
1229 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1230 		list_del_init(&li->blist);
1231 		wake_up(&li->block_q);
1232 	}
1233 }
1234 
1235 #define CIFS_LOCK_OP	0
1236 #define CIFS_READ_OP	1
1237 #define CIFS_WRITE_OP	2
1238 
1239 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1240 static bool
1241 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1242 			    __u64 length, __u8 type, __u16 flags,
1243 			    struct cifsFileInfo *cfile,
1244 			    struct cifsLockInfo **conf_lock, int rw_check)
1245 {
1246 	struct cifsLockInfo *li;
1247 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1248 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1249 
1250 	list_for_each_entry(li, &fdlocks->locks, llist) {
1251 		if (offset + length <= li->offset ||
1252 		    offset >= li->offset + li->length)
1253 			continue;
1254 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1255 		    server->ops->compare_fids(cfile, cur_cfile)) {
1256 			/* shared lock prevents write op through the same fid */
1257 			if (!(li->type & server->vals->shared_lock_type) ||
1258 			    rw_check != CIFS_WRITE_OP)
1259 				continue;
1260 		}
1261 		if ((type & server->vals->shared_lock_type) &&
1262 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1263 		     current->tgid == li->pid) || type == li->type))
1264 			continue;
1265 		if (rw_check == CIFS_LOCK_OP &&
1266 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1267 		    server->ops->compare_fids(cfile, cur_cfile))
1268 			continue;
1269 		if (conf_lock)
1270 			*conf_lock = li;
1271 		return true;
1272 	}
1273 	return false;
1274 }
1275 
1276 bool
1277 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1278 			__u8 type, __u16 flags,
1279 			struct cifsLockInfo **conf_lock, int rw_check)
1280 {
1281 	bool rc = false;
1282 	struct cifs_fid_locks *cur;
1283 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1284 
1285 	list_for_each_entry(cur, &cinode->llist, llist) {
1286 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1287 						 flags, cfile, conf_lock,
1288 						 rw_check);
1289 		if (rc)
1290 			break;
1291 	}
1292 
1293 	return rc;
1294 }
1295 
1296 /*
1297  * Check if there is another lock that prevents us to set the lock (mandatory
1298  * style). If such a lock exists, update the flock structure with its
1299  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1300  * or leave it the same if we can't. Returns 0 if we don't need to request to
1301  * the server or 1 otherwise.
1302  */
1303 static int
1304 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1305 	       __u8 type, struct file_lock *flock)
1306 {
1307 	int rc = 0;
1308 	struct cifsLockInfo *conf_lock;
1309 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1310 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1311 	bool exist;
1312 
1313 	down_read(&cinode->lock_sem);
1314 
1315 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1316 					flock->fl_flags, &conf_lock,
1317 					CIFS_LOCK_OP);
1318 	if (exist) {
1319 		flock->fl_start = conf_lock->offset;
1320 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1321 		flock->fl_pid = conf_lock->pid;
1322 		if (conf_lock->type & server->vals->shared_lock_type)
1323 			flock->fl_type = F_RDLCK;
1324 		else
1325 			flock->fl_type = F_WRLCK;
1326 	} else if (!cinode->can_cache_brlcks)
1327 		rc = 1;
1328 	else
1329 		flock->fl_type = F_UNLCK;
1330 
1331 	up_read(&cinode->lock_sem);
1332 	return rc;
1333 }
1334 
1335 static void
1336 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1337 {
1338 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1339 	cifs_down_write(&cinode->lock_sem);
1340 	list_add_tail(&lock->llist, &cfile->llist->locks);
1341 	up_write(&cinode->lock_sem);
1342 }
1343 
1344 /*
1345  * Set the byte-range lock (mandatory style). Returns:
1346  * 1) 0, if we set the lock and don't need to request to the server;
1347  * 2) 1, if no locks prevent us but we need to request to the server;
1348  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1349  */
1350 static int
1351 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1352 		 bool wait)
1353 {
1354 	struct cifsLockInfo *conf_lock;
1355 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1356 	bool exist;
1357 	int rc = 0;
1358 
1359 try_again:
1360 	exist = false;
1361 	cifs_down_write(&cinode->lock_sem);
1362 
1363 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1364 					lock->type, lock->flags, &conf_lock,
1365 					CIFS_LOCK_OP);
1366 	if (!exist && cinode->can_cache_brlcks) {
1367 		list_add_tail(&lock->llist, &cfile->llist->locks);
1368 		up_write(&cinode->lock_sem);
1369 		return rc;
1370 	}
1371 
1372 	if (!exist)
1373 		rc = 1;
1374 	else if (!wait)
1375 		rc = -EACCES;
1376 	else {
1377 		list_add_tail(&lock->blist, &conf_lock->blist);
1378 		up_write(&cinode->lock_sem);
1379 		rc = wait_event_interruptible(lock->block_q,
1380 					(lock->blist.prev == &lock->blist) &&
1381 					(lock->blist.next == &lock->blist));
1382 		if (!rc)
1383 			goto try_again;
1384 		cifs_down_write(&cinode->lock_sem);
1385 		list_del_init(&lock->blist);
1386 	}
1387 
1388 	up_write(&cinode->lock_sem);
1389 	return rc;
1390 }
1391 
1392 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1393 /*
1394  * Check if there is another lock that prevents us to set the lock (posix
1395  * style). If such a lock exists, update the flock structure with its
1396  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1397  * or leave it the same if we can't. Returns 0 if we don't need to request to
1398  * the server or 1 otherwise.
1399  */
1400 static int
1401 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1402 {
1403 	int rc = 0;
1404 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1405 	unsigned char saved_type = flock->fl_type;
1406 
1407 	if ((flock->fl_flags & FL_POSIX) == 0)
1408 		return 1;
1409 
1410 	down_read(&cinode->lock_sem);
1411 	posix_test_lock(file, flock);
1412 
1413 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1414 		flock->fl_type = saved_type;
1415 		rc = 1;
1416 	}
1417 
1418 	up_read(&cinode->lock_sem);
1419 	return rc;
1420 }
1421 
1422 /*
1423  * Set the byte-range lock (posix style). Returns:
1424  * 1) <0, if the error occurs while setting the lock;
1425  * 2) 0, if we set the lock and don't need to request to the server;
1426  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1427  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1428  */
1429 static int
1430 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1431 {
1432 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1433 	int rc = FILE_LOCK_DEFERRED + 1;
1434 
1435 	if ((flock->fl_flags & FL_POSIX) == 0)
1436 		return rc;
1437 
1438 	cifs_down_write(&cinode->lock_sem);
1439 	if (!cinode->can_cache_brlcks) {
1440 		up_write(&cinode->lock_sem);
1441 		return rc;
1442 	}
1443 
1444 	rc = posix_lock_file(file, flock, NULL);
1445 	up_write(&cinode->lock_sem);
1446 	return rc;
1447 }
1448 
1449 int
1450 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1451 {
1452 	unsigned int xid;
1453 	int rc = 0, stored_rc;
1454 	struct cifsLockInfo *li, *tmp;
1455 	struct cifs_tcon *tcon;
1456 	unsigned int num, max_num, max_buf;
1457 	LOCKING_ANDX_RANGE *buf, *cur;
1458 	static const int types[] = {
1459 		LOCKING_ANDX_LARGE_FILES,
1460 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1461 	};
1462 	int i;
1463 
1464 	xid = get_xid();
1465 	tcon = tlink_tcon(cfile->tlink);
1466 
1467 	/*
1468 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1469 	 * and check it before using.
1470 	 */
1471 	max_buf = tcon->ses->server->maxBuf;
1472 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1473 		free_xid(xid);
1474 		return -EINVAL;
1475 	}
1476 
1477 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1478 		     PAGE_SIZE);
1479 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1480 			PAGE_SIZE);
1481 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1482 						sizeof(LOCKING_ANDX_RANGE);
1483 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1484 	if (!buf) {
1485 		free_xid(xid);
1486 		return -ENOMEM;
1487 	}
1488 
1489 	for (i = 0; i < 2; i++) {
1490 		cur = buf;
1491 		num = 0;
1492 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1493 			if (li->type != types[i])
1494 				continue;
1495 			cur->Pid = cpu_to_le16(li->pid);
1496 			cur->LengthLow = cpu_to_le32((u32)li->length);
1497 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1498 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1499 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1500 			if (++num == max_num) {
1501 				stored_rc = cifs_lockv(xid, tcon,
1502 						       cfile->fid.netfid,
1503 						       (__u8)li->type, 0, num,
1504 						       buf);
1505 				if (stored_rc)
1506 					rc = stored_rc;
1507 				cur = buf;
1508 				num = 0;
1509 			} else
1510 				cur++;
1511 		}
1512 
1513 		if (num) {
1514 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1515 					       (__u8)types[i], 0, num, buf);
1516 			if (stored_rc)
1517 				rc = stored_rc;
1518 		}
1519 	}
1520 
1521 	kfree(buf);
1522 	free_xid(xid);
1523 	return rc;
1524 }
1525 
1526 static __u32
1527 hash_lockowner(fl_owner_t owner)
1528 {
1529 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1530 }
1531 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1532 
1533 struct lock_to_push {
1534 	struct list_head llist;
1535 	__u64 offset;
1536 	__u64 length;
1537 	__u32 pid;
1538 	__u16 netfid;
1539 	__u8 type;
1540 };
1541 
1542 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1543 static int
1544 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1545 {
1546 	struct inode *inode = d_inode(cfile->dentry);
1547 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1548 	struct file_lock *flock;
1549 	struct file_lock_context *flctx = locks_inode_context(inode);
1550 	unsigned int count = 0, i;
1551 	int rc = 0, xid, type;
1552 	struct list_head locks_to_send, *el;
1553 	struct lock_to_push *lck, *tmp;
1554 	__u64 length;
1555 
1556 	xid = get_xid();
1557 
1558 	if (!flctx)
1559 		goto out;
1560 
1561 	spin_lock(&flctx->flc_lock);
1562 	list_for_each(el, &flctx->flc_posix) {
1563 		count++;
1564 	}
1565 	spin_unlock(&flctx->flc_lock);
1566 
1567 	INIT_LIST_HEAD(&locks_to_send);
1568 
1569 	/*
1570 	 * Allocating count locks is enough because no FL_POSIX locks can be
1571 	 * added to the list while we are holding cinode->lock_sem that
1572 	 * protects locking operations of this inode.
1573 	 */
1574 	for (i = 0; i < count; i++) {
1575 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1576 		if (!lck) {
1577 			rc = -ENOMEM;
1578 			goto err_out;
1579 		}
1580 		list_add_tail(&lck->llist, &locks_to_send);
1581 	}
1582 
1583 	el = locks_to_send.next;
1584 	spin_lock(&flctx->flc_lock);
1585 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1586 		if (el == &locks_to_send) {
1587 			/*
1588 			 * The list ended. We don't have enough allocated
1589 			 * structures - something is really wrong.
1590 			 */
1591 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1592 			break;
1593 		}
1594 		length = cifs_flock_len(flock);
1595 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1596 			type = CIFS_RDLCK;
1597 		else
1598 			type = CIFS_WRLCK;
1599 		lck = list_entry(el, struct lock_to_push, llist);
1600 		lck->pid = hash_lockowner(flock->fl_owner);
1601 		lck->netfid = cfile->fid.netfid;
1602 		lck->length = length;
1603 		lck->type = type;
1604 		lck->offset = flock->fl_start;
1605 	}
1606 	spin_unlock(&flctx->flc_lock);
1607 
1608 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1609 		int stored_rc;
1610 
1611 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1612 					     lck->offset, lck->length, NULL,
1613 					     lck->type, 0);
1614 		if (stored_rc)
1615 			rc = stored_rc;
1616 		list_del(&lck->llist);
1617 		kfree(lck);
1618 	}
1619 
1620 out:
1621 	free_xid(xid);
1622 	return rc;
1623 err_out:
1624 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1625 		list_del(&lck->llist);
1626 		kfree(lck);
1627 	}
1628 	goto out;
1629 }
1630 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1631 
1632 static int
1633 cifs_push_locks(struct cifsFileInfo *cfile)
1634 {
1635 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1636 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1637 	int rc = 0;
1638 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1639 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1640 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1641 
1642 	/* we are going to update can_cache_brlcks here - need a write access */
1643 	cifs_down_write(&cinode->lock_sem);
1644 	if (!cinode->can_cache_brlcks) {
1645 		up_write(&cinode->lock_sem);
1646 		return rc;
1647 	}
1648 
1649 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1650 	if (cap_unix(tcon->ses) &&
1651 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1652 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1653 		rc = cifs_push_posix_locks(cfile);
1654 	else
1655 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1656 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1657 
1658 	cinode->can_cache_brlcks = false;
1659 	up_write(&cinode->lock_sem);
1660 	return rc;
1661 }
1662 
1663 static void
1664 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1665 		bool *wait_flag, struct TCP_Server_Info *server)
1666 {
1667 	if (flock->fl_flags & FL_POSIX)
1668 		cifs_dbg(FYI, "Posix\n");
1669 	if (flock->fl_flags & FL_FLOCK)
1670 		cifs_dbg(FYI, "Flock\n");
1671 	if (flock->fl_flags & FL_SLEEP) {
1672 		cifs_dbg(FYI, "Blocking lock\n");
1673 		*wait_flag = true;
1674 	}
1675 	if (flock->fl_flags & FL_ACCESS)
1676 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1677 	if (flock->fl_flags & FL_LEASE)
1678 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1679 	if (flock->fl_flags &
1680 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1681 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1682 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1683 
1684 	*type = server->vals->large_lock_type;
1685 	if (flock->fl_type == F_WRLCK) {
1686 		cifs_dbg(FYI, "F_WRLCK\n");
1687 		*type |= server->vals->exclusive_lock_type;
1688 		*lock = 1;
1689 	} else if (flock->fl_type == F_UNLCK) {
1690 		cifs_dbg(FYI, "F_UNLCK\n");
1691 		*type |= server->vals->unlock_lock_type;
1692 		*unlock = 1;
1693 		/* Check if unlock includes more than one lock range */
1694 	} else if (flock->fl_type == F_RDLCK) {
1695 		cifs_dbg(FYI, "F_RDLCK\n");
1696 		*type |= server->vals->shared_lock_type;
1697 		*lock = 1;
1698 	} else if (flock->fl_type == F_EXLCK) {
1699 		cifs_dbg(FYI, "F_EXLCK\n");
1700 		*type |= server->vals->exclusive_lock_type;
1701 		*lock = 1;
1702 	} else if (flock->fl_type == F_SHLCK) {
1703 		cifs_dbg(FYI, "F_SHLCK\n");
1704 		*type |= server->vals->shared_lock_type;
1705 		*lock = 1;
1706 	} else
1707 		cifs_dbg(FYI, "Unknown type of lock\n");
1708 }
1709 
1710 static int
1711 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1712 	   bool wait_flag, bool posix_lck, unsigned int xid)
1713 {
1714 	int rc = 0;
1715 	__u64 length = cifs_flock_len(flock);
1716 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1717 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1718 	struct TCP_Server_Info *server = tcon->ses->server;
1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1720 	__u16 netfid = cfile->fid.netfid;
1721 
1722 	if (posix_lck) {
1723 		int posix_lock_type;
1724 
1725 		rc = cifs_posix_lock_test(file, flock);
1726 		if (!rc)
1727 			return rc;
1728 
1729 		if (type & server->vals->shared_lock_type)
1730 			posix_lock_type = CIFS_RDLCK;
1731 		else
1732 			posix_lock_type = CIFS_WRLCK;
1733 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1734 				      hash_lockowner(flock->fl_owner),
1735 				      flock->fl_start, length, flock,
1736 				      posix_lock_type, wait_flag);
1737 		return rc;
1738 	}
1739 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1740 
1741 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1742 	if (!rc)
1743 		return rc;
1744 
1745 	/* BB we could chain these into one lock request BB */
1746 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1747 				    1, 0, false);
1748 	if (rc == 0) {
1749 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1750 					    type, 0, 1, false);
1751 		flock->fl_type = F_UNLCK;
1752 		if (rc != 0)
1753 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1754 				 rc);
1755 		return 0;
1756 	}
1757 
1758 	if (type & server->vals->shared_lock_type) {
1759 		flock->fl_type = F_WRLCK;
1760 		return 0;
1761 	}
1762 
1763 	type &= ~server->vals->exclusive_lock_type;
1764 
1765 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766 				    type | server->vals->shared_lock_type,
1767 				    1, 0, false);
1768 	if (rc == 0) {
1769 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770 			type | server->vals->shared_lock_type, 0, 1, false);
1771 		flock->fl_type = F_RDLCK;
1772 		if (rc != 0)
1773 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1774 				 rc);
1775 	} else
1776 		flock->fl_type = F_WRLCK;
1777 
1778 	return 0;
1779 }
1780 
1781 void
1782 cifs_move_llist(struct list_head *source, struct list_head *dest)
1783 {
1784 	struct list_head *li, *tmp;
1785 	list_for_each_safe(li, tmp, source)
1786 		list_move(li, dest);
1787 }
1788 
1789 void
1790 cifs_free_llist(struct list_head *llist)
1791 {
1792 	struct cifsLockInfo *li, *tmp;
1793 	list_for_each_entry_safe(li, tmp, llist, llist) {
1794 		cifs_del_lock_waiters(li);
1795 		list_del(&li->llist);
1796 		kfree(li);
1797 	}
1798 }
1799 
1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1801 int
1802 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1803 		  unsigned int xid)
1804 {
1805 	int rc = 0, stored_rc;
1806 	static const int types[] = {
1807 		LOCKING_ANDX_LARGE_FILES,
1808 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1809 	};
1810 	unsigned int i;
1811 	unsigned int max_num, num, max_buf;
1812 	LOCKING_ANDX_RANGE *buf, *cur;
1813 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1814 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1815 	struct cifsLockInfo *li, *tmp;
1816 	__u64 length = cifs_flock_len(flock);
1817 	struct list_head tmp_llist;
1818 
1819 	INIT_LIST_HEAD(&tmp_llist);
1820 
1821 	/*
1822 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1823 	 * and check it before using.
1824 	 */
1825 	max_buf = tcon->ses->server->maxBuf;
1826 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1827 		return -EINVAL;
1828 
1829 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1830 		     PAGE_SIZE);
1831 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1832 			PAGE_SIZE);
1833 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1834 						sizeof(LOCKING_ANDX_RANGE);
1835 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1836 	if (!buf)
1837 		return -ENOMEM;
1838 
1839 	cifs_down_write(&cinode->lock_sem);
1840 	for (i = 0; i < 2; i++) {
1841 		cur = buf;
1842 		num = 0;
1843 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1844 			if (flock->fl_start > li->offset ||
1845 			    (flock->fl_start + length) <
1846 			    (li->offset + li->length))
1847 				continue;
1848 			if (current->tgid != li->pid)
1849 				continue;
1850 			if (types[i] != li->type)
1851 				continue;
1852 			if (cinode->can_cache_brlcks) {
1853 				/*
1854 				 * We can cache brlock requests - simply remove
1855 				 * a lock from the file's list.
1856 				 */
1857 				list_del(&li->llist);
1858 				cifs_del_lock_waiters(li);
1859 				kfree(li);
1860 				continue;
1861 			}
1862 			cur->Pid = cpu_to_le16(li->pid);
1863 			cur->LengthLow = cpu_to_le32((u32)li->length);
1864 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1865 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1866 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1867 			/*
1868 			 * We need to save a lock here to let us add it again to
1869 			 * the file's list if the unlock range request fails on
1870 			 * the server.
1871 			 */
1872 			list_move(&li->llist, &tmp_llist);
1873 			if (++num == max_num) {
1874 				stored_rc = cifs_lockv(xid, tcon,
1875 						       cfile->fid.netfid,
1876 						       li->type, num, 0, buf);
1877 				if (stored_rc) {
1878 					/*
1879 					 * We failed on the unlock range
1880 					 * request - add all locks from the tmp
1881 					 * list to the head of the file's list.
1882 					 */
1883 					cifs_move_llist(&tmp_llist,
1884 							&cfile->llist->locks);
1885 					rc = stored_rc;
1886 				} else
1887 					/*
1888 					 * The unlock range request succeed -
1889 					 * free the tmp list.
1890 					 */
1891 					cifs_free_llist(&tmp_llist);
1892 				cur = buf;
1893 				num = 0;
1894 			} else
1895 				cur++;
1896 		}
1897 		if (num) {
1898 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1899 					       types[i], num, 0, buf);
1900 			if (stored_rc) {
1901 				cifs_move_llist(&tmp_llist,
1902 						&cfile->llist->locks);
1903 				rc = stored_rc;
1904 			} else
1905 				cifs_free_llist(&tmp_llist);
1906 		}
1907 	}
1908 
1909 	up_write(&cinode->lock_sem);
1910 	kfree(buf);
1911 	return rc;
1912 }
1913 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1914 
1915 static int
1916 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1917 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1918 	   unsigned int xid)
1919 {
1920 	int rc = 0;
1921 	__u64 length = cifs_flock_len(flock);
1922 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1923 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1924 	struct TCP_Server_Info *server = tcon->ses->server;
1925 	struct inode *inode = d_inode(cfile->dentry);
1926 
1927 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1928 	if (posix_lck) {
1929 		int posix_lock_type;
1930 
1931 		rc = cifs_posix_lock_set(file, flock);
1932 		if (rc <= FILE_LOCK_DEFERRED)
1933 			return rc;
1934 
1935 		if (type & server->vals->shared_lock_type)
1936 			posix_lock_type = CIFS_RDLCK;
1937 		else
1938 			posix_lock_type = CIFS_WRLCK;
1939 
1940 		if (unlock == 1)
1941 			posix_lock_type = CIFS_UNLCK;
1942 
1943 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1944 				      hash_lockowner(flock->fl_owner),
1945 				      flock->fl_start, length,
1946 				      NULL, posix_lock_type, wait_flag);
1947 		goto out;
1948 	}
1949 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1950 	if (lock) {
1951 		struct cifsLockInfo *lock;
1952 
1953 		lock = cifs_lock_init(flock->fl_start, length, type,
1954 				      flock->fl_flags);
1955 		if (!lock)
1956 			return -ENOMEM;
1957 
1958 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1959 		if (rc < 0) {
1960 			kfree(lock);
1961 			return rc;
1962 		}
1963 		if (!rc)
1964 			goto out;
1965 
1966 		/*
1967 		 * Windows 7 server can delay breaking lease from read to None
1968 		 * if we set a byte-range lock on a file - break it explicitly
1969 		 * before sending the lock to the server to be sure the next
1970 		 * read won't conflict with non-overlapted locks due to
1971 		 * pagereading.
1972 		 */
1973 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1974 					CIFS_CACHE_READ(CIFS_I(inode))) {
1975 			cifs_zap_mapping(inode);
1976 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1977 				 inode);
1978 			CIFS_I(inode)->oplock = 0;
1979 		}
1980 
1981 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1982 					    type, 1, 0, wait_flag);
1983 		if (rc) {
1984 			kfree(lock);
1985 			return rc;
1986 		}
1987 
1988 		cifs_lock_add(cfile, lock);
1989 	} else if (unlock)
1990 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1991 
1992 out:
1993 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1994 		/*
1995 		 * If this is a request to remove all locks because we
1996 		 * are closing the file, it doesn't matter if the
1997 		 * unlocking failed as both cifs.ko and the SMB server
1998 		 * remove the lock on file close
1999 		 */
2000 		if (rc) {
2001 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2002 			if (!(flock->fl_flags & FL_CLOSE))
2003 				return rc;
2004 		}
2005 		rc = locks_lock_file_wait(file, flock);
2006 	}
2007 	return rc;
2008 }
2009 
2010 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2011 {
2012 	int rc, xid;
2013 	int lock = 0, unlock = 0;
2014 	bool wait_flag = false;
2015 	bool posix_lck = false;
2016 	struct cifs_sb_info *cifs_sb;
2017 	struct cifs_tcon *tcon;
2018 	struct cifsFileInfo *cfile;
2019 	__u32 type;
2020 
2021 	xid = get_xid();
2022 
2023 	if (!(fl->fl_flags & FL_FLOCK)) {
2024 		rc = -ENOLCK;
2025 		free_xid(xid);
2026 		return rc;
2027 	}
2028 
2029 	cfile = (struct cifsFileInfo *)file->private_data;
2030 	tcon = tlink_tcon(cfile->tlink);
2031 
2032 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2033 			tcon->ses->server);
2034 	cifs_sb = CIFS_FILE_SB(file);
2035 
2036 	if (cap_unix(tcon->ses) &&
2037 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2038 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2039 		posix_lck = true;
2040 
2041 	if (!lock && !unlock) {
2042 		/*
2043 		 * if no lock or unlock then nothing to do since we do not
2044 		 * know what it is
2045 		 */
2046 		rc = -EOPNOTSUPP;
2047 		free_xid(xid);
2048 		return rc;
2049 	}
2050 
2051 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2052 			xid);
2053 	free_xid(xid);
2054 	return rc;
2055 
2056 
2057 }
2058 
2059 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2060 {
2061 	int rc, xid;
2062 	int lock = 0, unlock = 0;
2063 	bool wait_flag = false;
2064 	bool posix_lck = false;
2065 	struct cifs_sb_info *cifs_sb;
2066 	struct cifs_tcon *tcon;
2067 	struct cifsFileInfo *cfile;
2068 	__u32 type;
2069 
2070 	rc = -EACCES;
2071 	xid = get_xid();
2072 
2073 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2074 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2075 		 (long long)flock->fl_end);
2076 
2077 	cfile = (struct cifsFileInfo *)file->private_data;
2078 	tcon = tlink_tcon(cfile->tlink);
2079 
2080 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2081 			tcon->ses->server);
2082 	cifs_sb = CIFS_FILE_SB(file);
2083 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2084 
2085 	if (cap_unix(tcon->ses) &&
2086 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2087 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2088 		posix_lck = true;
2089 	/*
2090 	 * BB add code here to normalize offset and length to account for
2091 	 * negative length which we can not accept over the wire.
2092 	 */
2093 	if (IS_GETLK(cmd)) {
2094 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2095 		free_xid(xid);
2096 		return rc;
2097 	}
2098 
2099 	if (!lock && !unlock) {
2100 		/*
2101 		 * if no lock or unlock then nothing to do since we do not
2102 		 * know what it is
2103 		 */
2104 		free_xid(xid);
2105 		return -EOPNOTSUPP;
2106 	}
2107 
2108 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2109 			xid);
2110 	free_xid(xid);
2111 	return rc;
2112 }
2113 
2114 /*
2115  * update the file size (if needed) after a write. Should be called with
2116  * the inode->i_lock held
2117  */
2118 void
2119 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2120 		      unsigned int bytes_written)
2121 {
2122 	loff_t end_of_write = offset + bytes_written;
2123 
2124 	if (end_of_write > cifsi->server_eof)
2125 		cifsi->server_eof = end_of_write;
2126 }
2127 
2128 static ssize_t
2129 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2130 	   size_t write_size, loff_t *offset)
2131 {
2132 	int rc = 0;
2133 	unsigned int bytes_written = 0;
2134 	unsigned int total_written;
2135 	struct cifs_tcon *tcon;
2136 	struct TCP_Server_Info *server;
2137 	unsigned int xid;
2138 	struct dentry *dentry = open_file->dentry;
2139 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2140 	struct cifs_io_parms io_parms = {0};
2141 
2142 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2143 		 write_size, *offset, dentry);
2144 
2145 	tcon = tlink_tcon(open_file->tlink);
2146 	server = tcon->ses->server;
2147 
2148 	if (!server->ops->sync_write)
2149 		return -ENOSYS;
2150 
2151 	xid = get_xid();
2152 
2153 	for (total_written = 0; write_size > total_written;
2154 	     total_written += bytes_written) {
2155 		rc = -EAGAIN;
2156 		while (rc == -EAGAIN) {
2157 			struct kvec iov[2];
2158 			unsigned int len;
2159 
2160 			if (open_file->invalidHandle) {
2161 				/* we could deadlock if we called
2162 				   filemap_fdatawait from here so tell
2163 				   reopen_file not to flush data to
2164 				   server now */
2165 				rc = cifs_reopen_file(open_file, false);
2166 				if (rc != 0)
2167 					break;
2168 			}
2169 
2170 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2171 				  (unsigned int)write_size - total_written);
2172 			/* iov[0] is reserved for smb header */
2173 			iov[1].iov_base = (char *)write_data + total_written;
2174 			iov[1].iov_len = len;
2175 			io_parms.pid = pid;
2176 			io_parms.tcon = tcon;
2177 			io_parms.offset = *offset;
2178 			io_parms.length = len;
2179 			rc = server->ops->sync_write(xid, &open_file->fid,
2180 					&io_parms, &bytes_written, iov, 1);
2181 		}
2182 		if (rc || (bytes_written == 0)) {
2183 			if (total_written)
2184 				break;
2185 			else {
2186 				free_xid(xid);
2187 				return rc;
2188 			}
2189 		} else {
2190 			spin_lock(&d_inode(dentry)->i_lock);
2191 			cifs_update_eof(cifsi, *offset, bytes_written);
2192 			spin_unlock(&d_inode(dentry)->i_lock);
2193 			*offset += bytes_written;
2194 		}
2195 	}
2196 
2197 	cifs_stats_bytes_written(tcon, total_written);
2198 
2199 	if (total_written > 0) {
2200 		spin_lock(&d_inode(dentry)->i_lock);
2201 		if (*offset > d_inode(dentry)->i_size) {
2202 			i_size_write(d_inode(dentry), *offset);
2203 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2204 		}
2205 		spin_unlock(&d_inode(dentry)->i_lock);
2206 	}
2207 	mark_inode_dirty_sync(d_inode(dentry));
2208 	free_xid(xid);
2209 	return total_written;
2210 }
2211 
2212 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2213 					bool fsuid_only)
2214 {
2215 	struct cifsFileInfo *open_file = NULL;
2216 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2217 
2218 	/* only filter by fsuid on multiuser mounts */
2219 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2220 		fsuid_only = false;
2221 
2222 	spin_lock(&cifs_inode->open_file_lock);
2223 	/* we could simply get the first_list_entry since write-only entries
2224 	   are always at the end of the list but since the first entry might
2225 	   have a close pending, we go through the whole list */
2226 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2227 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2228 			continue;
2229 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2230 			if ((!open_file->invalidHandle)) {
2231 				/* found a good file */
2232 				/* lock it so it will not be closed on us */
2233 				cifsFileInfo_get(open_file);
2234 				spin_unlock(&cifs_inode->open_file_lock);
2235 				return open_file;
2236 			} /* else might as well continue, and look for
2237 			     another, or simply have the caller reopen it
2238 			     again rather than trying to fix this handle */
2239 		} else /* write only file */
2240 			break; /* write only files are last so must be done */
2241 	}
2242 	spin_unlock(&cifs_inode->open_file_lock);
2243 	return NULL;
2244 }
2245 
2246 /* Return -EBADF if no handle is found and general rc otherwise */
2247 int
2248 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2249 		       struct cifsFileInfo **ret_file)
2250 {
2251 	struct cifsFileInfo *open_file, *inv_file = NULL;
2252 	struct cifs_sb_info *cifs_sb;
2253 	bool any_available = false;
2254 	int rc = -EBADF;
2255 	unsigned int refind = 0;
2256 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2257 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2258 	*ret_file = NULL;
2259 
2260 	/*
2261 	 * Having a null inode here (because mapping->host was set to zero by
2262 	 * the VFS or MM) should not happen but we had reports of on oops (due
2263 	 * to it being zero) during stress testcases so we need to check for it
2264 	 */
2265 
2266 	if (cifs_inode == NULL) {
2267 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2268 		dump_stack();
2269 		return rc;
2270 	}
2271 
2272 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2273 
2274 	/* only filter by fsuid on multiuser mounts */
2275 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2276 		fsuid_only = false;
2277 
2278 	spin_lock(&cifs_inode->open_file_lock);
2279 refind_writable:
2280 	if (refind > MAX_REOPEN_ATT) {
2281 		spin_unlock(&cifs_inode->open_file_lock);
2282 		return rc;
2283 	}
2284 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2285 		if (!any_available && open_file->pid != current->tgid)
2286 			continue;
2287 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2288 			continue;
2289 		if (with_delete && !(open_file->fid.access & DELETE))
2290 			continue;
2291 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2292 			if (!open_file->invalidHandle) {
2293 				/* found a good writable file */
2294 				cifsFileInfo_get(open_file);
2295 				spin_unlock(&cifs_inode->open_file_lock);
2296 				*ret_file = open_file;
2297 				return 0;
2298 			} else {
2299 				if (!inv_file)
2300 					inv_file = open_file;
2301 			}
2302 		}
2303 	}
2304 	/* couldn't find useable FH with same pid, try any available */
2305 	if (!any_available) {
2306 		any_available = true;
2307 		goto refind_writable;
2308 	}
2309 
2310 	if (inv_file) {
2311 		any_available = false;
2312 		cifsFileInfo_get(inv_file);
2313 	}
2314 
2315 	spin_unlock(&cifs_inode->open_file_lock);
2316 
2317 	if (inv_file) {
2318 		rc = cifs_reopen_file(inv_file, false);
2319 		if (!rc) {
2320 			*ret_file = inv_file;
2321 			return 0;
2322 		}
2323 
2324 		spin_lock(&cifs_inode->open_file_lock);
2325 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2326 		spin_unlock(&cifs_inode->open_file_lock);
2327 		cifsFileInfo_put(inv_file);
2328 		++refind;
2329 		inv_file = NULL;
2330 		spin_lock(&cifs_inode->open_file_lock);
2331 		goto refind_writable;
2332 	}
2333 
2334 	return rc;
2335 }
2336 
2337 struct cifsFileInfo *
2338 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2339 {
2340 	struct cifsFileInfo *cfile;
2341 	int rc;
2342 
2343 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2344 	if (rc)
2345 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2346 
2347 	return cfile;
2348 }
2349 
2350 int
2351 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2352 		       int flags,
2353 		       struct cifsFileInfo **ret_file)
2354 {
2355 	struct cifsFileInfo *cfile;
2356 	void *page = alloc_dentry_path();
2357 
2358 	*ret_file = NULL;
2359 
2360 	spin_lock(&tcon->open_file_lock);
2361 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2362 		struct cifsInodeInfo *cinode;
2363 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2364 		if (IS_ERR(full_path)) {
2365 			spin_unlock(&tcon->open_file_lock);
2366 			free_dentry_path(page);
2367 			return PTR_ERR(full_path);
2368 		}
2369 		if (strcmp(full_path, name))
2370 			continue;
2371 
2372 		cinode = CIFS_I(d_inode(cfile->dentry));
2373 		spin_unlock(&tcon->open_file_lock);
2374 		free_dentry_path(page);
2375 		return cifs_get_writable_file(cinode, flags, ret_file);
2376 	}
2377 
2378 	spin_unlock(&tcon->open_file_lock);
2379 	free_dentry_path(page);
2380 	return -ENOENT;
2381 }
2382 
2383 int
2384 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2385 		       struct cifsFileInfo **ret_file)
2386 {
2387 	struct cifsFileInfo *cfile;
2388 	void *page = alloc_dentry_path();
2389 
2390 	*ret_file = NULL;
2391 
2392 	spin_lock(&tcon->open_file_lock);
2393 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2394 		struct cifsInodeInfo *cinode;
2395 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2396 		if (IS_ERR(full_path)) {
2397 			spin_unlock(&tcon->open_file_lock);
2398 			free_dentry_path(page);
2399 			return PTR_ERR(full_path);
2400 		}
2401 		if (strcmp(full_path, name))
2402 			continue;
2403 
2404 		cinode = CIFS_I(d_inode(cfile->dentry));
2405 		spin_unlock(&tcon->open_file_lock);
2406 		free_dentry_path(page);
2407 		*ret_file = find_readable_file(cinode, 0);
2408 		return *ret_file ? 0 : -ENOENT;
2409 	}
2410 
2411 	spin_unlock(&tcon->open_file_lock);
2412 	free_dentry_path(page);
2413 	return -ENOENT;
2414 }
2415 
2416 void
2417 cifs_writedata_release(struct kref *refcount)
2418 {
2419 	struct cifs_writedata *wdata = container_of(refcount,
2420 					struct cifs_writedata, refcount);
2421 #ifdef CONFIG_CIFS_SMB_DIRECT
2422 	if (wdata->mr) {
2423 		smbd_deregister_mr(wdata->mr);
2424 		wdata->mr = NULL;
2425 	}
2426 #endif
2427 
2428 	if (wdata->cfile)
2429 		cifsFileInfo_put(wdata->cfile);
2430 
2431 	kfree(wdata);
2432 }
2433 
2434 /*
2435  * Write failed with a retryable error. Resend the write request. It's also
2436  * possible that the page was redirtied so re-clean the page.
2437  */
2438 static void
2439 cifs_writev_requeue(struct cifs_writedata *wdata)
2440 {
2441 	int rc = 0;
2442 	struct inode *inode = d_inode(wdata->cfile->dentry);
2443 	struct TCP_Server_Info *server;
2444 	unsigned int rest_len = wdata->bytes;
2445 	loff_t fpos = wdata->offset;
2446 
2447 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2448 	do {
2449 		struct cifs_writedata *wdata2;
2450 		unsigned int wsize, cur_len;
2451 
2452 		wsize = server->ops->wp_retry_size(inode);
2453 		if (wsize < rest_len) {
2454 			if (wsize < PAGE_SIZE) {
2455 				rc = -EOPNOTSUPP;
2456 				break;
2457 			}
2458 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2459 		} else {
2460 			cur_len = rest_len;
2461 		}
2462 
2463 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2464 		if (!wdata2) {
2465 			rc = -ENOMEM;
2466 			break;
2467 		}
2468 
2469 		wdata2->sync_mode = wdata->sync_mode;
2470 		wdata2->offset	= fpos;
2471 		wdata2->bytes	= cur_len;
2472 		wdata2->iter	= wdata->iter;
2473 
2474 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2475 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2476 
2477 		if (iov_iter_is_xarray(&wdata2->iter))
2478 			/* Check for pages having been redirtied and clean
2479 			 * them.  We can do this by walking the xarray.  If
2480 			 * it's not an xarray, then it's a DIO and we shouldn't
2481 			 * be mucking around with the page bits.
2482 			 */
2483 			cifs_undirty_folios(inode, fpos, cur_len);
2484 
2485 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2486 					    &wdata2->cfile);
2487 		if (!wdata2->cfile) {
2488 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2489 				 rc);
2490 			if (!is_retryable_error(rc))
2491 				rc = -EBADF;
2492 		} else {
2493 			wdata2->pid = wdata2->cfile->pid;
2494 			rc = server->ops->async_writev(wdata2,
2495 						       cifs_writedata_release);
2496 		}
2497 
2498 		kref_put(&wdata2->refcount, cifs_writedata_release);
2499 		if (rc) {
2500 			if (is_retryable_error(rc))
2501 				continue;
2502 			fpos += cur_len;
2503 			rest_len -= cur_len;
2504 			break;
2505 		}
2506 
2507 		fpos += cur_len;
2508 		rest_len -= cur_len;
2509 	} while (rest_len > 0);
2510 
2511 	/* Clean up remaining pages from the original wdata */
2512 	if (iov_iter_is_xarray(&wdata->iter))
2513 		cifs_pages_write_failed(inode, fpos, rest_len);
2514 
2515 	if (rc != 0 && !is_retryable_error(rc))
2516 		mapping_set_error(inode->i_mapping, rc);
2517 	kref_put(&wdata->refcount, cifs_writedata_release);
2518 }
2519 
2520 void
2521 cifs_writev_complete(struct work_struct *work)
2522 {
2523 	struct cifs_writedata *wdata = container_of(work,
2524 						struct cifs_writedata, work);
2525 	struct inode *inode = d_inode(wdata->cfile->dentry);
2526 
2527 	if (wdata->result == 0) {
2528 		spin_lock(&inode->i_lock);
2529 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2530 		spin_unlock(&inode->i_lock);
2531 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2532 					 wdata->bytes);
2533 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2534 		return cifs_writev_requeue(wdata);
2535 
2536 	if (wdata->result == -EAGAIN)
2537 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2538 	else if (wdata->result < 0)
2539 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2540 	else
2541 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2542 
2543 	if (wdata->result != -EAGAIN)
2544 		mapping_set_error(inode->i_mapping, wdata->result);
2545 	kref_put(&wdata->refcount, cifs_writedata_release);
2546 }
2547 
2548 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2549 {
2550 	struct cifs_writedata *wdata;
2551 
2552 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2553 	if (wdata != NULL) {
2554 		kref_init(&wdata->refcount);
2555 		INIT_LIST_HEAD(&wdata->list);
2556 		init_completion(&wdata->done);
2557 		INIT_WORK(&wdata->work, complete);
2558 	}
2559 	return wdata;
2560 }
2561 
2562 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2563 {
2564 	struct address_space *mapping = page->mapping;
2565 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2566 	char *write_data;
2567 	int rc = -EFAULT;
2568 	int bytes_written = 0;
2569 	struct inode *inode;
2570 	struct cifsFileInfo *open_file;
2571 
2572 	if (!mapping || !mapping->host)
2573 		return -EFAULT;
2574 
2575 	inode = page->mapping->host;
2576 
2577 	offset += (loff_t)from;
2578 	write_data = kmap(page);
2579 	write_data += from;
2580 
2581 	if ((to > PAGE_SIZE) || (from > to)) {
2582 		kunmap(page);
2583 		return -EIO;
2584 	}
2585 
2586 	/* racing with truncate? */
2587 	if (offset > mapping->host->i_size) {
2588 		kunmap(page);
2589 		return 0; /* don't care */
2590 	}
2591 
2592 	/* check to make sure that we are not extending the file */
2593 	if (mapping->host->i_size - offset < (loff_t)to)
2594 		to = (unsigned)(mapping->host->i_size - offset);
2595 
2596 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2597 				    &open_file);
2598 	if (!rc) {
2599 		bytes_written = cifs_write(open_file, open_file->pid,
2600 					   write_data, to - from, &offset);
2601 		cifsFileInfo_put(open_file);
2602 		/* Does mm or vfs already set times? */
2603 		simple_inode_init_ts(inode);
2604 		if ((bytes_written > 0) && (offset))
2605 			rc = 0;
2606 		else if (bytes_written < 0)
2607 			rc = bytes_written;
2608 		else
2609 			rc = -EFAULT;
2610 	} else {
2611 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2612 		if (!is_retryable_error(rc))
2613 			rc = -EIO;
2614 	}
2615 
2616 	kunmap(page);
2617 	return rc;
2618 }
2619 
2620 /*
2621  * Extend the region to be written back to include subsequent contiguously
2622  * dirty pages if possible, but don't sleep while doing so.
2623  */
2624 static void cifs_extend_writeback(struct address_space *mapping,
2625 				  struct xa_state *xas,
2626 				  long *_count,
2627 				  loff_t start,
2628 				  int max_pages,
2629 				  loff_t max_len,
2630 				  size_t *_len)
2631 {
2632 	struct folio_batch batch;
2633 	struct folio *folio;
2634 	unsigned int nr_pages;
2635 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2636 	size_t len;
2637 	bool stop = true;
2638 	unsigned int i;
2639 
2640 	folio_batch_init(&batch);
2641 
2642 	do {
2643 		/* Firstly, we gather up a batch of contiguous dirty pages
2644 		 * under the RCU read lock - but we can't clear the dirty flags
2645 		 * there if any of those pages are mapped.
2646 		 */
2647 		rcu_read_lock();
2648 
2649 		xas_for_each(xas, folio, ULONG_MAX) {
2650 			stop = true;
2651 			if (xas_retry(xas, folio))
2652 				continue;
2653 			if (xa_is_value(folio))
2654 				break;
2655 			if (folio->index != index) {
2656 				xas_reset(xas);
2657 				break;
2658 			}
2659 
2660 			if (!folio_try_get_rcu(folio)) {
2661 				xas_reset(xas);
2662 				continue;
2663 			}
2664 			nr_pages = folio_nr_pages(folio);
2665 			if (nr_pages > max_pages) {
2666 				xas_reset(xas);
2667 				break;
2668 			}
2669 
2670 			/* Has the page moved or been split? */
2671 			if (unlikely(folio != xas_reload(xas))) {
2672 				folio_put(folio);
2673 				xas_reset(xas);
2674 				break;
2675 			}
2676 
2677 			if (!folio_trylock(folio)) {
2678 				folio_put(folio);
2679 				xas_reset(xas);
2680 				break;
2681 			}
2682 			if (!folio_test_dirty(folio) ||
2683 			    folio_test_writeback(folio)) {
2684 				folio_unlock(folio);
2685 				folio_put(folio);
2686 				xas_reset(xas);
2687 				break;
2688 			}
2689 
2690 			max_pages -= nr_pages;
2691 			len = folio_size(folio);
2692 			stop = false;
2693 
2694 			index += nr_pages;
2695 			*_count -= nr_pages;
2696 			*_len += len;
2697 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2698 				stop = true;
2699 
2700 			if (!folio_batch_add(&batch, folio))
2701 				break;
2702 			if (stop)
2703 				break;
2704 		}
2705 
2706 		xas_pause(xas);
2707 		rcu_read_unlock();
2708 
2709 		/* Now, if we obtained any pages, we can shift them to being
2710 		 * writable and mark them for caching.
2711 		 */
2712 		if (!folio_batch_count(&batch))
2713 			break;
2714 
2715 		for (i = 0; i < folio_batch_count(&batch); i++) {
2716 			folio = batch.folios[i];
2717 			/* The folio should be locked, dirty and not undergoing
2718 			 * writeback from the loop above.
2719 			 */
2720 			if (!folio_clear_dirty_for_io(folio))
2721 				WARN_ON(1);
2722 			folio_start_writeback(folio);
2723 			folio_unlock(folio);
2724 		}
2725 
2726 		folio_batch_release(&batch);
2727 		cond_resched();
2728 	} while (!stop);
2729 }
2730 
2731 /*
2732  * Write back the locked page and any subsequent non-locked dirty pages.
2733  */
2734 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2735 						 struct writeback_control *wbc,
2736 						 struct xa_state *xas,
2737 						 struct folio *folio,
2738 						 unsigned long long start,
2739 						 unsigned long long end)
2740 {
2741 	struct inode *inode = mapping->host;
2742 	struct TCP_Server_Info *server;
2743 	struct cifs_writedata *wdata;
2744 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2745 	struct cifs_credits credits_on_stack;
2746 	struct cifs_credits *credits = &credits_on_stack;
2747 	struct cifsFileInfo *cfile = NULL;
2748 	unsigned long long i_size = i_size_read(inode), max_len;
2749 	unsigned int xid, wsize;
2750 	size_t len = folio_size(folio);
2751 	long count = wbc->nr_to_write;
2752 	int rc;
2753 
2754 	/* The folio should be locked, dirty and not undergoing writeback. */
2755 	if (!folio_clear_dirty_for_io(folio))
2756 		WARN_ON_ONCE(1);
2757 	folio_start_writeback(folio);
2758 
2759 	count -= folio_nr_pages(folio);
2760 
2761 	xid = get_xid();
2762 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2763 
2764 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2765 	if (rc) {
2766 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2767 		goto err_xid;
2768 	}
2769 
2770 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2771 					   &wsize, credits);
2772 	if (rc != 0)
2773 		goto err_close;
2774 
2775 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2776 	if (!wdata) {
2777 		rc = -ENOMEM;
2778 		goto err_uncredit;
2779 	}
2780 
2781 	wdata->sync_mode = wbc->sync_mode;
2782 	wdata->offset = folio_pos(folio);
2783 	wdata->pid = cfile->pid;
2784 	wdata->credits = credits_on_stack;
2785 	wdata->cfile = cfile;
2786 	wdata->server = server;
2787 	cfile = NULL;
2788 
2789 	/* Find all consecutive lockable dirty pages that have contiguous
2790 	 * written regions, stopping when we find a page that is not
2791 	 * immediately lockable, is not dirty or is missing, or we reach the
2792 	 * end of the range.
2793 	 */
2794 	if (start < i_size) {
2795 		/* Trim the write to the EOF; the extra data is ignored.  Also
2796 		 * put an upper limit on the size of a single storedata op.
2797 		 */
2798 		max_len = wsize;
2799 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2800 		max_len = min_t(unsigned long long, max_len, i_size - start);
2801 
2802 		if (len < max_len) {
2803 			int max_pages = INT_MAX;
2804 
2805 #ifdef CONFIG_CIFS_SMB_DIRECT
2806 			if (server->smbd_conn)
2807 				max_pages = server->smbd_conn->max_frmr_depth;
2808 #endif
2809 			max_pages -= folio_nr_pages(folio);
2810 
2811 			if (max_pages > 0)
2812 				cifs_extend_writeback(mapping, xas, &count, start,
2813 						      max_pages, max_len, &len);
2814 		}
2815 	}
2816 	len = min_t(unsigned long long, len, i_size - start);
2817 
2818 	/* We now have a contiguous set of dirty pages, each with writeback
2819 	 * set; the first page is still locked at this point, but all the rest
2820 	 * have been unlocked.
2821 	 */
2822 	folio_unlock(folio);
2823 	wdata->bytes = len;
2824 
2825 	if (start < i_size) {
2826 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2827 				start, len);
2828 
2829 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2830 		if (rc)
2831 			goto err_wdata;
2832 
2833 		if (wdata->cfile->invalidHandle)
2834 			rc = -EAGAIN;
2835 		else
2836 			rc = wdata->server->ops->async_writev(wdata,
2837 							      cifs_writedata_release);
2838 		if (rc >= 0) {
2839 			kref_put(&wdata->refcount, cifs_writedata_release);
2840 			goto err_close;
2841 		}
2842 	} else {
2843 		/* The dirty region was entirely beyond the EOF. */
2844 		cifs_pages_written_back(inode, start, len);
2845 		rc = 0;
2846 	}
2847 
2848 err_wdata:
2849 	kref_put(&wdata->refcount, cifs_writedata_release);
2850 err_uncredit:
2851 	add_credits_and_wake_if(server, credits, 0);
2852 err_close:
2853 	if (cfile)
2854 		cifsFileInfo_put(cfile);
2855 err_xid:
2856 	free_xid(xid);
2857 	if (rc == 0) {
2858 		wbc->nr_to_write = count;
2859 		rc = len;
2860 	} else if (is_retryable_error(rc)) {
2861 		cifs_pages_write_redirty(inode, start, len);
2862 	} else {
2863 		cifs_pages_write_failed(inode, start, len);
2864 		mapping_set_error(mapping, rc);
2865 	}
2866 	/* Indication to update ctime and mtime as close is deferred */
2867 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2868 	return rc;
2869 }
2870 
2871 /*
2872  * write a region of pages back to the server
2873  */
2874 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2875 				     struct writeback_control *wbc,
2876 				     struct xa_state *xas,
2877 				     unsigned long long *_start,
2878 				     unsigned long long end)
2879 {
2880 	struct folio *folio;
2881 	unsigned long long start = *_start;
2882 	ssize_t ret;
2883 	int skips = 0;
2884 
2885 search_again:
2886 	/* Find the first dirty page. */
2887 	rcu_read_lock();
2888 
2889 	for (;;) {
2890 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2891 		if (xas_retry(xas, folio) || xa_is_value(folio))
2892 			continue;
2893 		if (!folio)
2894 			break;
2895 
2896 		if (!folio_try_get_rcu(folio)) {
2897 			xas_reset(xas);
2898 			continue;
2899 		}
2900 
2901 		if (unlikely(folio != xas_reload(xas))) {
2902 			folio_put(folio);
2903 			xas_reset(xas);
2904 			continue;
2905 		}
2906 
2907 		xas_pause(xas);
2908 		break;
2909 	}
2910 	rcu_read_unlock();
2911 	if (!folio)
2912 		return 0;
2913 
2914 	start = folio_pos(folio); /* May regress with THPs */
2915 
2916 	/* At this point we hold neither the i_pages lock nor the page lock:
2917 	 * the page may be truncated or invalidated (changing page->mapping to
2918 	 * NULL), or even swizzled back from swapper_space to tmpfs file
2919 	 * mapping
2920 	 */
2921 lock_again:
2922 	if (wbc->sync_mode != WB_SYNC_NONE) {
2923 		ret = folio_lock_killable(folio);
2924 		if (ret < 0)
2925 			return ret;
2926 	} else {
2927 		if (!folio_trylock(folio))
2928 			goto search_again;
2929 	}
2930 
2931 	if (folio->mapping != mapping ||
2932 	    !folio_test_dirty(folio)) {
2933 		start += folio_size(folio);
2934 		folio_unlock(folio);
2935 		goto search_again;
2936 	}
2937 
2938 	if (folio_test_writeback(folio) ||
2939 	    folio_test_fscache(folio)) {
2940 		folio_unlock(folio);
2941 		if (wbc->sync_mode != WB_SYNC_NONE) {
2942 			folio_wait_writeback(folio);
2943 #ifdef CONFIG_CIFS_FSCACHE
2944 			folio_wait_fscache(folio);
2945 #endif
2946 			goto lock_again;
2947 		}
2948 
2949 		start += folio_size(folio);
2950 		if (wbc->sync_mode == WB_SYNC_NONE) {
2951 			if (skips >= 5 || need_resched()) {
2952 				ret = 0;
2953 				goto out;
2954 			}
2955 			skips++;
2956 		}
2957 		goto search_again;
2958 	}
2959 
2960 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
2961 out:
2962 	if (ret > 0)
2963 		*_start = start + ret;
2964 	return ret;
2965 }
2966 
2967 /*
2968  * Write a region of pages back to the server
2969  */
2970 static int cifs_writepages_region(struct address_space *mapping,
2971 				  struct writeback_control *wbc,
2972 				  unsigned long long *_start,
2973 				  unsigned long long end)
2974 {
2975 	ssize_t ret;
2976 
2977 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
2978 
2979 	do {
2980 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
2981 		if (ret > 0 && wbc->nr_to_write > 0)
2982 			cond_resched();
2983 	} while (ret > 0 && wbc->nr_to_write > 0);
2984 
2985 	return ret > 0 ? 0 : ret;
2986 }
2987 
2988 /*
2989  * Write some of the pending data back to the server
2990  */
2991 static int cifs_writepages(struct address_space *mapping,
2992 			   struct writeback_control *wbc)
2993 {
2994 	loff_t start, end;
2995 	int ret;
2996 
2997 	/* We have to be careful as we can end up racing with setattr()
2998 	 * truncating the pagecache since the caller doesn't take a lock here
2999 	 * to prevent it.
3000 	 */
3001 
3002 	if (wbc->range_cyclic && mapping->writeback_index) {
3003 		start = mapping->writeback_index * PAGE_SIZE;
3004 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3005 		if (ret < 0)
3006 			goto out;
3007 
3008 		if (wbc->nr_to_write <= 0) {
3009 			mapping->writeback_index = start / PAGE_SIZE;
3010 			goto out;
3011 		}
3012 
3013 		start = 0;
3014 		end = mapping->writeback_index * PAGE_SIZE;
3015 		mapping->writeback_index = 0;
3016 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3017 		if (ret == 0)
3018 			mapping->writeback_index = start / PAGE_SIZE;
3019 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3020 		start = 0;
3021 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3022 		if (wbc->nr_to_write > 0 && ret == 0)
3023 			mapping->writeback_index = start / PAGE_SIZE;
3024 	} else {
3025 		start = wbc->range_start;
3026 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3027 	}
3028 
3029 out:
3030 	return ret;
3031 }
3032 
3033 static int
3034 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3035 {
3036 	int rc;
3037 	unsigned int xid;
3038 
3039 	xid = get_xid();
3040 /* BB add check for wbc flags */
3041 	get_page(page);
3042 	if (!PageUptodate(page))
3043 		cifs_dbg(FYI, "ppw - page not up to date\n");
3044 
3045 	/*
3046 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3047 	 *
3048 	 * A writepage() implementation always needs to do either this,
3049 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3050 	 * the case of a failure.
3051 	 *
3052 	 * Just unlocking the page will cause the radix tree tag-bits
3053 	 * to fail to update with the state of the page correctly.
3054 	 */
3055 	set_page_writeback(page);
3056 retry_write:
3057 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3058 	if (is_retryable_error(rc)) {
3059 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3060 			goto retry_write;
3061 		redirty_page_for_writepage(wbc, page);
3062 	} else if (rc != 0) {
3063 		SetPageError(page);
3064 		mapping_set_error(page->mapping, rc);
3065 	} else {
3066 		SetPageUptodate(page);
3067 	}
3068 	end_page_writeback(page);
3069 	put_page(page);
3070 	free_xid(xid);
3071 	return rc;
3072 }
3073 
3074 static int cifs_write_end(struct file *file, struct address_space *mapping,
3075 			loff_t pos, unsigned len, unsigned copied,
3076 			struct page *page, void *fsdata)
3077 {
3078 	int rc;
3079 	struct inode *inode = mapping->host;
3080 	struct cifsFileInfo *cfile = file->private_data;
3081 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3082 	struct folio *folio = page_folio(page);
3083 	__u32 pid;
3084 
3085 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3086 		pid = cfile->pid;
3087 	else
3088 		pid = current->tgid;
3089 
3090 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3091 		 page, pos, copied);
3092 
3093 	if (folio_test_checked(folio)) {
3094 		if (copied == len)
3095 			folio_mark_uptodate(folio);
3096 		folio_clear_checked(folio);
3097 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3098 		folio_mark_uptodate(folio);
3099 
3100 	if (!folio_test_uptodate(folio)) {
3101 		char *page_data;
3102 		unsigned offset = pos & (PAGE_SIZE - 1);
3103 		unsigned int xid;
3104 
3105 		xid = get_xid();
3106 		/* this is probably better than directly calling
3107 		   partialpage_write since in this function the file handle is
3108 		   known which we might as well	leverage */
3109 		/* BB check if anything else missing out of ppw
3110 		   such as updating last write time */
3111 		page_data = kmap(page);
3112 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3113 		/* if (rc < 0) should we set writebehind rc? */
3114 		kunmap(page);
3115 
3116 		free_xid(xid);
3117 	} else {
3118 		rc = copied;
3119 		pos += copied;
3120 		set_page_dirty(page);
3121 	}
3122 
3123 	if (rc > 0) {
3124 		spin_lock(&inode->i_lock);
3125 		if (pos > inode->i_size) {
3126 			i_size_write(inode, pos);
3127 			inode->i_blocks = (512 - 1 + pos) >> 9;
3128 		}
3129 		spin_unlock(&inode->i_lock);
3130 	}
3131 
3132 	unlock_page(page);
3133 	put_page(page);
3134 	/* Indication to update ctime and mtime as close is deferred */
3135 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3136 
3137 	return rc;
3138 }
3139 
3140 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3141 		      int datasync)
3142 {
3143 	unsigned int xid;
3144 	int rc = 0;
3145 	struct cifs_tcon *tcon;
3146 	struct TCP_Server_Info *server;
3147 	struct cifsFileInfo *smbfile = file->private_data;
3148 	struct inode *inode = file_inode(file);
3149 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3150 
3151 	rc = file_write_and_wait_range(file, start, end);
3152 	if (rc) {
3153 		trace_cifs_fsync_err(inode->i_ino, rc);
3154 		return rc;
3155 	}
3156 
3157 	xid = get_xid();
3158 
3159 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3160 		 file, datasync);
3161 
3162 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3163 		rc = cifs_zap_mapping(inode);
3164 		if (rc) {
3165 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3166 			rc = 0; /* don't care about it in fsync */
3167 		}
3168 	}
3169 
3170 	tcon = tlink_tcon(smbfile->tlink);
3171 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3172 		server = tcon->ses->server;
3173 		if (server->ops->flush == NULL) {
3174 			rc = -ENOSYS;
3175 			goto strict_fsync_exit;
3176 		}
3177 
3178 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3179 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3180 			if (smbfile) {
3181 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3182 				cifsFileInfo_put(smbfile);
3183 			} else
3184 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3185 		} else
3186 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3187 	}
3188 
3189 strict_fsync_exit:
3190 	free_xid(xid);
3191 	return rc;
3192 }
3193 
3194 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3195 {
3196 	unsigned int xid;
3197 	int rc = 0;
3198 	struct cifs_tcon *tcon;
3199 	struct TCP_Server_Info *server;
3200 	struct cifsFileInfo *smbfile = file->private_data;
3201 	struct inode *inode = file_inode(file);
3202 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3203 
3204 	rc = file_write_and_wait_range(file, start, end);
3205 	if (rc) {
3206 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3207 		return rc;
3208 	}
3209 
3210 	xid = get_xid();
3211 
3212 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3213 		 file, datasync);
3214 
3215 	tcon = tlink_tcon(smbfile->tlink);
3216 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3217 		server = tcon->ses->server;
3218 		if (server->ops->flush == NULL) {
3219 			rc = -ENOSYS;
3220 			goto fsync_exit;
3221 		}
3222 
3223 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3224 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3225 			if (smbfile) {
3226 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3227 				cifsFileInfo_put(smbfile);
3228 			} else
3229 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3230 		} else
3231 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3232 	}
3233 
3234 fsync_exit:
3235 	free_xid(xid);
3236 	return rc;
3237 }
3238 
3239 /*
3240  * As file closes, flush all cached write data for this inode checking
3241  * for write behind errors.
3242  */
3243 int cifs_flush(struct file *file, fl_owner_t id)
3244 {
3245 	struct inode *inode = file_inode(file);
3246 	int rc = 0;
3247 
3248 	if (file->f_mode & FMODE_WRITE)
3249 		rc = filemap_write_and_wait(inode->i_mapping);
3250 
3251 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3252 	if (rc) {
3253 		/* get more nuanced writeback errors */
3254 		rc = filemap_check_wb_err(file->f_mapping, 0);
3255 		trace_cifs_flush_err(inode->i_ino, rc);
3256 	}
3257 	return rc;
3258 }
3259 
3260 static void
3261 cifs_uncached_writedata_release(struct kref *refcount)
3262 {
3263 	struct cifs_writedata *wdata = container_of(refcount,
3264 					struct cifs_writedata, refcount);
3265 
3266 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3267 	cifs_writedata_release(refcount);
3268 }
3269 
3270 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3271 
3272 static void
3273 cifs_uncached_writev_complete(struct work_struct *work)
3274 {
3275 	struct cifs_writedata *wdata = container_of(work,
3276 					struct cifs_writedata, work);
3277 	struct inode *inode = d_inode(wdata->cfile->dentry);
3278 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3279 
3280 	spin_lock(&inode->i_lock);
3281 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3282 	if (cifsi->server_eof > inode->i_size)
3283 		i_size_write(inode, cifsi->server_eof);
3284 	spin_unlock(&inode->i_lock);
3285 
3286 	complete(&wdata->done);
3287 	collect_uncached_write_data(wdata->ctx);
3288 	/* the below call can possibly free the last ref to aio ctx */
3289 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3290 }
3291 
3292 static int
3293 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3294 	struct cifs_aio_ctx *ctx)
3295 {
3296 	unsigned int wsize;
3297 	struct cifs_credits credits;
3298 	int rc;
3299 	struct TCP_Server_Info *server = wdata->server;
3300 
3301 	do {
3302 		if (wdata->cfile->invalidHandle) {
3303 			rc = cifs_reopen_file(wdata->cfile, false);
3304 			if (rc == -EAGAIN)
3305 				continue;
3306 			else if (rc)
3307 				break;
3308 		}
3309 
3310 
3311 		/*
3312 		 * Wait for credits to resend this wdata.
3313 		 * Note: we are attempting to resend the whole wdata not in
3314 		 * segments
3315 		 */
3316 		do {
3317 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3318 						&wsize, &credits);
3319 			if (rc)
3320 				goto fail;
3321 
3322 			if (wsize < wdata->bytes) {
3323 				add_credits_and_wake_if(server, &credits, 0);
3324 				msleep(1000);
3325 			}
3326 		} while (wsize < wdata->bytes);
3327 		wdata->credits = credits;
3328 
3329 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3330 
3331 		if (!rc) {
3332 			if (wdata->cfile->invalidHandle)
3333 				rc = -EAGAIN;
3334 			else {
3335 #ifdef CONFIG_CIFS_SMB_DIRECT
3336 				if (wdata->mr) {
3337 					wdata->mr->need_invalidate = true;
3338 					smbd_deregister_mr(wdata->mr);
3339 					wdata->mr = NULL;
3340 				}
3341 #endif
3342 				rc = server->ops->async_writev(wdata,
3343 					cifs_uncached_writedata_release);
3344 			}
3345 		}
3346 
3347 		/* If the write was successfully sent, we are done */
3348 		if (!rc) {
3349 			list_add_tail(&wdata->list, wdata_list);
3350 			return 0;
3351 		}
3352 
3353 		/* Roll back credits and retry if needed */
3354 		add_credits_and_wake_if(server, &wdata->credits, 0);
3355 	} while (rc == -EAGAIN);
3356 
3357 fail:
3358 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3359 	return rc;
3360 }
3361 
3362 /*
3363  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3364  * size and maximum number of segments.
3365  */
3366 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3367 				     size_t max_segs, unsigned int *_nsegs)
3368 {
3369 	const struct bio_vec *bvecs = iter->bvec;
3370 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3371 	size_t len, span = 0, n = iter->count;
3372 	size_t skip = iter->iov_offset;
3373 
3374 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3375 		return 0;
3376 
3377 	while (n && ix < nbv && skip) {
3378 		len = bvecs[ix].bv_len;
3379 		if (skip < len)
3380 			break;
3381 		skip -= len;
3382 		n -= len;
3383 		ix++;
3384 	}
3385 
3386 	while (n && ix < nbv) {
3387 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3388 		span += len;
3389 		max_size -= len;
3390 		nsegs++;
3391 		ix++;
3392 		if (max_size == 0 || nsegs >= max_segs)
3393 			break;
3394 		skip = 0;
3395 		n -= len;
3396 	}
3397 
3398 	*_nsegs = nsegs;
3399 	return span;
3400 }
3401 
3402 static int
3403 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3404 		     struct cifsFileInfo *open_file,
3405 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3406 		     struct cifs_aio_ctx *ctx)
3407 {
3408 	int rc = 0;
3409 	size_t cur_len, max_len;
3410 	struct cifs_writedata *wdata;
3411 	pid_t pid;
3412 	struct TCP_Server_Info *server;
3413 	unsigned int xid, max_segs = INT_MAX;
3414 
3415 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3416 		pid = open_file->pid;
3417 	else
3418 		pid = current->tgid;
3419 
3420 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3421 	xid = get_xid();
3422 
3423 #ifdef CONFIG_CIFS_SMB_DIRECT
3424 	if (server->smbd_conn)
3425 		max_segs = server->smbd_conn->max_frmr_depth;
3426 #endif
3427 
3428 	do {
3429 		struct cifs_credits credits_on_stack;
3430 		struct cifs_credits *credits = &credits_on_stack;
3431 		unsigned int wsize, nsegs = 0;
3432 
3433 		if (signal_pending(current)) {
3434 			rc = -EINTR;
3435 			break;
3436 		}
3437 
3438 		if (open_file->invalidHandle) {
3439 			rc = cifs_reopen_file(open_file, false);
3440 			if (rc == -EAGAIN)
3441 				continue;
3442 			else if (rc)
3443 				break;
3444 		}
3445 
3446 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3447 						   &wsize, credits);
3448 		if (rc)
3449 			break;
3450 
3451 		max_len = min_t(const size_t, len, wsize);
3452 		if (!max_len) {
3453 			rc = -EAGAIN;
3454 			add_credits_and_wake_if(server, credits, 0);
3455 			break;
3456 		}
3457 
3458 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3459 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3460 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3461 		if (cur_len == 0) {
3462 			rc = -EIO;
3463 			add_credits_and_wake_if(server, credits, 0);
3464 			break;
3465 		}
3466 
3467 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3468 		if (!wdata) {
3469 			rc = -ENOMEM;
3470 			add_credits_and_wake_if(server, credits, 0);
3471 			break;
3472 		}
3473 
3474 		wdata->sync_mode = WB_SYNC_ALL;
3475 		wdata->offset	= (__u64)fpos;
3476 		wdata->cfile	= cifsFileInfo_get(open_file);
3477 		wdata->server	= server;
3478 		wdata->pid	= pid;
3479 		wdata->bytes	= cur_len;
3480 		wdata->credits	= credits_on_stack;
3481 		wdata->iter	= *from;
3482 		wdata->ctx	= ctx;
3483 		kref_get(&ctx->refcount);
3484 
3485 		iov_iter_truncate(&wdata->iter, cur_len);
3486 
3487 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3488 
3489 		if (!rc) {
3490 			if (wdata->cfile->invalidHandle)
3491 				rc = -EAGAIN;
3492 			else
3493 				rc = server->ops->async_writev(wdata,
3494 					cifs_uncached_writedata_release);
3495 		}
3496 
3497 		if (rc) {
3498 			add_credits_and_wake_if(server, &wdata->credits, 0);
3499 			kref_put(&wdata->refcount,
3500 				 cifs_uncached_writedata_release);
3501 			if (rc == -EAGAIN)
3502 				continue;
3503 			break;
3504 		}
3505 
3506 		list_add_tail(&wdata->list, wdata_list);
3507 		iov_iter_advance(from, cur_len);
3508 		fpos += cur_len;
3509 		len -= cur_len;
3510 	} while (len > 0);
3511 
3512 	free_xid(xid);
3513 	return rc;
3514 }
3515 
3516 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3517 {
3518 	struct cifs_writedata *wdata, *tmp;
3519 	struct cifs_tcon *tcon;
3520 	struct cifs_sb_info *cifs_sb;
3521 	struct dentry *dentry = ctx->cfile->dentry;
3522 	ssize_t rc;
3523 
3524 	tcon = tlink_tcon(ctx->cfile->tlink);
3525 	cifs_sb = CIFS_SB(dentry->d_sb);
3526 
3527 	mutex_lock(&ctx->aio_mutex);
3528 
3529 	if (list_empty(&ctx->list)) {
3530 		mutex_unlock(&ctx->aio_mutex);
3531 		return;
3532 	}
3533 
3534 	rc = ctx->rc;
3535 	/*
3536 	 * Wait for and collect replies for any successful sends in order of
3537 	 * increasing offset. Once an error is hit, then return without waiting
3538 	 * for any more replies.
3539 	 */
3540 restart_loop:
3541 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3542 		if (!rc) {
3543 			if (!try_wait_for_completion(&wdata->done)) {
3544 				mutex_unlock(&ctx->aio_mutex);
3545 				return;
3546 			}
3547 
3548 			if (wdata->result)
3549 				rc = wdata->result;
3550 			else
3551 				ctx->total_len += wdata->bytes;
3552 
3553 			/* resend call if it's a retryable error */
3554 			if (rc == -EAGAIN) {
3555 				struct list_head tmp_list;
3556 				struct iov_iter tmp_from = ctx->iter;
3557 
3558 				INIT_LIST_HEAD(&tmp_list);
3559 				list_del_init(&wdata->list);
3560 
3561 				if (ctx->direct_io)
3562 					rc = cifs_resend_wdata(
3563 						wdata, &tmp_list, ctx);
3564 				else {
3565 					iov_iter_advance(&tmp_from,
3566 						 wdata->offset - ctx->pos);
3567 
3568 					rc = cifs_write_from_iter(wdata->offset,
3569 						wdata->bytes, &tmp_from,
3570 						ctx->cfile, cifs_sb, &tmp_list,
3571 						ctx);
3572 
3573 					kref_put(&wdata->refcount,
3574 						cifs_uncached_writedata_release);
3575 				}
3576 
3577 				list_splice(&tmp_list, &ctx->list);
3578 				goto restart_loop;
3579 			}
3580 		}
3581 		list_del_init(&wdata->list);
3582 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3583 	}
3584 
3585 	cifs_stats_bytes_written(tcon, ctx->total_len);
3586 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3587 
3588 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3589 
3590 	mutex_unlock(&ctx->aio_mutex);
3591 
3592 	if (ctx->iocb && ctx->iocb->ki_complete)
3593 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3594 	else
3595 		complete(&ctx->done);
3596 }
3597 
3598 static ssize_t __cifs_writev(
3599 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3600 {
3601 	struct file *file = iocb->ki_filp;
3602 	ssize_t total_written = 0;
3603 	struct cifsFileInfo *cfile;
3604 	struct cifs_tcon *tcon;
3605 	struct cifs_sb_info *cifs_sb;
3606 	struct cifs_aio_ctx *ctx;
3607 	int rc;
3608 
3609 	rc = generic_write_checks(iocb, from);
3610 	if (rc <= 0)
3611 		return rc;
3612 
3613 	cifs_sb = CIFS_FILE_SB(file);
3614 	cfile = file->private_data;
3615 	tcon = tlink_tcon(cfile->tlink);
3616 
3617 	if (!tcon->ses->server->ops->async_writev)
3618 		return -ENOSYS;
3619 
3620 	ctx = cifs_aio_ctx_alloc();
3621 	if (!ctx)
3622 		return -ENOMEM;
3623 
3624 	ctx->cfile = cifsFileInfo_get(cfile);
3625 
3626 	if (!is_sync_kiocb(iocb))
3627 		ctx->iocb = iocb;
3628 
3629 	ctx->pos = iocb->ki_pos;
3630 	ctx->direct_io = direct;
3631 	ctx->nr_pinned_pages = 0;
3632 
3633 	if (user_backed_iter(from)) {
3634 		/*
3635 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3636 		 * they contain references to the calling process's virtual
3637 		 * memory layout which won't be available in an async worker
3638 		 * thread.  This also takes a pin on every folio involved.
3639 		 */
3640 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3641 					     &ctx->iter, 0);
3642 		if (rc < 0) {
3643 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3644 			return rc;
3645 		}
3646 
3647 		ctx->nr_pinned_pages = rc;
3648 		ctx->bv = (void *)ctx->iter.bvec;
3649 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3650 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3651 		   !is_sync_kiocb(iocb)) {
3652 		/*
3653 		 * If the op is asynchronous, we need to copy the list attached
3654 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3655 		 * will be pinned by the caller; in any case, we may or may not
3656 		 * be able to pin the pages, so we don't try.
3657 		 */
3658 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3659 		if (!ctx->bv) {
3660 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3661 			return -ENOMEM;
3662 		}
3663 	} else {
3664 		/*
3665 		 * Otherwise, we just pass the iterator down as-is and rely on
3666 		 * the caller to make sure the pages referred to by the
3667 		 * iterator don't evaporate.
3668 		 */
3669 		ctx->iter = *from;
3670 	}
3671 
3672 	ctx->len = iov_iter_count(&ctx->iter);
3673 
3674 	/* grab a lock here due to read response handlers can access ctx */
3675 	mutex_lock(&ctx->aio_mutex);
3676 
3677 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3678 				  cfile, cifs_sb, &ctx->list, ctx);
3679 
3680 	/*
3681 	 * If at least one write was successfully sent, then discard any rc
3682 	 * value from the later writes. If the other write succeeds, then
3683 	 * we'll end up returning whatever was written. If it fails, then
3684 	 * we'll get a new rc value from that.
3685 	 */
3686 	if (!list_empty(&ctx->list))
3687 		rc = 0;
3688 
3689 	mutex_unlock(&ctx->aio_mutex);
3690 
3691 	if (rc) {
3692 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3693 		return rc;
3694 	}
3695 
3696 	if (!is_sync_kiocb(iocb)) {
3697 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3698 		return -EIOCBQUEUED;
3699 	}
3700 
3701 	rc = wait_for_completion_killable(&ctx->done);
3702 	if (rc) {
3703 		mutex_lock(&ctx->aio_mutex);
3704 		ctx->rc = rc = -EINTR;
3705 		total_written = ctx->total_len;
3706 		mutex_unlock(&ctx->aio_mutex);
3707 	} else {
3708 		rc = ctx->rc;
3709 		total_written = ctx->total_len;
3710 	}
3711 
3712 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3713 
3714 	if (unlikely(!total_written))
3715 		return rc;
3716 
3717 	iocb->ki_pos += total_written;
3718 	return total_written;
3719 }
3720 
3721 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3722 {
3723 	struct file *file = iocb->ki_filp;
3724 
3725 	cifs_revalidate_mapping(file->f_inode);
3726 	return __cifs_writev(iocb, from, true);
3727 }
3728 
3729 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3730 {
3731 	return __cifs_writev(iocb, from, false);
3732 }
3733 
3734 static ssize_t
3735 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3736 {
3737 	struct file *file = iocb->ki_filp;
3738 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3739 	struct inode *inode = file->f_mapping->host;
3740 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3741 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3742 	ssize_t rc;
3743 
3744 	inode_lock(inode);
3745 	/*
3746 	 * We need to hold the sem to be sure nobody modifies lock list
3747 	 * with a brlock that prevents writing.
3748 	 */
3749 	down_read(&cinode->lock_sem);
3750 
3751 	rc = generic_write_checks(iocb, from);
3752 	if (rc <= 0)
3753 		goto out;
3754 
3755 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3756 				     server->vals->exclusive_lock_type, 0,
3757 				     NULL, CIFS_WRITE_OP))
3758 		rc = __generic_file_write_iter(iocb, from);
3759 	else
3760 		rc = -EACCES;
3761 out:
3762 	up_read(&cinode->lock_sem);
3763 	inode_unlock(inode);
3764 
3765 	if (rc > 0)
3766 		rc = generic_write_sync(iocb, rc);
3767 	return rc;
3768 }
3769 
3770 ssize_t
3771 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3772 {
3773 	struct inode *inode = file_inode(iocb->ki_filp);
3774 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3775 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3776 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3777 						iocb->ki_filp->private_data;
3778 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3779 	ssize_t written;
3780 
3781 	written = cifs_get_writer(cinode);
3782 	if (written)
3783 		return written;
3784 
3785 	if (CIFS_CACHE_WRITE(cinode)) {
3786 		if (cap_unix(tcon->ses) &&
3787 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3788 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3789 			written = generic_file_write_iter(iocb, from);
3790 			goto out;
3791 		}
3792 		written = cifs_writev(iocb, from);
3793 		goto out;
3794 	}
3795 	/*
3796 	 * For non-oplocked files in strict cache mode we need to write the data
3797 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3798 	 * affected pages because it may cause a error with mandatory locks on
3799 	 * these pages but not on the region from pos to ppos+len-1.
3800 	 */
3801 	written = cifs_user_writev(iocb, from);
3802 	if (CIFS_CACHE_READ(cinode)) {
3803 		/*
3804 		 * We have read level caching and we have just sent a write
3805 		 * request to the server thus making data in the cache stale.
3806 		 * Zap the cache and set oplock/lease level to NONE to avoid
3807 		 * reading stale data from the cache. All subsequent read
3808 		 * operations will read new data from the server.
3809 		 */
3810 		cifs_zap_mapping(inode);
3811 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3812 			 inode);
3813 		cinode->oplock = 0;
3814 	}
3815 out:
3816 	cifs_put_writer(cinode);
3817 	return written;
3818 }
3819 
3820 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3821 {
3822 	struct cifs_readdata *rdata;
3823 
3824 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3825 	if (rdata) {
3826 		kref_init(&rdata->refcount);
3827 		INIT_LIST_HEAD(&rdata->list);
3828 		init_completion(&rdata->done);
3829 		INIT_WORK(&rdata->work, complete);
3830 	}
3831 
3832 	return rdata;
3833 }
3834 
3835 void
3836 cifs_readdata_release(struct kref *refcount)
3837 {
3838 	struct cifs_readdata *rdata = container_of(refcount,
3839 					struct cifs_readdata, refcount);
3840 
3841 	if (rdata->ctx)
3842 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3843 #ifdef CONFIG_CIFS_SMB_DIRECT
3844 	if (rdata->mr) {
3845 		smbd_deregister_mr(rdata->mr);
3846 		rdata->mr = NULL;
3847 	}
3848 #endif
3849 	if (rdata->cfile)
3850 		cifsFileInfo_put(rdata->cfile);
3851 
3852 	kfree(rdata);
3853 }
3854 
3855 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3856 
3857 static void
3858 cifs_uncached_readv_complete(struct work_struct *work)
3859 {
3860 	struct cifs_readdata *rdata = container_of(work,
3861 						struct cifs_readdata, work);
3862 
3863 	complete(&rdata->done);
3864 	collect_uncached_read_data(rdata->ctx);
3865 	/* the below call can possibly free the last ref to aio ctx */
3866 	kref_put(&rdata->refcount, cifs_readdata_release);
3867 }
3868 
3869 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3870 			struct list_head *rdata_list,
3871 			struct cifs_aio_ctx *ctx)
3872 {
3873 	unsigned int rsize;
3874 	struct cifs_credits credits;
3875 	int rc;
3876 	struct TCP_Server_Info *server;
3877 
3878 	/* XXX: should we pick a new channel here? */
3879 	server = rdata->server;
3880 
3881 	do {
3882 		if (rdata->cfile->invalidHandle) {
3883 			rc = cifs_reopen_file(rdata->cfile, true);
3884 			if (rc == -EAGAIN)
3885 				continue;
3886 			else if (rc)
3887 				break;
3888 		}
3889 
3890 		/*
3891 		 * Wait for credits to resend this rdata.
3892 		 * Note: we are attempting to resend the whole rdata not in
3893 		 * segments
3894 		 */
3895 		do {
3896 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3897 						&rsize, &credits);
3898 
3899 			if (rc)
3900 				goto fail;
3901 
3902 			if (rsize < rdata->bytes) {
3903 				add_credits_and_wake_if(server, &credits, 0);
3904 				msleep(1000);
3905 			}
3906 		} while (rsize < rdata->bytes);
3907 		rdata->credits = credits;
3908 
3909 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3910 		if (!rc) {
3911 			if (rdata->cfile->invalidHandle)
3912 				rc = -EAGAIN;
3913 			else {
3914 #ifdef CONFIG_CIFS_SMB_DIRECT
3915 				if (rdata->mr) {
3916 					rdata->mr->need_invalidate = true;
3917 					smbd_deregister_mr(rdata->mr);
3918 					rdata->mr = NULL;
3919 				}
3920 #endif
3921 				rc = server->ops->async_readv(rdata);
3922 			}
3923 		}
3924 
3925 		/* If the read was successfully sent, we are done */
3926 		if (!rc) {
3927 			/* Add to aio pending list */
3928 			list_add_tail(&rdata->list, rdata_list);
3929 			return 0;
3930 		}
3931 
3932 		/* Roll back credits and retry if needed */
3933 		add_credits_and_wake_if(server, &rdata->credits, 0);
3934 	} while (rc == -EAGAIN);
3935 
3936 fail:
3937 	kref_put(&rdata->refcount, cifs_readdata_release);
3938 	return rc;
3939 }
3940 
3941 static int
3942 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3943 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3944 		     struct cifs_aio_ctx *ctx)
3945 {
3946 	struct cifs_readdata *rdata;
3947 	unsigned int rsize, nsegs, max_segs = INT_MAX;
3948 	struct cifs_credits credits_on_stack;
3949 	struct cifs_credits *credits = &credits_on_stack;
3950 	size_t cur_len, max_len;
3951 	int rc;
3952 	pid_t pid;
3953 	struct TCP_Server_Info *server;
3954 
3955 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3956 
3957 #ifdef CONFIG_CIFS_SMB_DIRECT
3958 	if (server->smbd_conn)
3959 		max_segs = server->smbd_conn->max_frmr_depth;
3960 #endif
3961 
3962 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3963 		pid = open_file->pid;
3964 	else
3965 		pid = current->tgid;
3966 
3967 	do {
3968 		if (open_file->invalidHandle) {
3969 			rc = cifs_reopen_file(open_file, true);
3970 			if (rc == -EAGAIN)
3971 				continue;
3972 			else if (rc)
3973 				break;
3974 		}
3975 
3976 		if (cifs_sb->ctx->rsize == 0)
3977 			cifs_sb->ctx->rsize =
3978 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3979 							     cifs_sb->ctx);
3980 
3981 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3982 						   &rsize, credits);
3983 		if (rc)
3984 			break;
3985 
3986 		max_len = min_t(size_t, len, rsize);
3987 
3988 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3989 						 max_segs, &nsegs);
3990 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3991 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3992 		if (cur_len == 0) {
3993 			rc = -EIO;
3994 			add_credits_and_wake_if(server, credits, 0);
3995 			break;
3996 		}
3997 
3998 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3999 		if (!rdata) {
4000 			add_credits_and_wake_if(server, credits, 0);
4001 			rc = -ENOMEM;
4002 			break;
4003 		}
4004 
4005 		rdata->server	= server;
4006 		rdata->cfile	= cifsFileInfo_get(open_file);
4007 		rdata->offset	= fpos;
4008 		rdata->bytes	= cur_len;
4009 		rdata->pid	= pid;
4010 		rdata->credits	= credits_on_stack;
4011 		rdata->ctx	= ctx;
4012 		kref_get(&ctx->refcount);
4013 
4014 		rdata->iter	= ctx->iter;
4015 		iov_iter_truncate(&rdata->iter, cur_len);
4016 
4017 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4018 
4019 		if (!rc) {
4020 			if (rdata->cfile->invalidHandle)
4021 				rc = -EAGAIN;
4022 			else
4023 				rc = server->ops->async_readv(rdata);
4024 		}
4025 
4026 		if (rc) {
4027 			add_credits_and_wake_if(server, &rdata->credits, 0);
4028 			kref_put(&rdata->refcount, cifs_readdata_release);
4029 			if (rc == -EAGAIN)
4030 				continue;
4031 			break;
4032 		}
4033 
4034 		list_add_tail(&rdata->list, rdata_list);
4035 		iov_iter_advance(&ctx->iter, cur_len);
4036 		fpos += cur_len;
4037 		len -= cur_len;
4038 	} while (len > 0);
4039 
4040 	return rc;
4041 }
4042 
4043 static void
4044 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4045 {
4046 	struct cifs_readdata *rdata, *tmp;
4047 	struct cifs_sb_info *cifs_sb;
4048 	int rc;
4049 
4050 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4051 
4052 	mutex_lock(&ctx->aio_mutex);
4053 
4054 	if (list_empty(&ctx->list)) {
4055 		mutex_unlock(&ctx->aio_mutex);
4056 		return;
4057 	}
4058 
4059 	rc = ctx->rc;
4060 	/* the loop below should proceed in the order of increasing offsets */
4061 again:
4062 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4063 		if (!rc) {
4064 			if (!try_wait_for_completion(&rdata->done)) {
4065 				mutex_unlock(&ctx->aio_mutex);
4066 				return;
4067 			}
4068 
4069 			if (rdata->result == -EAGAIN) {
4070 				/* resend call if it's a retryable error */
4071 				struct list_head tmp_list;
4072 				unsigned int got_bytes = rdata->got_bytes;
4073 
4074 				list_del_init(&rdata->list);
4075 				INIT_LIST_HEAD(&tmp_list);
4076 
4077 				if (ctx->direct_io) {
4078 					/*
4079 					 * Re-use rdata as this is a
4080 					 * direct I/O
4081 					 */
4082 					rc = cifs_resend_rdata(
4083 						rdata,
4084 						&tmp_list, ctx);
4085 				} else {
4086 					rc = cifs_send_async_read(
4087 						rdata->offset + got_bytes,
4088 						rdata->bytes - got_bytes,
4089 						rdata->cfile, cifs_sb,
4090 						&tmp_list, ctx);
4091 
4092 					kref_put(&rdata->refcount,
4093 						cifs_readdata_release);
4094 				}
4095 
4096 				list_splice(&tmp_list, &ctx->list);
4097 
4098 				goto again;
4099 			} else if (rdata->result)
4100 				rc = rdata->result;
4101 
4102 			/* if there was a short read -- discard anything left */
4103 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4104 				rc = -ENODATA;
4105 
4106 			ctx->total_len += rdata->got_bytes;
4107 		}
4108 		list_del_init(&rdata->list);
4109 		kref_put(&rdata->refcount, cifs_readdata_release);
4110 	}
4111 
4112 	/* mask nodata case */
4113 	if (rc == -ENODATA)
4114 		rc = 0;
4115 
4116 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4117 
4118 	mutex_unlock(&ctx->aio_mutex);
4119 
4120 	if (ctx->iocb && ctx->iocb->ki_complete)
4121 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4122 	else
4123 		complete(&ctx->done);
4124 }
4125 
4126 static ssize_t __cifs_readv(
4127 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4128 {
4129 	size_t len;
4130 	struct file *file = iocb->ki_filp;
4131 	struct cifs_sb_info *cifs_sb;
4132 	struct cifsFileInfo *cfile;
4133 	struct cifs_tcon *tcon;
4134 	ssize_t rc, total_read = 0;
4135 	loff_t offset = iocb->ki_pos;
4136 	struct cifs_aio_ctx *ctx;
4137 
4138 	len = iov_iter_count(to);
4139 	if (!len)
4140 		return 0;
4141 
4142 	cifs_sb = CIFS_FILE_SB(file);
4143 	cfile = file->private_data;
4144 	tcon = tlink_tcon(cfile->tlink);
4145 
4146 	if (!tcon->ses->server->ops->async_readv)
4147 		return -ENOSYS;
4148 
4149 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4150 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4151 
4152 	ctx = cifs_aio_ctx_alloc();
4153 	if (!ctx)
4154 		return -ENOMEM;
4155 
4156 	ctx->pos	= offset;
4157 	ctx->direct_io	= direct;
4158 	ctx->len	= len;
4159 	ctx->cfile	= cifsFileInfo_get(cfile);
4160 	ctx->nr_pinned_pages = 0;
4161 
4162 	if (!is_sync_kiocb(iocb))
4163 		ctx->iocb = iocb;
4164 
4165 	if (user_backed_iter(to)) {
4166 		/*
4167 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4168 		 * they contain references to the calling process's virtual
4169 		 * memory layout which won't be available in an async worker
4170 		 * thread.  This also takes a pin on every folio involved.
4171 		 */
4172 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4173 					     &ctx->iter, 0);
4174 		if (rc < 0) {
4175 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4176 			return rc;
4177 		}
4178 
4179 		ctx->nr_pinned_pages = rc;
4180 		ctx->bv = (void *)ctx->iter.bvec;
4181 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4182 		ctx->should_dirty = true;
4183 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4184 		   !is_sync_kiocb(iocb)) {
4185 		/*
4186 		 * If the op is asynchronous, we need to copy the list attached
4187 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4188 		 * will be retained by the caller; in any case, we may or may
4189 		 * not be able to pin the pages, so we don't try.
4190 		 */
4191 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4192 		if (!ctx->bv) {
4193 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4194 			return -ENOMEM;
4195 		}
4196 	} else {
4197 		/*
4198 		 * Otherwise, we just pass the iterator down as-is and rely on
4199 		 * the caller to make sure the pages referred to by the
4200 		 * iterator don't evaporate.
4201 		 */
4202 		ctx->iter = *to;
4203 	}
4204 
4205 	if (direct) {
4206 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4207 						  offset, offset + len - 1);
4208 		if (rc) {
4209 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4210 			return -EAGAIN;
4211 		}
4212 	}
4213 
4214 	/* grab a lock here due to read response handlers can access ctx */
4215 	mutex_lock(&ctx->aio_mutex);
4216 
4217 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4218 
4219 	/* if at least one read request send succeeded, then reset rc */
4220 	if (!list_empty(&ctx->list))
4221 		rc = 0;
4222 
4223 	mutex_unlock(&ctx->aio_mutex);
4224 
4225 	if (rc) {
4226 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4227 		return rc;
4228 	}
4229 
4230 	if (!is_sync_kiocb(iocb)) {
4231 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4232 		return -EIOCBQUEUED;
4233 	}
4234 
4235 	rc = wait_for_completion_killable(&ctx->done);
4236 	if (rc) {
4237 		mutex_lock(&ctx->aio_mutex);
4238 		ctx->rc = rc = -EINTR;
4239 		total_read = ctx->total_len;
4240 		mutex_unlock(&ctx->aio_mutex);
4241 	} else {
4242 		rc = ctx->rc;
4243 		total_read = ctx->total_len;
4244 	}
4245 
4246 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4247 
4248 	if (total_read) {
4249 		iocb->ki_pos += total_read;
4250 		return total_read;
4251 	}
4252 	return rc;
4253 }
4254 
4255 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4256 {
4257 	return __cifs_readv(iocb, to, true);
4258 }
4259 
4260 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4261 {
4262 	return __cifs_readv(iocb, to, false);
4263 }
4264 
4265 ssize_t
4266 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4267 {
4268 	struct inode *inode = file_inode(iocb->ki_filp);
4269 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4270 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4271 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4272 						iocb->ki_filp->private_data;
4273 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4274 	int rc = -EACCES;
4275 
4276 	/*
4277 	 * In strict cache mode we need to read from the server all the time
4278 	 * if we don't have level II oplock because the server can delay mtime
4279 	 * change - so we can't make a decision about inode invalidating.
4280 	 * And we can also fail with pagereading if there are mandatory locks
4281 	 * on pages affected by this read but not on the region from pos to
4282 	 * pos+len-1.
4283 	 */
4284 	if (!CIFS_CACHE_READ(cinode))
4285 		return cifs_user_readv(iocb, to);
4286 
4287 	if (cap_unix(tcon->ses) &&
4288 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4289 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4290 		return generic_file_read_iter(iocb, to);
4291 
4292 	/*
4293 	 * We need to hold the sem to be sure nobody modifies lock list
4294 	 * with a brlock that prevents reading.
4295 	 */
4296 	down_read(&cinode->lock_sem);
4297 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4298 				     tcon->ses->server->vals->shared_lock_type,
4299 				     0, NULL, CIFS_READ_OP))
4300 		rc = generic_file_read_iter(iocb, to);
4301 	up_read(&cinode->lock_sem);
4302 	return rc;
4303 }
4304 
4305 static ssize_t
4306 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4307 {
4308 	int rc = -EACCES;
4309 	unsigned int bytes_read = 0;
4310 	unsigned int total_read;
4311 	unsigned int current_read_size;
4312 	unsigned int rsize;
4313 	struct cifs_sb_info *cifs_sb;
4314 	struct cifs_tcon *tcon;
4315 	struct TCP_Server_Info *server;
4316 	unsigned int xid;
4317 	char *cur_offset;
4318 	struct cifsFileInfo *open_file;
4319 	struct cifs_io_parms io_parms = {0};
4320 	int buf_type = CIFS_NO_BUFFER;
4321 	__u32 pid;
4322 
4323 	xid = get_xid();
4324 	cifs_sb = CIFS_FILE_SB(file);
4325 
4326 	/* FIXME: set up handlers for larger reads and/or convert to async */
4327 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4328 
4329 	if (file->private_data == NULL) {
4330 		rc = -EBADF;
4331 		free_xid(xid);
4332 		return rc;
4333 	}
4334 	open_file = file->private_data;
4335 	tcon = tlink_tcon(open_file->tlink);
4336 	server = cifs_pick_channel(tcon->ses);
4337 
4338 	if (!server->ops->sync_read) {
4339 		free_xid(xid);
4340 		return -ENOSYS;
4341 	}
4342 
4343 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4344 		pid = open_file->pid;
4345 	else
4346 		pid = current->tgid;
4347 
4348 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4349 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4350 
4351 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4352 	     total_read += bytes_read, cur_offset += bytes_read) {
4353 		do {
4354 			current_read_size = min_t(uint, read_size - total_read,
4355 						  rsize);
4356 			/*
4357 			 * For windows me and 9x we do not want to request more
4358 			 * than it negotiated since it will refuse the read
4359 			 * then.
4360 			 */
4361 			if (!(tcon->ses->capabilities &
4362 				tcon->ses->server->vals->cap_large_files)) {
4363 				current_read_size = min_t(uint,
4364 					current_read_size, CIFSMaxBufSize);
4365 			}
4366 			if (open_file->invalidHandle) {
4367 				rc = cifs_reopen_file(open_file, true);
4368 				if (rc != 0)
4369 					break;
4370 			}
4371 			io_parms.pid = pid;
4372 			io_parms.tcon = tcon;
4373 			io_parms.offset = *offset;
4374 			io_parms.length = current_read_size;
4375 			io_parms.server = server;
4376 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4377 						    &bytes_read, &cur_offset,
4378 						    &buf_type);
4379 		} while (rc == -EAGAIN);
4380 
4381 		if (rc || (bytes_read == 0)) {
4382 			if (total_read) {
4383 				break;
4384 			} else {
4385 				free_xid(xid);
4386 				return rc;
4387 			}
4388 		} else {
4389 			cifs_stats_bytes_read(tcon, total_read);
4390 			*offset += bytes_read;
4391 		}
4392 	}
4393 	free_xid(xid);
4394 	return total_read;
4395 }
4396 
4397 /*
4398  * If the page is mmap'ed into a process' page tables, then we need to make
4399  * sure that it doesn't change while being written back.
4400  */
4401 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4402 {
4403 	struct folio *folio = page_folio(vmf->page);
4404 
4405 	/* Wait for the folio to be written to the cache before we allow it to
4406 	 * be modified.  We then assume the entire folio will need writing back.
4407 	 */
4408 #ifdef CONFIG_CIFS_FSCACHE
4409 	if (folio_test_fscache(folio) &&
4410 	    folio_wait_fscache_killable(folio) < 0)
4411 		return VM_FAULT_RETRY;
4412 #endif
4413 
4414 	folio_wait_writeback(folio);
4415 
4416 	if (folio_lock_killable(folio) < 0)
4417 		return VM_FAULT_RETRY;
4418 	return VM_FAULT_LOCKED;
4419 }
4420 
4421 static const struct vm_operations_struct cifs_file_vm_ops = {
4422 	.fault = filemap_fault,
4423 	.map_pages = filemap_map_pages,
4424 	.page_mkwrite = cifs_page_mkwrite,
4425 };
4426 
4427 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4428 {
4429 	int xid, rc = 0;
4430 	struct inode *inode = file_inode(file);
4431 
4432 	xid = get_xid();
4433 
4434 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4435 		rc = cifs_zap_mapping(inode);
4436 	if (!rc)
4437 		rc = generic_file_mmap(file, vma);
4438 	if (!rc)
4439 		vma->vm_ops = &cifs_file_vm_ops;
4440 
4441 	free_xid(xid);
4442 	return rc;
4443 }
4444 
4445 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4446 {
4447 	int rc, xid;
4448 
4449 	xid = get_xid();
4450 
4451 	rc = cifs_revalidate_file(file);
4452 	if (rc)
4453 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4454 			 rc);
4455 	if (!rc)
4456 		rc = generic_file_mmap(file, vma);
4457 	if (!rc)
4458 		vma->vm_ops = &cifs_file_vm_ops;
4459 
4460 	free_xid(xid);
4461 	return rc;
4462 }
4463 
4464 /*
4465  * Unlock a bunch of folios in the pagecache.
4466  */
4467 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4468 {
4469 	struct folio *folio;
4470 	XA_STATE(xas, &mapping->i_pages, first);
4471 
4472 	rcu_read_lock();
4473 	xas_for_each(&xas, folio, last) {
4474 		folio_unlock(folio);
4475 	}
4476 	rcu_read_unlock();
4477 }
4478 
4479 static void cifs_readahead_complete(struct work_struct *work)
4480 {
4481 	struct cifs_readdata *rdata = container_of(work,
4482 						   struct cifs_readdata, work);
4483 	struct folio *folio;
4484 	pgoff_t last;
4485 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4486 
4487 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4488 
4489 	if (good)
4490 		cifs_readahead_to_fscache(rdata->mapping->host,
4491 					  rdata->offset, rdata->bytes);
4492 
4493 	if (iov_iter_count(&rdata->iter) > 0)
4494 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4495 
4496 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4497 
4498 	rcu_read_lock();
4499 	xas_for_each(&xas, folio, last) {
4500 		if (good) {
4501 			flush_dcache_folio(folio);
4502 			folio_mark_uptodate(folio);
4503 		}
4504 		folio_unlock(folio);
4505 	}
4506 	rcu_read_unlock();
4507 
4508 	kref_put(&rdata->refcount, cifs_readdata_release);
4509 }
4510 
4511 static void cifs_readahead(struct readahead_control *ractl)
4512 {
4513 	struct cifsFileInfo *open_file = ractl->file->private_data;
4514 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4515 	struct TCP_Server_Info *server;
4516 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4517 	unsigned int ra_pages;
4518 	pgoff_t next_cached = ULONG_MAX, ra_index;
4519 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4520 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4521 	bool check_cache = caching;
4522 	pid_t pid;
4523 	int rc = 0;
4524 
4525 	/* Note that readahead_count() lags behind our dequeuing of pages from
4526 	 * the ractl, wo we have to keep track for ourselves.
4527 	 */
4528 	ra_pages = readahead_count(ractl);
4529 	ra_index = readahead_index(ractl);
4530 
4531 	xid = get_xid();
4532 
4533 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4534 		pid = open_file->pid;
4535 	else
4536 		pid = current->tgid;
4537 
4538 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4539 
4540 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4541 		 __func__, ractl->file, ractl->mapping, ra_pages);
4542 
4543 	/*
4544 	 * Chop the readahead request up into rsize-sized read requests.
4545 	 */
4546 	while ((nr_pages = ra_pages)) {
4547 		unsigned int i, rsize;
4548 		struct cifs_readdata *rdata;
4549 		struct cifs_credits credits_on_stack;
4550 		struct cifs_credits *credits = &credits_on_stack;
4551 		struct folio *folio;
4552 		pgoff_t fsize;
4553 
4554 		/*
4555 		 * Find out if we have anything cached in the range of
4556 		 * interest, and if so, where the next chunk of cached data is.
4557 		 */
4558 		if (caching) {
4559 			if (check_cache) {
4560 				rc = cifs_fscache_query_occupancy(
4561 					ractl->mapping->host, ra_index, nr_pages,
4562 					&next_cached, &cache_nr_pages);
4563 				if (rc < 0)
4564 					caching = false;
4565 				check_cache = false;
4566 			}
4567 
4568 			if (ra_index == next_cached) {
4569 				/*
4570 				 * TODO: Send a whole batch of pages to be read
4571 				 * by the cache.
4572 				 */
4573 				folio = readahead_folio(ractl);
4574 				fsize = folio_nr_pages(folio);
4575 				ra_pages -= fsize;
4576 				ra_index += fsize;
4577 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4578 							       &folio->page) < 0) {
4579 					/*
4580 					 * TODO: Deal with cache read failure
4581 					 * here, but for the moment, delegate
4582 					 * that to readpage.
4583 					 */
4584 					caching = false;
4585 				}
4586 				folio_unlock(folio);
4587 				next_cached += fsize;
4588 				cache_nr_pages -= fsize;
4589 				if (cache_nr_pages == 0)
4590 					check_cache = true;
4591 				continue;
4592 			}
4593 		}
4594 
4595 		if (open_file->invalidHandle) {
4596 			rc = cifs_reopen_file(open_file, true);
4597 			if (rc) {
4598 				if (rc == -EAGAIN)
4599 					continue;
4600 				break;
4601 			}
4602 		}
4603 
4604 		if (cifs_sb->ctx->rsize == 0)
4605 			cifs_sb->ctx->rsize =
4606 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4607 							     cifs_sb->ctx);
4608 
4609 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4610 						   &rsize, credits);
4611 		if (rc)
4612 			break;
4613 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4614 		if (next_cached != ULONG_MAX)
4615 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4616 
4617 		/*
4618 		 * Give up immediately if rsize is too small to read an entire
4619 		 * page. The VFS will fall back to readpage. We should never
4620 		 * reach this point however since we set ra_pages to 0 when the
4621 		 * rsize is smaller than a cache page.
4622 		 */
4623 		if (unlikely(!nr_pages)) {
4624 			add_credits_and_wake_if(server, credits, 0);
4625 			break;
4626 		}
4627 
4628 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4629 		if (!rdata) {
4630 			/* best to give up if we're out of mem */
4631 			add_credits_and_wake_if(server, credits, 0);
4632 			break;
4633 		}
4634 
4635 		rdata->offset	= ra_index * PAGE_SIZE;
4636 		rdata->bytes	= nr_pages * PAGE_SIZE;
4637 		rdata->cfile	= cifsFileInfo_get(open_file);
4638 		rdata->server	= server;
4639 		rdata->mapping	= ractl->mapping;
4640 		rdata->pid	= pid;
4641 		rdata->credits	= credits_on_stack;
4642 
4643 		for (i = 0; i < nr_pages; i++) {
4644 			if (!readahead_folio(ractl))
4645 				WARN_ON(1);
4646 		}
4647 		ra_pages -= nr_pages;
4648 		ra_index += nr_pages;
4649 
4650 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4651 				rdata->offset, rdata->bytes);
4652 
4653 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4654 		if (!rc) {
4655 			if (rdata->cfile->invalidHandle)
4656 				rc = -EAGAIN;
4657 			else
4658 				rc = server->ops->async_readv(rdata);
4659 		}
4660 
4661 		if (rc) {
4662 			add_credits_and_wake_if(server, &rdata->credits, 0);
4663 			cifs_unlock_folios(rdata->mapping,
4664 					   rdata->offset / PAGE_SIZE,
4665 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4666 			/* Fallback to the readpage in error/reconnect cases */
4667 			kref_put(&rdata->refcount, cifs_readdata_release);
4668 			break;
4669 		}
4670 
4671 		kref_put(&rdata->refcount, cifs_readdata_release);
4672 	}
4673 
4674 	free_xid(xid);
4675 }
4676 
4677 /*
4678  * cifs_readpage_worker must be called with the page pinned
4679  */
4680 static int cifs_readpage_worker(struct file *file, struct page *page,
4681 	loff_t *poffset)
4682 {
4683 	struct inode *inode = file_inode(file);
4684 	struct timespec64 atime, mtime;
4685 	char *read_data;
4686 	int rc;
4687 
4688 	/* Is the page cached? */
4689 	rc = cifs_readpage_from_fscache(inode, page);
4690 	if (rc == 0)
4691 		goto read_complete;
4692 
4693 	read_data = kmap(page);
4694 	/* for reads over a certain size could initiate async read ahead */
4695 
4696 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4697 
4698 	if (rc < 0)
4699 		goto io_error;
4700 	else
4701 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4702 
4703 	/* we do not want atime to be less than mtime, it broke some apps */
4704 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4705 	mtime = inode_get_mtime(inode);
4706 	if (timespec64_compare(&atime, &mtime) < 0)
4707 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4708 
4709 	if (PAGE_SIZE > rc)
4710 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4711 
4712 	flush_dcache_page(page);
4713 	SetPageUptodate(page);
4714 	rc = 0;
4715 
4716 io_error:
4717 	kunmap(page);
4718 
4719 read_complete:
4720 	unlock_page(page);
4721 	return rc;
4722 }
4723 
4724 static int cifs_read_folio(struct file *file, struct folio *folio)
4725 {
4726 	struct page *page = &folio->page;
4727 	loff_t offset = page_file_offset(page);
4728 	int rc = -EACCES;
4729 	unsigned int xid;
4730 
4731 	xid = get_xid();
4732 
4733 	if (file->private_data == NULL) {
4734 		rc = -EBADF;
4735 		free_xid(xid);
4736 		return rc;
4737 	}
4738 
4739 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4740 		 page, (int)offset, (int)offset);
4741 
4742 	rc = cifs_readpage_worker(file, page, &offset);
4743 
4744 	free_xid(xid);
4745 	return rc;
4746 }
4747 
4748 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4749 {
4750 	struct cifsFileInfo *open_file;
4751 
4752 	spin_lock(&cifs_inode->open_file_lock);
4753 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4754 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4755 			spin_unlock(&cifs_inode->open_file_lock);
4756 			return 1;
4757 		}
4758 	}
4759 	spin_unlock(&cifs_inode->open_file_lock);
4760 	return 0;
4761 }
4762 
4763 /* We do not want to update the file size from server for inodes
4764    open for write - to avoid races with writepage extending
4765    the file - in the future we could consider allowing
4766    refreshing the inode only on increases in the file size
4767    but this is tricky to do without racing with writebehind
4768    page caching in the current Linux kernel design */
4769 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4770 {
4771 	if (!cifsInode)
4772 		return true;
4773 
4774 	if (is_inode_writable(cifsInode)) {
4775 		/* This inode is open for write at least once */
4776 		struct cifs_sb_info *cifs_sb;
4777 
4778 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4779 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4780 			/* since no page cache to corrupt on directio
4781 			we can change size safely */
4782 			return true;
4783 		}
4784 
4785 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4786 			return true;
4787 
4788 		return false;
4789 	} else
4790 		return true;
4791 }
4792 
4793 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4794 			loff_t pos, unsigned len,
4795 			struct page **pagep, void **fsdata)
4796 {
4797 	int oncethru = 0;
4798 	pgoff_t index = pos >> PAGE_SHIFT;
4799 	loff_t offset = pos & (PAGE_SIZE - 1);
4800 	loff_t page_start = pos & PAGE_MASK;
4801 	loff_t i_size;
4802 	struct page *page;
4803 	int rc = 0;
4804 
4805 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4806 
4807 start:
4808 	page = grab_cache_page_write_begin(mapping, index);
4809 	if (!page) {
4810 		rc = -ENOMEM;
4811 		goto out;
4812 	}
4813 
4814 	if (PageUptodate(page))
4815 		goto out;
4816 
4817 	/*
4818 	 * If we write a full page it will be up to date, no need to read from
4819 	 * the server. If the write is short, we'll end up doing a sync write
4820 	 * instead.
4821 	 */
4822 	if (len == PAGE_SIZE)
4823 		goto out;
4824 
4825 	/*
4826 	 * optimize away the read when we have an oplock, and we're not
4827 	 * expecting to use any of the data we'd be reading in. That
4828 	 * is, when the page lies beyond the EOF, or straddles the EOF
4829 	 * and the write will cover all of the existing data.
4830 	 */
4831 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4832 		i_size = i_size_read(mapping->host);
4833 		if (page_start >= i_size ||
4834 		    (offset == 0 && (pos + len) >= i_size)) {
4835 			zero_user_segments(page, 0, offset,
4836 					   offset + len,
4837 					   PAGE_SIZE);
4838 			/*
4839 			 * PageChecked means that the parts of the page
4840 			 * to which we're not writing are considered up
4841 			 * to date. Once the data is copied to the
4842 			 * page, it can be set uptodate.
4843 			 */
4844 			SetPageChecked(page);
4845 			goto out;
4846 		}
4847 	}
4848 
4849 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4850 		/*
4851 		 * might as well read a page, it is fast enough. If we get
4852 		 * an error, we don't need to return it. cifs_write_end will
4853 		 * do a sync write instead since PG_uptodate isn't set.
4854 		 */
4855 		cifs_readpage_worker(file, page, &page_start);
4856 		put_page(page);
4857 		oncethru = 1;
4858 		goto start;
4859 	} else {
4860 		/* we could try using another file handle if there is one -
4861 		   but how would we lock it to prevent close of that handle
4862 		   racing with this read? In any case
4863 		   this will be written out by write_end so is fine */
4864 	}
4865 out:
4866 	*pagep = page;
4867 	return rc;
4868 }
4869 
4870 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4871 {
4872 	if (folio_test_private(folio))
4873 		return 0;
4874 	if (folio_test_fscache(folio)) {
4875 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4876 			return false;
4877 		folio_wait_fscache(folio);
4878 	}
4879 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4880 	return true;
4881 }
4882 
4883 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4884 				 size_t length)
4885 {
4886 	folio_wait_fscache(folio);
4887 }
4888 
4889 static int cifs_launder_folio(struct folio *folio)
4890 {
4891 	int rc = 0;
4892 	loff_t range_start = folio_pos(folio);
4893 	loff_t range_end = range_start + folio_size(folio);
4894 	struct writeback_control wbc = {
4895 		.sync_mode = WB_SYNC_ALL,
4896 		.nr_to_write = 0,
4897 		.range_start = range_start,
4898 		.range_end = range_end,
4899 	};
4900 
4901 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4902 
4903 	if (folio_clear_dirty_for_io(folio))
4904 		rc = cifs_writepage_locked(&folio->page, &wbc);
4905 
4906 	folio_wait_fscache(folio);
4907 	return rc;
4908 }
4909 
4910 void cifs_oplock_break(struct work_struct *work)
4911 {
4912 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4913 						  oplock_break);
4914 	struct inode *inode = d_inode(cfile->dentry);
4915 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4916 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4917 	struct cifs_tcon *tcon;
4918 	struct TCP_Server_Info *server;
4919 	struct tcon_link *tlink;
4920 	int rc = 0;
4921 	bool purge_cache = false, oplock_break_cancelled;
4922 	__u64 persistent_fid, volatile_fid;
4923 	__u16 net_fid;
4924 
4925 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4926 			TASK_UNINTERRUPTIBLE);
4927 
4928 	tlink = cifs_sb_tlink(cifs_sb);
4929 	if (IS_ERR(tlink))
4930 		goto out;
4931 	tcon = tlink_tcon(tlink);
4932 	server = tcon->ses->server;
4933 
4934 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4935 				      cfile->oplock_epoch, &purge_cache);
4936 
4937 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4938 						cifs_has_mand_locks(cinode)) {
4939 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4940 			 inode);
4941 		cinode->oplock = 0;
4942 	}
4943 
4944 	if (inode && S_ISREG(inode->i_mode)) {
4945 		if (CIFS_CACHE_READ(cinode))
4946 			break_lease(inode, O_RDONLY);
4947 		else
4948 			break_lease(inode, O_WRONLY);
4949 		rc = filemap_fdatawrite(inode->i_mapping);
4950 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4951 			rc = filemap_fdatawait(inode->i_mapping);
4952 			mapping_set_error(inode->i_mapping, rc);
4953 			cifs_zap_mapping(inode);
4954 		}
4955 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4956 		if (CIFS_CACHE_WRITE(cinode))
4957 			goto oplock_break_ack;
4958 	}
4959 
4960 	rc = cifs_push_locks(cfile);
4961 	if (rc)
4962 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4963 
4964 oplock_break_ack:
4965 	/*
4966 	 * When oplock break is received and there are no active
4967 	 * file handles but cached, then schedule deferred close immediately.
4968 	 * So, new open will not use cached handle.
4969 	 */
4970 
4971 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4972 		cifs_close_deferred_file(cinode);
4973 
4974 	persistent_fid = cfile->fid.persistent_fid;
4975 	volatile_fid = cfile->fid.volatile_fid;
4976 	net_fid = cfile->fid.netfid;
4977 	oplock_break_cancelled = cfile->oplock_break_cancelled;
4978 
4979 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4980 	/*
4981 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4982 	 * an acknowledgment to be sent when the file has already been closed.
4983 	 */
4984 	spin_lock(&cinode->open_file_lock);
4985 	/* check list empty since can race with kill_sb calling tree disconnect */
4986 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4987 		spin_unlock(&cinode->open_file_lock);
4988 		rc = server->ops->oplock_response(tcon, persistent_fid,
4989 						  volatile_fid, net_fid, cinode);
4990 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4991 	} else
4992 		spin_unlock(&cinode->open_file_lock);
4993 
4994 	cifs_put_tlink(tlink);
4995 out:
4996 	cifs_done_oplock_break(cinode);
4997 }
4998 
4999 /*
5000  * The presence of cifs_direct_io() in the address space ops vector
5001  * allowes open() O_DIRECT flags which would have failed otherwise.
5002  *
5003  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5004  * so this method should never be called.
5005  *
5006  * Direct IO is not yet supported in the cached mode.
5007  */
5008 static ssize_t
5009 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5010 {
5011         /*
5012          * FIXME
5013          * Eventually need to support direct IO for non forcedirectio mounts
5014          */
5015         return -EINVAL;
5016 }
5017 
5018 static int cifs_swap_activate(struct swap_info_struct *sis,
5019 			      struct file *swap_file, sector_t *span)
5020 {
5021 	struct cifsFileInfo *cfile = swap_file->private_data;
5022 	struct inode *inode = swap_file->f_mapping->host;
5023 	unsigned long blocks;
5024 	long long isize;
5025 
5026 	cifs_dbg(FYI, "swap activate\n");
5027 
5028 	if (!swap_file->f_mapping->a_ops->swap_rw)
5029 		/* Cannot support swap */
5030 		return -EINVAL;
5031 
5032 	spin_lock(&inode->i_lock);
5033 	blocks = inode->i_blocks;
5034 	isize = inode->i_size;
5035 	spin_unlock(&inode->i_lock);
5036 	if (blocks*512 < isize) {
5037 		pr_warn("swap activate: swapfile has holes\n");
5038 		return -EINVAL;
5039 	}
5040 	*span = sis->pages;
5041 
5042 	pr_warn_once("Swap support over SMB3 is experimental\n");
5043 
5044 	/*
5045 	 * TODO: consider adding ACL (or documenting how) to prevent other
5046 	 * users (on this or other systems) from reading it
5047 	 */
5048 
5049 
5050 	/* TODO: add sk_set_memalloc(inet) or similar */
5051 
5052 	if (cfile)
5053 		cfile->swapfile = true;
5054 	/*
5055 	 * TODO: Since file already open, we can't open with DENY_ALL here
5056 	 * but we could add call to grab a byte range lock to prevent others
5057 	 * from reading or writing the file
5058 	 */
5059 
5060 	sis->flags |= SWP_FS_OPS;
5061 	return add_swap_extent(sis, 0, sis->max, 0);
5062 }
5063 
5064 static void cifs_swap_deactivate(struct file *file)
5065 {
5066 	struct cifsFileInfo *cfile = file->private_data;
5067 
5068 	cifs_dbg(FYI, "swap deactivate\n");
5069 
5070 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5071 
5072 	if (cfile)
5073 		cfile->swapfile = false;
5074 
5075 	/* do we need to unpin (or unlock) the file */
5076 }
5077 
5078 /*
5079  * Mark a page as having been made dirty and thus needing writeback.  We also
5080  * need to pin the cache object to write back to.
5081  */
5082 #ifdef CONFIG_CIFS_FSCACHE
5083 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5084 {
5085 	return fscache_dirty_folio(mapping, folio,
5086 					cifs_inode_cookie(mapping->host));
5087 }
5088 #else
5089 #define cifs_dirty_folio filemap_dirty_folio
5090 #endif
5091 
5092 const struct address_space_operations cifs_addr_ops = {
5093 	.read_folio = cifs_read_folio,
5094 	.readahead = cifs_readahead,
5095 	.writepages = cifs_writepages,
5096 	.write_begin = cifs_write_begin,
5097 	.write_end = cifs_write_end,
5098 	.dirty_folio = cifs_dirty_folio,
5099 	.release_folio = cifs_release_folio,
5100 	.direct_IO = cifs_direct_io,
5101 	.invalidate_folio = cifs_invalidate_folio,
5102 	.launder_folio = cifs_launder_folio,
5103 	.migrate_folio = filemap_migrate_folio,
5104 	/*
5105 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5106 	 * helper if needed
5107 	 */
5108 	.swap_activate = cifs_swap_activate,
5109 	.swap_deactivate = cifs_swap_deactivate,
5110 };
5111 
5112 /*
5113  * cifs_readahead requires the server to support a buffer large enough to
5114  * contain the header plus one complete page of data.  Otherwise, we need
5115  * to leave cifs_readahead out of the address space operations.
5116  */
5117 const struct address_space_operations cifs_addr_ops_smallbuf = {
5118 	.read_folio = cifs_read_folio,
5119 	.writepages = cifs_writepages,
5120 	.write_begin = cifs_write_begin,
5121 	.write_end = cifs_write_end,
5122 	.dirty_folio = cifs_dirty_folio,
5123 	.release_folio = cifs_release_folio,
5124 	.invalidate_folio = cifs_invalidate_folio,
5125 	.launder_folio = cifs_launder_folio,
5126 	.migrate_folio = filemap_migrate_folio,
5127 };
5128