xref: /openbmc/linux/fs/afs/write.c (revision 8fb72b4a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* handling of writes to regular files and writing back to the server
3  *
4  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/backing-dev.h>
9 #include <linux/slab.h>
10 #include <linux/fs.h>
11 #include <linux/pagemap.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
14 #include <linux/netfs.h>
15 #include "internal.h"
16 
17 static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
18 			       loff_t i_size, bool caching);
19 
20 #ifdef CONFIG_AFS_FSCACHE
21 /*
22  * Mark a page as having been made dirty and thus needing writeback.  We also
23  * need to pin the cache object to write back to.
24  */
25 bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
26 {
27 	return fscache_dirty_folio(mapping, folio,
28 				afs_vnode_cache(AFS_FS_I(mapping->host)));
29 }
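/*
 * Note on a folio that a write to the cache is in flight (PG_fscache) so that
 * afs_writepage(), afs_writepages_region() and afs_page_mkwrite() can wait
 * for it to finish with folio_wait_fscache() before touching the folio again.
 */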
30 static void afs_folio_start_fscache(bool caching, struct folio *folio)
31 {
32 	if (caching)
33 		folio_start_fscache(folio);
34 }
35 #else
36 static void afs_folio_start_fscache(bool caching, struct folio *folio)
37 {
38 }
39 #endif
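
/*
 * Dirty-region tracking: while a folio carries uncommitted data, folio->private
 * holds a value built by afs_folio_dirty(), from which afs_folio_dirty_from()
 * and afs_folio_dirty_to() recover the byte range that still needs storing
 * (these helpers live in internal.h).  For example, a write covering bytes
 * 0x100-0x1ff of an otherwise clean folio would record roughly from=0x100,
 * to=0x200; a later overlapping or abutting write widens that range rather
 * than recording a second one.
 */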
40 
41 /*
42  * prepare to perform part of a write to a page
43  */
44 int afs_write_begin(struct file *file, struct address_space *mapping,
45 		    loff_t pos, unsigned len, unsigned flags,
46 		    struct page **_page, void **fsdata)
47 {
48 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
49 	struct folio *folio;
50 	unsigned long priv;
51 	unsigned f, from;
52 	unsigned t, to;
53 	pgoff_t index;
54 	int ret;
55 
56 	_enter("{%llx:%llu},%llx,%x",
57 	       vnode->fid.vid, vnode->fid.vnode, pos, len);
58 
59 	/* Prefetch area to be written into the cache if we're caching this
60 	 * file.  We need to do this before we get a lock on the page in case
61 	 * there's more than one writer competing for the same cache block.
62 	 */
63 	ret = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata,
64 				&afs_req_ops, NULL);
65 	if (ret < 0)
66 		return ret;
67 
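	/* Work out which byte range within the (possibly multi-page) folio
	 * this write is going to touch.
	 */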
68 	index = folio_index(folio);
69 	from = pos - index * PAGE_SIZE;
70 	to = from + len;
71 
72 try_again:
73 	/* See if this page is already partially written in a way that we can
74 	 * merge the new write with.
75 	 */
76 	if (folio_test_private(folio)) {
77 		priv = (unsigned long)folio_get_private(folio);
78 		f = afs_folio_dirty_from(folio, priv);
79 		t = afs_folio_dirty_to(folio, priv);
80 		ASSERTCMP(f, <=, t);
81 
82 		if (folio_test_writeback(folio)) {
83 			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
84 			goto flush_conflicting_write;
85 		}
86 		/* If the file is being filled locally, allow inter-write
87 		 * spaces to be merged into writes.  If it's not, only write
88 		 * back what the user gives us.
89 		 */
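		/* (The new range [from, to) is merged if it overlaps or abuts
		 * the recorded range [f, t); only strictly disjoint ranges
		 * force the flush below.)
		 */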
90 		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
91 		    (to < f || from > t))
92 			goto flush_conflicting_write;
93 	}
94 
95 	*_page = folio_file_page(folio, pos / PAGE_SIZE);
96 	_leave(" = 0");
97 	return 0;
98 
99 	/* The previous write and this write aren't adjacent or overlapping, so
100 	 * flush the page out.
101 	 */
102 flush_conflicting_write:
103 	_debug("flush conflict");
104 	ret = folio_write_one(folio);
105 	if (ret < 0)
106 		goto error;
107 
108 	ret = folio_lock_killable(folio);
109 	if (ret < 0)
110 		goto error;
111 	goto try_again;
112 
113 error:
114 	folio_put(folio);
115 	_leave(" = %d", ret);
116 	return ret;
117 }
118 
119 /*
120  * finalise part of a write to a page
121  */
122 int afs_write_end(struct file *file, struct address_space *mapping,
123 		  loff_t pos, unsigned len, unsigned copied,
124 		  struct page *subpage, void *fsdata)
125 {
126 	struct folio *folio = page_folio(subpage);
127 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
128 	unsigned long priv;
129 	unsigned int f, from = offset_in_folio(folio, pos);
130 	unsigned int t, to = from + copied;
131 	loff_t i_size, write_end_pos;
132 
133 	_enter("{%llx:%llu},{%lx}",
134 	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
135 
136 	if (!folio_test_uptodate(folio)) {
137 		if (copied < len) {
138 			copied = 0;
139 			goto out;
140 		}
141 
142 		folio_mark_uptodate(folio);
143 	}
144 
145 	if (copied == 0)
146 		goto out;
147 
148 	write_end_pos = pos + copied;
149 
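	/* If this write extends the file, update i_size.  The check is
	 * repeated under cb_lock so that we can't race another extender and
	 * move the size backwards.
	 */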
150 	i_size = i_size_read(&vnode->vfs_inode);
151 	if (write_end_pos > i_size) {
152 		write_seqlock(&vnode->cb_lock);
153 		i_size = i_size_read(&vnode->vfs_inode);
154 		if (write_end_pos > i_size)
155 			afs_set_i_size(vnode, write_end_pos);
156 		write_sequnlock(&vnode->cb_lock);
157 		fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
158 	}
159 
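	/* Record the newly dirtied byte range in folio->private, widening any
	 * range that was already noted there.
	 */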
160 	if (folio_test_private(folio)) {
161 		priv = (unsigned long)folio_get_private(folio);
162 		f = afs_folio_dirty_from(folio, priv);
163 		t = afs_folio_dirty_to(folio, priv);
164 		if (from < f)
165 			f = from;
166 		if (to > t)
167 			t = to;
168 		priv = afs_folio_dirty(folio, f, t);
169 		folio_change_private(folio, (void *)priv);
170 		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
171 	} else {
172 		priv = afs_folio_dirty(folio, from, to);
173 		folio_attach_private(folio, (void *)priv);
174 		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
175 	}
176 
177 	if (folio_mark_dirty(folio))
178 		_debug("dirtied %lx", folio_index(folio));
179 
180 out:
181 	folio_unlock(folio);
182 	folio_put(folio);
183 	return copied;
184 }
185 
186 /*
187  * kill all the pages in the given range
188  */
189 static void afs_kill_pages(struct address_space *mapping,
190 			   loff_t start, loff_t len)
191 {
192 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
193 	struct folio *folio;
194 	pgoff_t index = start / PAGE_SIZE;
195 	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
196 
197 	_enter("{%llx:%llu},%llx @%llx",
198 	       vnode->fid.vid, vnode->fid.vnode, len, start);
199 
200 	do {
201 		_debug("kill %lx (to %lx)", index, last);
202 
203 		folio = filemap_get_folio(mapping, index);
204 		if (!folio) {
205 			next = index + 1;
206 			continue;
207 		}
208 
209 		next = folio_next_index(folio);
210 
211 		folio_clear_uptodate(folio);
212 		folio_end_writeback(folio);
213 		folio_lock(folio);
214 		generic_error_remove_page(mapping, &folio->page);
215 		folio_unlock(folio);
216 		folio_put(folio);
217 
218 	} while (index = next, index <= last);
219 
220 	_leave("");
221 }
222 
223 /*
224  * Redirty all the pages in a given range.
225  */
226 static void afs_redirty_pages(struct writeback_control *wbc,
227 			      struct address_space *mapping,
228 			      loff_t start, loff_t len)
229 {
230 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
231 	struct folio *folio;
232 	pgoff_t index = start / PAGE_SIZE;
233 	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
234 
235 	_enter("{%llx:%llu},%llx @%llx",
236 	       vnode->fid.vid, vnode->fid.vnode, len, start);
237 
238 	do {
239 		_debug("redirty %llx @%llx", len, start);
240 
241 		folio = filemap_get_folio(mapping, index);
242 		if (!folio) {
243 			next = index + 1;
244 			continue;
245 		}
246 
247 		next = index + folio_nr_pages(folio);
248 		folio_redirty_for_writepage(wbc, folio);
249 		folio_end_writeback(folio);
250 		folio_put(folio);
251 	} while (index = next, index <= last);
252 
253 	_leave("");
254 }
255 
256 /*
257  * completion of write to server
258  */
259 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
260 {
261 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
262 	struct folio *folio;
263 	pgoff_t end;
264 
265 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
266 
267 	_enter("{%llx:%llu},{%x @%llx}",
268 	       vnode->fid.vid, vnode->fid.vnode, len, start);
269 
270 	rcu_read_lock();
271 
272 	end = (start + len - 1) / PAGE_SIZE;
273 	xas_for_each(&xas, folio, end) {
274 		if (!folio_test_writeback(folio)) {
275 			kdebug("bad %x @%llx page %lx %lx",
276 			       len, start, folio_index(folio), end);
277 			ASSERT(folio_test_writeback(folio));
278 		}
279 
280 		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
281 		folio_detach_private(folio);
282 		folio_end_writeback(folio);
283 	}
284 
285 	rcu_read_unlock();
286 
287 	afs_prune_wb_keys(vnode);
288 	_leave("");
289 }
290 
291 /*
292  * Find a key to use for the writeback.  We cached the keys used to author the
293  * writes on the vnode.  *_wbk will contain the last writeback key used or NULL
294  * and we need to start from there if it's set.
295  */
296 static int afs_get_writeback_key(struct afs_vnode *vnode,
297 				 struct afs_wb_key **_wbk)
298 {
299 	struct afs_wb_key *wbk = NULL;
300 	struct list_head *p;
301 	int ret = -ENOKEY, ret2;
302 
303 	spin_lock(&vnode->wb_lock);
304 	if (*_wbk)
305 		p = (*_wbk)->vnode_link.next;
306 	else
307 		p = vnode->wb_keys.next;
308 
309 	while (p != &vnode->wb_keys) {
310 		wbk = list_entry(p, struct afs_wb_key, vnode_link);
311 		_debug("wbk %u", key_serial(wbk->key));
312 		ret2 = key_validate(wbk->key);
313 		if (ret2 == 0) {
314 			refcount_inc(&wbk->usage);
315 			_debug("USE WB KEY %u", key_serial(wbk->key));
316 			break;
317 		}
318 
319 		wbk = NULL;
320 		if (ret == -ENOKEY)
321 			ret = ret2;
322 		p = p->next;
323 	}
324 
325 	spin_unlock(&vnode->wb_lock);
326 	if (*_wbk)
327 		afs_put_wb_key(*_wbk);
328 	*_wbk = wbk;
329 	return wbk ? 0 : ret;
330 }
331 
332 static void afs_store_data_success(struct afs_operation *op)
333 {
334 	struct afs_vnode *vnode = op->file[0].vnode;
335 
336 	op->ctime = op->file[0].scb.status.mtime_client;
337 	afs_vnode_commit_status(op, &op->file[0]);
338 	if (op->error == 0) {
339 		if (!op->store.laundering)
340 			afs_pages_written_back(vnode, op->store.pos, op->store.size);
341 		afs_stat_v(vnode, n_stores);
342 		atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
343 	}
344 }
345 
346 static const struct afs_operation_ops afs_store_data_operation = {
347 	.issue_afs_rpc	= afs_fs_store_data,
348 	.issue_yfs_rpc	= yfs_fs_store_data,
349 	.success	= afs_store_data_success,
350 };
351 
352 /*
353  * write to a file
354  */
355 static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
356 			  bool laundering)
357 {
358 	struct afs_operation *op;
359 	struct afs_wb_key *wbk = NULL;
360 	loff_t size = iov_iter_count(iter), i_size;
361 	int ret = -ENOKEY;
362 
363 	_enter("%s{%llx:%llu.%u},%llx,%llx",
364 	       vnode->volume->name,
365 	       vnode->fid.vid,
366 	       vnode->fid.vnode,
367 	       vnode->fid.unique,
368 	       size, pos);
369 
370 	ret = afs_get_writeback_key(vnode, &wbk);
371 	if (ret) {
372 		_leave(" = %d [no keys]", ret);
373 		return ret;
374 	}
375 
376 	op = afs_alloc_operation(wbk->key, vnode->volume);
377 	if (IS_ERR(op)) {
378 		afs_put_wb_key(wbk);
379 		return -ENOMEM;
380 	}
381 
382 	i_size = i_size_read(&vnode->vfs_inode);
383 
384 	afs_op_set_vnode(op, 0, vnode);
385 	op->file[0].dv_delta = 1;
386 	op->file[0].modification = true;
387 	op->store.write_iter = iter;
388 	op->store.pos = pos;
389 	op->store.size = size;
390 	op->store.i_size = max(pos + size, i_size);
391 	op->store.laundering = laundering;
392 	op->mtime = vnode->vfs_inode.i_mtime;
393 	op->flags |= AFS_OPERATION_UNINTR;
394 	op->ops = &afs_store_data_operation;
395 
396 try_next_key:
397 	afs_begin_vnode_operation(op);
398 	afs_wait_for_operation(op);
399 
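	/* If the server refused this key, step on to the next cached
	 * writeback key and reissue the store.
	 */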
400 	switch (op->error) {
401 	case -EACCES:
402 	case -EPERM:
403 	case -ENOKEY:
404 	case -EKEYEXPIRED:
405 	case -EKEYREJECTED:
406 	case -EKEYREVOKED:
407 		_debug("next");
408 
409 		ret = afs_get_writeback_key(vnode, &wbk);
410 		if (ret == 0) {
411 			key_put(op->key);
412 			op->key = key_get(wbk->key);
413 			goto try_next_key;
414 		}
415 		break;
416 	}
417 
418 	afs_put_wb_key(wbk);
419 	_leave(" = %d", op->error);
420 	return afs_put_operation(op);
421 }
422 
423 /*
424  * Extend the region to be written back to include subsequent contiguously
425  * dirty pages if possible, but don't sleep while doing so.
426  *
427  * If this page holds new content, then we can include filler zeros in the
428  * writeback.
429  */
430 static void afs_extend_writeback(struct address_space *mapping,
431 				 struct afs_vnode *vnode,
432 				 long *_count,
433 				 loff_t start,
434 				 loff_t max_len,
435 				 bool new_content,
436 				 bool caching,
437 				 unsigned int *_len)
438 {
439 	struct pagevec pvec;
440 	struct folio *folio;
441 	unsigned long priv;
442 	unsigned int psize, filler = 0;
443 	unsigned int f, t;
444 	loff_t len = *_len;
445 	pgoff_t index = (start + len) / PAGE_SIZE;
446 	bool stop = true;
447 	unsigned int i;
448 
449 	XA_STATE(xas, &mapping->i_pages, index);
450 	pagevec_init(&pvec);
451 
452 	do {
453 		/* Firstly, we gather up a batch of contiguous dirty pages
454 		 * under the RCU read lock - but we can't clear the dirty flags
455 		 * there if any of those pages are mapped.
456 		 */
457 		rcu_read_lock();
458 
459 		xas_for_each(&xas, folio, ULONG_MAX) {
460 			stop = true;
461 			if (xas_retry(&xas, folio))
462 				continue;
463 			if (xa_is_value(folio))
464 				break;
465 			if (folio_index(folio) != index)
466 				break;
467 
468 			if (!folio_try_get_rcu(folio)) {
469 				xas_reset(&xas);
470 				continue;
471 			}
472 
473 			/* Has the page moved or been split? */
474 			if (unlikely(folio != xas_reload(&xas))) {
475 				folio_put(folio);
476 				break;
477 			}
478 
479 			if (!folio_trylock(folio)) {
480 				folio_put(folio);
481 				break;
482 			}
483 			if (!folio_test_dirty(folio) ||
484 			    folio_test_writeback(folio) ||
485 			    folio_test_fscache(folio)) {
486 				folio_unlock(folio);
487 				folio_put(folio);
488 				break;
489 			}
490 
491 			psize = folio_size(folio);
492 			priv = (unsigned long)folio_get_private(folio);
493 			f = afs_folio_dirty_from(folio, priv);
494 			t = afs_folio_dirty_to(folio, priv);
495 			if (f != 0 && !new_content) {
496 				folio_unlock(folio);
497 				folio_put(folio);
498 				break;
499 			}
500 
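			/* Extend the region by this folio's dirty data.
			 * 'filler' carries the gap between the end of the
			 * previous folio's dirty data and the end of that
			 * folio, which is only included once we decide to run
			 * on into a further folio.
			 */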
501 			len += filler + t;
502 			filler = psize - t;
503 			if (len >= max_len || *_count <= 0)
504 				stop = true;
505 			else if (t == psize || new_content)
506 				stop = false;
507 
508 			index += folio_nr_pages(folio);
509 			if (!pagevec_add(&pvec, &folio->page))
510 				break;
511 			if (stop)
512 				break;
513 		}
514 
515 		if (!stop)
516 			xas_pause(&xas);
517 		rcu_read_unlock();
518 
519 		/* Now, if we obtained any pages, we can shift them to being
520 		 * writable and mark them for caching.
521 		 */
522 		if (!pagevec_count(&pvec))
523 			break;
524 
525 		for (i = 0; i < pagevec_count(&pvec); i++) {
526 			folio = page_folio(pvec.pages[i]);
527 			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
528 
529 			if (!folio_clear_dirty_for_io(folio))
530 				BUG();
531 			if (folio_start_writeback(folio))
532 				BUG();
533 			afs_folio_start_fscache(caching, folio);
534 
535 			*_count -= folio_nr_pages(folio);
536 			folio_unlock(folio);
537 		}
538 
539 		pagevec_release(&pvec);
540 		cond_resched();
541 	} while (!stop);
542 
543 	*_len = len;
544 }
545 
546 /*
547  * Synchronously write back the locked page and any subsequent non-locked dirty
548  * pages.
549  */
550 static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
551 						struct writeback_control *wbc,
552 						struct folio *folio,
553 						loff_t start, loff_t end)
554 {
555 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
556 	struct iov_iter iter;
557 	unsigned long priv;
558 	unsigned int offset, to, len, max_len;
559 	loff_t i_size = i_size_read(&vnode->vfs_inode);
560 	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
561 	bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
562 	long count = wbc->nr_to_write;
563 	int ret;
564 
565 	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
566 
567 	if (folio_start_writeback(folio))
568 		BUG();
569 	afs_folio_start_fscache(caching, folio);
570 
571 	count -= folio_nr_pages(folio);
572 
573 	/* Find all consecutive lockable dirty pages that have contiguous
574 	 * written regions, stopping when we find a page that is not
575 	 * immediately lockable, is not dirty or is missing, or we reach the
576 	 * end of the range.
577 	 */
578 	priv = (unsigned long)folio_get_private(folio);
579 	offset = afs_folio_dirty_from(folio, priv);
580 	to = afs_folio_dirty_to(folio, priv);
581 	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
582 
583 	len = to - offset;
584 	start += offset;
585 	if (start < i_size) {
586 		/* Trim the write to the EOF; the extra data is ignored.  Also
587 		 * put an upper limit on the size of a single storedata op.
588 		 */
589 		max_len = 65536 * 4096;
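		/* i.e. at most 256MiB per store op */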
590 		max_len = min_t(unsigned long long, max_len, end - start + 1);
591 		max_len = min_t(unsigned long long, max_len, i_size - start);
592 
593 		if (len < max_len &&
594 		    (to == folio_size(folio) || new_content))
595 			afs_extend_writeback(mapping, vnode, &count,
596 					     start, max_len, new_content,
597 					     caching, &len);
598 		len = min_t(loff_t, len, max_len);
599 	}
600 
601 	/* We now have a contiguous set of dirty pages, each with writeback
602 	 * set; the first page is still locked at this point, but all the rest
603 	 * have been unlocked.
604 	 */
605 	folio_unlock(folio);
606 
607 	if (start < i_size) {
608 		_debug("write back %x @%llx [%llx]", len, start, i_size);
609 
610 		/* Speculatively write to the cache.  We have to fix this up
611 		 * later if the store fails.
612 		 */
613 		afs_write_to_cache(vnode, start, len, i_size, caching);
614 
615 		iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
616 		ret = afs_store_data(vnode, &iter, start, false);
617 	} else {
618 		_debug("write discard %x @%llx [%llx]", len, start, i_size);
619 
620 		/* The dirty region was entirely beyond the EOF. */
621 		fscache_clear_page_bits(afs_vnode_cache(vnode),
622 					mapping, start, len, caching);
623 		afs_pages_written_back(vnode, start, len);
624 		ret = 0;
625 	}
626 
627 	switch (ret) {
628 	case 0:
629 		wbc->nr_to_write = count;
630 		ret = len;
631 		break;
632 
633 	default:
634 		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
635 		fallthrough;
636 	case -EACCES:
637 	case -EPERM:
638 	case -ENOKEY:
639 	case -EKEYEXPIRED:
640 	case -EKEYREJECTED:
641 	case -EKEYREVOKED:
642 		afs_redirty_pages(wbc, mapping, start, len);
643 		mapping_set_error(mapping, ret);
644 		break;
645 
646 	case -EDQUOT:
647 	case -ENOSPC:
648 		afs_redirty_pages(wbc, mapping, start, len);
649 		mapping_set_error(mapping, -ENOSPC);
650 		break;
651 
652 	case -EROFS:
653 	case -EIO:
654 	case -EREMOTEIO:
655 	case -EFBIG:
656 	case -ENOENT:
657 	case -ENOMEDIUM:
658 	case -ENXIO:
659 		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
660 		afs_kill_pages(mapping, start, len);
661 		mapping_set_error(mapping, ret);
662 		break;
663 	}
664 
665 	_leave(" = %d", ret);
666 	return ret;
667 }
668 
669 /*
670  * write a page back to the server
671  * - the caller locked the page for us
672  */
673 int afs_writepage(struct page *subpage, struct writeback_control *wbc)
674 {
675 	struct folio *folio = page_folio(subpage);
676 	ssize_t ret;
677 	loff_t start;
678 
679 	_enter("{%lx},", folio_index(folio));
680 
681 #ifdef CONFIG_AFS_FSCACHE
682 	folio_wait_fscache(folio);
683 #endif
684 
685 	start = folio_index(folio) * PAGE_SIZE;
686 	ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
687 					       folio, start, LLONG_MAX - start);
688 	if (ret < 0) {
689 		_leave(" = %zd", ret);
690 		return ret;
691 	}
692 
693 	_leave(" = 0");
694 	return 0;
695 }
696 
697 /*
698  * write a region of pages back to the server
699  */
700 static int afs_writepages_region(struct address_space *mapping,
701 				 struct writeback_control *wbc,
702 				 loff_t start, loff_t end, loff_t *_next)
703 {
704 	struct folio *folio;
705 	struct page *head_page;
706 	ssize_t ret;
707 	int n;
708 
709 	_enter("%llx,%llx,", start, end);
710 
711 	do {
712 		pgoff_t index = start / PAGE_SIZE;
713 
714 		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
715 					     PAGECACHE_TAG_DIRTY, 1, &head_page);
716 		if (!n)
717 			break;
718 
719 		folio = page_folio(head_page);
720 		start = folio_pos(folio); /* May regress with THPs */
721 
722 		_debug("wback %lx", folio_index(folio));
723 
724 		/* At this point we hold neither the i_pages lock nor the
725 		 * page lock: the page may be truncated or invalidated
726 		 * (changing page->mapping to NULL), or even swizzled
727 		 * back from swapper_space to tmpfs file mapping
728 		 */
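		/* For data-integrity sync we must wait for the folio lock
		 * (but remain killable); for background writeback we don't
		 * block - if the folio can't be locked straight away we stop
		 * writing this region.
		 */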
729 		if (wbc->sync_mode != WB_SYNC_NONE) {
730 			ret = folio_lock_killable(folio);
731 			if (ret < 0) {
732 				folio_put(folio);
733 				return ret;
734 			}
735 		} else {
736 			if (!folio_trylock(folio)) {
737 				folio_put(folio);
738 				return 0;
739 			}
740 		}
741 
742 		if (folio_mapping(folio) != mapping ||
743 		    !folio_test_dirty(folio)) {
744 			start += folio_size(folio);
745 			folio_unlock(folio);
746 			folio_put(folio);
747 			continue;
748 		}
749 
750 		if (folio_test_writeback(folio) ||
751 		    folio_test_fscache(folio)) {
752 			folio_unlock(folio);
753 			if (wbc->sync_mode != WB_SYNC_NONE) {
754 				folio_wait_writeback(folio);
755 #ifdef CONFIG_AFS_FSCACHE
756 				folio_wait_fscache(folio);
757 #endif
758 			}
759 			folio_put(folio);
760 			continue;
761 		}
762 
763 		if (!folio_clear_dirty_for_io(folio))
764 			BUG();
765 		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
766 		folio_put(folio);
767 		if (ret < 0) {
768 			_leave(" = %zd", ret);
769 			return ret;
770 		}
771 
772 		start += ret;
773 
774 		cond_resched();
775 	} while (wbc->nr_to_write > 0);
776 
777 	*_next = start;
778 	_leave(" = 0 [%llx]", *_next);
779 	return 0;
780 }
781 
782 /*
783  * write some of the pending data back to the server
784  */
785 int afs_writepages(struct address_space *mapping,
786 		   struct writeback_control *wbc)
787 {
788 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
789 	loff_t start, next;
790 	int ret;
791 
792 	_enter("");
793 
794 	/* We have to be careful as we can end up racing with setattr()
795 	 * truncating the pagecache since the caller doesn't take a lock here
796 	 * to prevent it.
797 	 */
798 	if (wbc->sync_mode == WB_SYNC_ALL)
799 		down_read(&vnode->validate_lock);
800 	else if (!down_read_trylock(&vnode->validate_lock))
801 		return 0;
802 
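	/* For cyclic writeback, resume from where the last pass stopped and,
	 * if there's still budget left, wrap round to the start of the file.
	 */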
803 	if (wbc->range_cyclic) {
804 		start = mapping->writeback_index * PAGE_SIZE;
805 		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
806 		if (ret == 0) {
807 			mapping->writeback_index = next / PAGE_SIZE;
808 			if (start > 0 && wbc->nr_to_write > 0) {
809 				ret = afs_writepages_region(mapping, wbc, 0,
810 							    start, &next);
811 				if (ret == 0)
812 					mapping->writeback_index =
813 						next / PAGE_SIZE;
814 			}
815 		}
816 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
817 		ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
818 		if (wbc->nr_to_write > 0 && ret == 0)
819 			mapping->writeback_index = next / PAGE_SIZE;
820 	} else {
821 		ret = afs_writepages_region(mapping, wbc,
822 					    wbc->range_start, wbc->range_end, &next);
823 	}
824 
825 	up_read(&vnode->validate_lock);
826 	_leave(" = %d", ret);
827 	return ret;
828 }
829 
830 /*
831  * write to an AFS file
832  */
833 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
834 {
835 	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
836 	struct afs_file *af = iocb->ki_filp->private_data;
837 	ssize_t result;
838 	size_t count = iov_iter_count(from);
839 
840 	_enter("{%llx:%llu},{%zu},",
841 	       vnode->fid.vid, vnode->fid.vnode, count);
842 
843 	if (IS_SWAPFILE(&vnode->vfs_inode)) {
844 		printk(KERN_INFO
845 		       "AFS: Attempt to write to active swap file!\n");
846 		return -EBUSY;
847 	}
848 
849 	if (!count)
850 		return 0;
851 
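	/* Revalidate the vnode so that stale cached data is discarded before
	 * we buffer the write on top of it.
	 */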
852 	result = afs_validate(vnode, af->key);
853 	if (result < 0)
854 		return result;
855 
856 	result = generic_file_write_iter(iocb, from);
857 
858 	_leave(" = %zd", result);
859 	return result;
860 }
861 
862 /*
863  * flush any dirty pages for this process, and check for write errors.
864  * - the return status from this call provides a reliable indication of
865  *   whether any write errors occurred for this process.
866  */
867 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
868 {
869 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
870 	struct afs_file *af = file->private_data;
871 	int ret;
872 
873 	_enter("{%llx:%llu},{n=%pD},%d",
874 	       vnode->fid.vid, vnode->fid.vnode, file,
875 	       datasync);
876 
877 	ret = afs_validate(vnode, af->key);
878 	if (ret < 0)
879 		return ret;
880 
881 	return file_write_and_wait_range(file, start, end);
882 }
883 
884 /*
885  * notification that a previously read-only page is about to become writable
886  * - if it returns an error, the caller will deliver a bus error signal
887  */
888 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
889 {
890 	struct folio *folio = page_folio(vmf->page);
891 	struct file *file = vmf->vma->vm_file;
892 	struct inode *inode = file_inode(file);
893 	struct afs_vnode *vnode = AFS_FS_I(inode);
894 	struct afs_file *af = file->private_data;
895 	unsigned long priv;
896 	vm_fault_t ret = VM_FAULT_RETRY;
897 
898 	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
899 
900 	afs_validate(vnode, af->key);
901 
902 	sb_start_pagefault(inode->i_sb);
903 
904 	/* Wait for the page to be written to the cache before we allow it to
905 	 * be modified.  We then assume the entire page will need writing back.
906 	 */
907 #ifdef CONFIG_AFS_FSCACHE
908 	if (folio_test_fscache(folio) &&
909 	    folio_wait_fscache_killable(folio) < 0)
910 		goto out;
911 #endif
912 
913 	if (folio_wait_writeback_killable(folio))
914 		goto out;
915 
916 	if (folio_lock_killable(folio) < 0)
917 		goto out;
918 
919 	/* We mustn't change folio->private until writeback is complete as that
920 	 * details the portion of the page we need to write back and we might
921 	 * need to redirty the page if there's a problem.
922 	 */
923 	if (folio_wait_writeback_killable(folio) < 0) {
924 		folio_unlock(folio);
925 		goto out;
926 	}
927 
928 	priv = afs_folio_dirty(folio, 0, folio_size(folio));
929 	priv = afs_folio_dirty_mmapped(priv);
930 	if (folio_test_private(folio)) {
931 		folio_change_private(folio, (void *)priv);
932 		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
933 	} else {
934 		folio_attach_private(folio, (void *)priv);
935 		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
936 	}
937 	file_update_time(file);
938 
939 	ret = VM_FAULT_LOCKED;
940 out:
941 	sb_end_pagefault(inode->i_sb);
942 	return ret;
943 }
944 
945 /*
946  * Prune the keys cached for writeback.  vnode->wb_lock is taken here, so
947  * the caller must not already hold it.
947  */
948 void afs_prune_wb_keys(struct afs_vnode *vnode)
949 {
950 	LIST_HEAD(graveyard);
951 	struct afs_wb_key *wbk, *tmp;
952 
953 	/* Discard unused keys */
954 	spin_lock(&vnode->wb_lock);
955 
956 	if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
957 	    !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
958 		list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
959 			if (refcount_read(&wbk->usage) == 1)
960 				list_move(&wbk->vnode_link, &graveyard);
961 		}
962 	}
963 
964 	spin_unlock(&vnode->wb_lock);
965 
966 	while (!list_empty(&graveyard)) {
967 		wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
968 		list_del(&wbk->vnode_link);
969 		afs_put_wb_key(wbk);
970 	}
971 }
972 
973 /*
974  * Clean up a page during invalidation.
975  */
976 int afs_launder_folio(struct folio *folio)
977 {
978 	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
979 	struct iov_iter iter;
980 	struct bio_vec bv[1];
981 	unsigned long priv;
982 	unsigned int f, t;
983 	int ret = 0;
984 
985 	_enter("{%lx}", folio->index);
986 
987 	priv = (unsigned long)folio_get_private(folio);
988 	if (folio_clear_dirty_for_io(folio)) {
989 		f = 0;
990 		t = folio_size(folio);
991 		if (folio_test_private(folio)) {
992 			f = afs_folio_dirty_from(folio, priv);
993 			t = afs_folio_dirty_to(folio, priv);
994 		}
995 
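		/* Store just the recorded dirty byte range.  Passing
		 * laundering=true stops afs_store_data_success() from calling
		 * afs_pages_written_back(), since the folio is being cleaned
		 * up here rather than going through normal writeback
		 * completion.
		 */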
996 		bv[0].bv_page = &folio->page;
997 		bv[0].bv_offset = f;
998 		bv[0].bv_len = t - f;
999 		iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
1000 
1001 		trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
1002 		ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
1003 	}
1004 
1005 	trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
1006 	folio_detach_private(folio);
1007 	folio_wait_fscache(folio);
1008 	return ret;
1009 }
1010 
1011 /*
1012  * Deal with the completion of writing the data to the cache.
1013  */
1014 static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
1015 				    bool was_async)
1016 {
1017 	struct afs_vnode *vnode = priv;
1018 
1019 	if (IS_ERR_VALUE(transferred_or_error) &&
1020 	    transferred_or_error != -ENOBUFS)
1021 		afs_invalidate_cache(vnode, 0);
1022 }
1023 
1024 /*
1025  * Save the write to the cache also.
1026  */
1027 static void afs_write_to_cache(struct afs_vnode *vnode,
1028 			       loff_t start, size_t len, loff_t i_size,
1029 			       bool caching)
1030 {
1031 	fscache_write_to_cache(afs_vnode_cache(vnode),
1032 			       vnode->vfs_inode.i_mapping, start, len, i_size,
1033 			       afs_write_to_cache_done, vnode, caching);
1034 }
1035