xref: /openbmc/linux/fs/afs/write.c (revision 5cbf0398)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* handling of writes to regular files and writing back to the server
3  *
4  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/backing-dev.h>
9 #include <linux/slab.h>
10 #include <linux/fs.h>
11 #include <linux/pagemap.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
14 #include "internal.h"
15 
16 /*
17  * mark a page as having been made dirty and thus needing writeback
18  */
19 int afs_set_page_dirty(struct page *page)
20 {
21 	_enter("");
22 	return __set_page_dirty_nobuffers(page);
23 }
24 
25 /*
26  * Handle completion of a read operation to fill a page.
27  */
28 static void afs_fill_hole(struct afs_read *req)
29 {
30 	if (iov_iter_count(req->iter) > 0)
31 		/* The read was short - clear the excess buffer. */
32 		iov_iter_zero(iov_iter_count(req->iter), req->iter);
33 }
34 
35 /*
36  * partly or wholly fill a page that's under preparation for writing
37  */
38 static int afs_fill_page(struct file *file,
39 			 loff_t pos, unsigned int len, struct page *page)
40 {
41 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
42 	struct afs_read *req;
43 	size_t p;
44 	void *data;
45 	int ret;
46 
47 	_enter(",,%llu", (unsigned long long)pos);
48 
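	/* A write at or beyond the current EOF has nothing to fetch from the
	 * server - just clear the part of the page being prepared.
	 */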
49 	if (pos >= vnode->vfs_inode.i_size) {
50 		p = pos & ~PAGE_MASK;
51 		ASSERTCMP(p + len, <=, PAGE_SIZE);
52 		data = kmap(page);
53 		memset(data + p, 0, len);
54 		kunmap(page);
55 		return 0;
56 	}
57 
58 	req = kzalloc(sizeof(struct afs_read), GFP_KERNEL);
59 	if (!req)
60 		return -ENOMEM;
61 
62 	refcount_set(&req->usage, 1);
63 	req->vnode	= vnode;
64 	req->done	= afs_fill_hole;
65 	req->key	= key_get(afs_file_key(file));
66 	req->pos	= pos;
67 	req->len	= len;
68 	req->nr_pages	= 1;
69 	req->iter	= &req->def_iter;
70 	iov_iter_xarray(&req->def_iter, READ, &file->f_mapping->i_pages, pos, len);
71 
72 	ret = afs_fetch_data(vnode, req);
73 	afs_put_read(req);
74 	if (ret < 0) {
75 		if (ret == -ENOENT) {
76 			_debug("got NOENT from server"
77 			       " - marking file deleted and stale");
78 			set_bit(AFS_VNODE_DELETED, &vnode->flags);
79 			ret = -ESTALE;
80 		}
81 	}
82 
83 	_leave(" = %d", ret);
84 	return ret;
85 }
86 
87 /*
88  * prepare to perform part of a write to a page
89  */
90 int afs_write_begin(struct file *file, struct address_space *mapping,
91 		    loff_t pos, unsigned len, unsigned flags,
92 		    struct page **_page, void **fsdata)
93 {
94 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
95 	struct page *page;
96 	unsigned long priv;
97 	unsigned f, from;
98 	unsigned t, to;
99 	pgoff_t index;
100 	int ret;
101 
102 	_enter("{%llx:%llu},%llx,%x",
103 	       vnode->fid.vid, vnode->fid.vnode, pos, len);
104 
105 	page = grab_cache_page_write_begin(mapping, pos / PAGE_SIZE, flags);
106 	if (!page)
107 		return -ENOMEM;
108 
109 	if (!PageUptodate(page) && len != PAGE_SIZE) {
110 		ret = afs_fill_page(file, pos & PAGE_MASK, PAGE_SIZE, page);
111 		if (ret < 0) {
112 			unlock_page(page);
113 			put_page(page);
114 			_leave(" = %d [prep]", ret);
115 			return ret;
116 		}
117 		SetPageUptodate(page);
118 	}
119 
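	/* If the page is still being written out to the local cache, wait for
	 * that to finish before it is modified here.
	 */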
120 #ifdef CONFIG_AFS_FSCACHE
121 	wait_on_page_fscache(page);
122 #endif
123 
124 	index = page->index;
125 	from = pos - index * PAGE_SIZE;
126 	to = from + len;
127 
128 try_again:
129 	/* See if this page is already partially written in a way that allows
130 	 * the new write to be merged with it.
131 	 */
132 	if (PagePrivate(page)) {
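		/* page->private records the dirty byte range as [from, to)
		 * (packed and unpacked by the afs_page_dirty*() helpers).
		 */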
133 		priv = page_private(page);
134 		f = afs_page_dirty_from(page, priv);
135 		t = afs_page_dirty_to(page, priv);
136 		ASSERTCMP(f, <=, t);
137 
138 		if (PageWriteback(page)) {
139 			trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page);
140 			goto flush_conflicting_write;
141 		}
142 		/* If the file is being filled locally, allow inter-write
143 		 * spaces to be merged into writes.  If it's not, only write
144 		 * back what the user gives us.
145 		 */
146 		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
147 		    (to < f || from > t))
148 			goto flush_conflicting_write;
149 	}
150 
151 	*_page = page;
152 	_leave(" = 0");
153 	return 0;
154 
155 	/* The previous write and this write aren't adjacent or overlapping, so
156 	 * flush the page out.
157 	 */
158 flush_conflicting_write:
159 	_debug("flush conflict");
160 	ret = write_one_page(page);
161 	if (ret < 0)
162 		goto error;
163 
164 	ret = lock_page_killable(page);
165 	if (ret < 0)
166 		goto error;
167 	goto try_again;
168 
169 error:
170 	put_page(page);
171 	_leave(" = %d", ret);
172 	return ret;
173 }
174 
175 /*
176  * finalise part of a write to a page
177  */
178 int afs_write_end(struct file *file, struct address_space *mapping,
179 		  loff_t pos, unsigned len, unsigned copied,
180 		  struct page *page, void *fsdata)
181 {
182 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
183 	unsigned long priv;
184 	unsigned int f, from = pos & (thp_size(page) - 1);
185 	unsigned int t, to = from + copied;
186 	loff_t i_size, maybe_i_size;
187 	int ret = 0;
188 
189 	_enter("{%llx:%llu},{%lx}",
190 	       vnode->fid.vid, vnode->fid.vnode, page->index);
191 
192 	if (copied == 0)
193 		goto out;
194 
195 	maybe_i_size = pos + copied;
196 
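	/* If this write extends the file, update i_size, rechecking it under
	 * the callback lock in case another writer got in first.
	 */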
197 	i_size = i_size_read(&vnode->vfs_inode);
198 	if (maybe_i_size > i_size) {
199 		write_seqlock(&vnode->cb_lock);
200 		i_size = i_size_read(&vnode->vfs_inode);
201 		if (maybe_i_size > i_size)
202 			i_size_write(&vnode->vfs_inode, maybe_i_size);
203 		write_sequnlock(&vnode->cb_lock);
204 	}
205 
206 	if (!PageUptodate(page)) {
207 		if (copied < len) {
208 			/* Try and load any missing data from the server.  The
209 			 * unmarshalling routine will take care of clearing any
210 			 * bits that are beyond the EOF.
211 			 */
212 			ret = afs_fill_page(file, pos + copied,
213 					    len - copied, page);
214 			if (ret < 0)
215 				goto out;
216 		}
217 		SetPageUptodate(page);
218 	}
219 
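	/* Note the byte range now dirtied in page->private, merging it with
	 * any range that was recorded previously.
	 */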
220 	if (PagePrivate(page)) {
221 		priv = page_private(page);
222 		f = afs_page_dirty_from(page, priv);
223 		t = afs_page_dirty_to(page, priv);
224 		if (from < f)
225 			f = from;
226 		if (to > t)
227 			t = to;
228 		priv = afs_page_dirty(page, f, t);
229 		set_page_private(page, priv);
230 		trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page);
231 	} else {
232 		priv = afs_page_dirty(page, from, to);
233 		attach_page_private(page, (void *)priv);
234 		trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page);
235 	}
236 
237 	if (set_page_dirty(page))
238 		_debug("dirtied %lx", page->index);
239 	ret = copied;
240 
241 out:
242 	unlock_page(page);
243 	put_page(page);
244 	return ret;
245 }
246 
247 /*
248  * Kill all the pages in the given range after an unrecoverable store error.
249  */
250 static void afs_kill_pages(struct address_space *mapping,
251 			   loff_t start, loff_t len)
252 {
253 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
254 	struct pagevec pv;
255 	unsigned int loop, psize;
256 
257 	_enter("{%llx:%llu},%llx @%llx",
258 	       vnode->fid.vid, vnode->fid.vnode, len, start);
259 
260 	pagevec_init(&pv);
261 
262 	do {
263 		_debug("kill %llx @%llx", len, start);
264 
265 		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
266 					      PAGEVEC_SIZE, pv.pages);
267 		if (pv.nr == 0)
268 			break;
269 
270 		for (loop = 0; loop < pv.nr; loop++) {
271 			struct page *page = pv.pages[loop];
272 
273 			if (page->index * PAGE_SIZE >= start + len)
274 				break;
275 
276 			psize = thp_size(page);
277 			start += psize;
278 			len -= psize;
279 			ClearPageUptodate(page);
280 			end_page_writeback(page);
281 			lock_page(page);
282 			generic_error_remove_page(mapping, page);
283 			unlock_page(page);
284 		}
285 
286 		__pagevec_release(&pv);
287 	} while (len > 0);
288 
289 	_leave("");
290 }
291 
292 /*
293  * Redirty all the pages in a given range so that writeback will be retried.
294  */
295 static void afs_redirty_pages(struct writeback_control *wbc,
296 			      struct address_space *mapping,
297 			      loff_t start, loff_t len)
298 {
299 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
300 	struct pagevec pv;
301 	unsigned int loop, psize;
302 
303 	_enter("{%llx:%llu},%llx @%llx",
304 	       vnode->fid.vid, vnode->fid.vnode, len, start);
305 
306 	pagevec_init(&pv);
307 
308 	do {
309 		_debug("redirty %llx @%llx", len, start);
310 
311 		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
312 					      PAGEVEC_SIZE, pv.pages);
313 		if (pv.nr == 0)
314 			break;
315 
316 		for (loop = 0; loop < pv.nr; loop++) {
317 			struct page *page = pv.pages[loop];
318 
319 			if (page->index * PAGE_SIZE >= start + len)
320 				break;
321 
322 			psize = thp_size(page);
323 			start += psize;
324 			len -= psize;
325 			redirty_page_for_writepage(wbc, page);
326 			end_page_writeback(page);
327 		}
328 
329 		__pagevec_release(&pv);
330 	} while (len > 0);
331 
332 	_leave("");
333 }
334 
335 /*
336  * completion of write to server
337  */
338 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
339 {
340 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
341 	struct page *page;
342 	pgoff_t end;
343 
344 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
345 
346 	_enter("{%llx:%llu},{%x @%llx}",
347 	       vnode->fid.vid, vnode->fid.vnode, len, start);
348 
349 	rcu_read_lock();
350 
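	/* For each page covered by the store, drop the dirty-range tracking
	 * attached to page->private and clear the writeback flag.
	 */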
351 	end = (start + len - 1) / PAGE_SIZE;
352 	xas_for_each(&xas, page, end) {
353 		if (!PageWriteback(page)) {
354 			kdebug("bad %x @%llx page %lx %lx", len, start, page->index, end);
355 			ASSERT(PageWriteback(page));
356 		}
357 
358 		trace_afs_page_dirty(vnode, tracepoint_string("clear"), page);
359 		detach_page_private(page);
360 		page_endio(page, true, 0);
361 	}
362 
363 	rcu_read_unlock();
364 
365 	afs_prune_wb_keys(vnode);
366 	_leave("");
367 }
368 
369 /*
370  * Find a key to use for the writeback.  We cache the keys used to author the
371  * writes on the vnode.  On entry, *_wbk holds the last key tried (or NULL) and
372  * the search resumes after it; on return it holds the next usable key or NULL.
373  */
374 static int afs_get_writeback_key(struct afs_vnode *vnode,
375 				 struct afs_wb_key **_wbk)
376 {
377 	struct afs_wb_key *wbk = NULL;
378 	struct list_head *p;
379 	int ret = -ENOKEY, ret2;
380 
381 	spin_lock(&vnode->wb_lock);
382 	if (*_wbk)
383 		p = (*_wbk)->vnode_link.next;
384 	else
385 		p = vnode->wb_keys.next;
386 
387 	while (p != &vnode->wb_keys) {
388 		wbk = list_entry(p, struct afs_wb_key, vnode_link);
389 		_debug("wbk %u", key_serial(wbk->key));
390 		ret2 = key_validate(wbk->key);
391 		if (ret2 == 0) {
392 			refcount_inc(&wbk->usage);
393 			_debug("USE WB KEY %u", key_serial(wbk->key));
394 			break;
395 		}
396 
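		/* Remember the first validation failure in case no usable key
		 * is found.
		 */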
397 		wbk = NULL;
398 		if (ret == -ENOKEY)
399 			ret = ret2;
400 		p = p->next;
401 	}
402 
403 	spin_unlock(&vnode->wb_lock);
404 	if (*_wbk)
405 		afs_put_wb_key(*_wbk);
406 	*_wbk = wbk;
407 	return wbk ? 0 : ret;
408 }
409 
410 static void afs_store_data_success(struct afs_operation *op)
411 {
412 	struct afs_vnode *vnode = op->file[0].vnode;
413 
414 	op->ctime = op->file[0].scb.status.mtime_client;
415 	afs_vnode_commit_status(op, &op->file[0]);
416 	if (op->error == 0) {
417 		if (!op->store.laundering)
418 			afs_pages_written_back(vnode, op->store.pos, op->store.size);
419 		afs_stat_v(vnode, n_stores);
420 		atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
421 	}
422 }
423 
424 static const struct afs_operation_ops afs_store_data_operation = {
425 	.issue_afs_rpc	= afs_fs_store_data,
426 	.issue_yfs_rpc	= yfs_fs_store_data,
427 	.success	= afs_store_data_success,
428 };
429 
430 /*
431  * write to a file
432  */
433 static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
434 			  bool laundering)
435 {
436 	struct afs_operation *op;
437 	struct afs_wb_key *wbk = NULL;
438 	loff_t size = iov_iter_count(iter), i_size;
439 	int ret = -ENOKEY;
440 
441 	_enter("%s{%llx:%llu.%u},%llx,%llx",
442 	       vnode->volume->name,
443 	       vnode->fid.vid,
444 	       vnode->fid.vnode,
445 	       vnode->fid.unique,
446 	       size, pos);
447 
448 	ret = afs_get_writeback_key(vnode, &wbk);
449 	if (ret) {
450 		_leave(" = %d [no keys]", ret);
451 		return ret;
452 	}
453 
454 	op = afs_alloc_operation(wbk->key, vnode->volume);
455 	if (IS_ERR(op)) {
456 		afs_put_wb_key(wbk);
457 		return -ENOMEM;
458 	}
459 
460 	i_size = i_size_read(&vnode->vfs_inode);
461 
462 	afs_op_set_vnode(op, 0, vnode);
463 	op->file[0].dv_delta = 1;
464 	op->store.write_iter = iter;
465 	op->store.pos = pos;
466 	op->store.size = size;
467 	op->store.i_size = max(pos + size, i_size);
468 	op->store.laundering = laundering;
469 	op->mtime = vnode->vfs_inode.i_mtime;
470 	op->flags |= AFS_OPERATION_UNINTR;
471 	op->ops = &afs_store_data_operation;
472 
473 try_next_key:
474 	afs_begin_vnode_operation(op);
475 	afs_wait_for_operation(op);
476 
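	/* If the server rejected the key (or it has expired), retry the
	 * operation with the next cached writeback key.
	 */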
477 	switch (op->error) {
478 	case -EACCES:
479 	case -EPERM:
480 	case -ENOKEY:
481 	case -EKEYEXPIRED:
482 	case -EKEYREJECTED:
483 	case -EKEYREVOKED:
484 		_debug("next");
485 
486 		ret = afs_get_writeback_key(vnode, &wbk);
487 		if (ret == 0) {
488 			key_put(op->key);
489 			op->key = key_get(wbk->key);
490 			goto try_next_key;
491 		}
492 		break;
493 	}
494 
495 	afs_put_wb_key(wbk);
496 	_leave(" = %d", op->error);
497 	return afs_put_operation(op);
498 }
499 
500 /*
501  * Extend the region to be written back to include subsequent contiguously
502  * dirty pages if possible, but don't sleep while doing so.
503  *
504  * If this page holds new content, then we can include filler zeros in the
505  * writeback.
506  */
507 static void afs_extend_writeback(struct address_space *mapping,
508 				 struct afs_vnode *vnode,
509 				 long *_count,
510 				 loff_t start,
511 				 loff_t max_len,
512 				 bool new_content,
513 				 unsigned int *_len)
514 {
515 	struct pagevec pvec;
516 	struct page *page;
517 	unsigned long priv;
518 	unsigned int psize, filler = 0;
519 	unsigned int f, t;
520 	loff_t len = *_len;
521 	pgoff_t index = (start + len) / PAGE_SIZE;
522 	bool stop = true;
523 	unsigned int i;
524 
525 	XA_STATE(xas, &mapping->i_pages, index);
526 	pagevec_init(&pvec);
527 
528 	do {
529 		/* Firstly, we gather up a batch of contiguous dirty pages
530 		 * under the RCU read lock - but we can't clear the dirty flags
531 		 * there if any of those pages are mapped.
532 		 */
533 		rcu_read_lock();
534 
535 		xas_for_each(&xas, page, ULONG_MAX) {
536 			stop = true;
537 			if (xas_retry(&xas, page))
538 				continue;
539 			if (xa_is_value(page))
540 				break;
541 			if (page->index != index)
542 				break;
543 
544 			if (!page_cache_get_speculative(page)) {
545 				xas_reset(&xas);
546 				continue;
547 			}
548 
549 			/* Has the page moved or been split? */
550 			if (unlikely(page != xas_reload(&xas)))
551 				break;
552 
553 			if (!trylock_page(page))
554 				break;
555 			if (!PageDirty(page) || PageWriteback(page)) {
556 				unlock_page(page);
557 				break;
558 			}
559 
560 			psize = thp_size(page);
561 			priv = page_private(page);
562 			f = afs_page_dirty_from(page, priv);
563 			t = afs_page_dirty_to(page, priv);
564 			if (f != 0 && !new_content) {
565 				unlock_page(page);
566 				break;
567 			}
568 
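			/* Add this page's dirty data, plus any filler carried
			 * over from the previous page, to the write; the clean
			 * tail of this page becomes the next filler.
			 */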
569 			len += filler + t;
570 			filler = psize - t;
571 			if (len >= max_len || *_count <= 0)
572 				stop = true;
573 			else if (t == psize || new_content)
574 				stop = false;
575 
576 			index += thp_nr_pages(page);
577 			if (!pagevec_add(&pvec, page))
578 				break;
579 			if (stop)
580 				break;
581 		}
582 
583 		if (!stop)
584 			xas_pause(&xas);
585 		rcu_read_unlock();
586 
587 		/* Now, if we obtained any pages, we can clear their dirty flags
588 		 * and mark them as being under writeback.
589 		 */
590 		if (!pagevec_count(&pvec))
591 			break;
592 
593 		for (i = 0; i < pagevec_count(&pvec); i++) {
594 			page = pvec.pages[i];
595 			trace_afs_page_dirty(vnode, tracepoint_string("store+"), page);
596 
597 			if (!clear_page_dirty_for_io(page))
598 				BUG();
599 			if (test_set_page_writeback(page))
600 				BUG();
601 
602 			*_count -= thp_nr_pages(page);
603 			unlock_page(page);
604 		}
605 
606 		pagevec_release(&pvec);
607 		cond_resched();
608 	} while (!stop);
609 
610 	*_len = len;
611 }
612 
613 /*
614  * Synchronously write back the locked page and any subsequent non-locked dirty
615  * pages.
616  */
617 static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
618 					       struct writeback_control *wbc,
619 					       struct page *page,
620 					       loff_t start, loff_t end)
621 {
622 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
623 	struct iov_iter iter;
624 	unsigned long priv;
625 	unsigned int offset, to, len, max_len;
626 	loff_t i_size = i_size_read(&vnode->vfs_inode);
627 	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
628 	long count = wbc->nr_to_write;
629 	int ret;
630 
631 	_enter(",%lx,%llx-%llx", page->index, start, end);
632 
633 	if (test_set_page_writeback(page))
634 		BUG();
635 
636 	count -= thp_nr_pages(page);
637 
638 	/* Find all consecutive lockable dirty pages that have contiguous
639 	 * written regions, stopping when we find a page that is not
640 	 * immediately lockable, is not dirty or is missing, or we reach the
641 	 * end of the range.
642 	 */
643 	priv = page_private(page);
644 	offset = afs_page_dirty_from(page, priv);
645 	to = afs_page_dirty_to(page, priv);
646 	trace_afs_page_dirty(vnode, tracepoint_string("store"), page);
647 
648 	len = to - offset;
649 	start += offset;
650 	if (start < i_size) {
651 		/* Trim the write to the EOF; the extra data is ignored.  Also
652 		 * put an upper limit on the size of a single storedata op.
653 		 */
654 		max_len = 65536 * 4096;
655 		max_len = min_t(unsigned long long, max_len, end - start + 1);
656 		max_len = min_t(unsigned long long, max_len, i_size - start);
657 
658 		if (len < max_len &&
659 		    (to == thp_size(page) || new_content))
660 			afs_extend_writeback(mapping, vnode, &count,
661 					     start, max_len, new_content, &len);
662 		len = min_t(loff_t, len, max_len);
663 	}
664 
665 	/* We now have a contiguous set of dirty pages, each with writeback
666 	 * set; the first page is still locked at this point, but all the rest
667 	 * have been unlocked.
668 	 */
669 	unlock_page(page);
670 
671 	if (start < i_size) {
672 		_debug("write back %x @%llx [%llx]", len, start, i_size);
673 
674 		iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
675 		ret = afs_store_data(vnode, &iter, start, false);
676 	} else {
677 		_debug("write discard %x @%llx [%llx]", len, start, i_size);
678 
679 		/* The dirty region was entirely beyond the EOF. */
680 		afs_pages_written_back(vnode, start, len);
681 		ret = 0;
682 	}
683 
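	/* Permission and space errors leave the pages dirty so the store can
	 * be retried; unrecoverable errors cause the pages to be discarded.
	 */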
684 	switch (ret) {
685 	case 0:
686 		wbc->nr_to_write = count;
687 		ret = len;
688 		break;
689 
690 	default:
691 		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
692 		fallthrough;
693 	case -EACCES:
694 	case -EPERM:
695 	case -ENOKEY:
696 	case -EKEYEXPIRED:
697 	case -EKEYREJECTED:
698 	case -EKEYREVOKED:
699 		afs_redirty_pages(wbc, mapping, start, len);
700 		mapping_set_error(mapping, ret);
701 		break;
702 
703 	case -EDQUOT:
704 	case -ENOSPC:
705 		afs_redirty_pages(wbc, mapping, start, len);
706 		mapping_set_error(mapping, -ENOSPC);
707 		break;
708 
709 	case -EROFS:
710 	case -EIO:
711 	case -EREMOTEIO:
712 	case -EFBIG:
713 	case -ENOENT:
714 	case -ENOMEDIUM:
715 	case -ENXIO:
716 		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
717 		afs_kill_pages(mapping, start, len);
718 		mapping_set_error(mapping, ret);
719 		break;
720 	}
721 
722 	_leave(" = %d", ret);
723 	return ret;
724 }
725 
726 /*
727  * write a page back to the server
728  * - the caller locked the page for us
729  */
730 int afs_writepage(struct page *page, struct writeback_control *wbc)
731 {
732 	ssize_t ret;
733 	loff_t start;
734 
735 	_enter("{%lx},", page->index);
736 
737 	start = page->index * PAGE_SIZE;
738 	ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
739 					      start, LLONG_MAX - start);
740 	if (ret < 0) {
741 		_leave(" = %zd", ret);
742 		return ret;
743 	}
744 
745 	_leave(" = 0");
746 	return 0;
747 }
748 
749 /*
750  * write a region of pages back to the server
751  */
752 static int afs_writepages_region(struct address_space *mapping,
753 				 struct writeback_control *wbc,
754 				 loff_t start, loff_t end, loff_t *_next)
755 {
756 	struct page *page;
757 	ssize_t ret;
758 	int n;
759 
760 	_enter("%llx,%llx,", start, end);
761 
762 	do {
763 		pgoff_t index = start / PAGE_SIZE;
764 
765 		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
766 					     PAGECACHE_TAG_DIRTY, 1, &page);
767 		if (!n)
768 			break;
769 
770 		start = (loff_t)page->index * PAGE_SIZE; /* May regress with THPs */
771 
772 		_debug("wback %lx", page->index);
773 
774 		/* At this point we hold neither the i_pages lock nor the
775 		 * page lock: the page may be truncated or invalidated
776 		 * (changing page->mapping to NULL), or even swizzled
777 		 * back from swapper_space to tmpfs file mapping
778 		 */
779 		if (wbc->sync_mode != WB_SYNC_NONE) {
780 			ret = lock_page_killable(page);
781 			if (ret < 0) {
782 				put_page(page);
783 				return ret;
784 			}
785 		} else {
786 			if (!trylock_page(page)) {
787 				put_page(page);
788 				return 0;
789 			}
790 		}
791 
792 		if (page->mapping != mapping || !PageDirty(page)) {
793 			start += thp_size(page);
794 			unlock_page(page);
795 			put_page(page);
796 			continue;
797 		}
798 
799 		if (PageWriteback(page)) {
800 			unlock_page(page);
801 			if (wbc->sync_mode != WB_SYNC_NONE)
802 				wait_on_page_writeback(page);
803 			put_page(page);
804 			continue;
805 		}
806 
807 		if (!clear_page_dirty_for_io(page))
808 			BUG();
809 		ret = afs_write_back_from_locked_page(mapping, wbc, page, start, end);
810 		put_page(page);
811 		if (ret < 0) {
812 			_leave(" = %zd", ret);
813 			return ret;
814 		}
815 
816 		start += ret; /* ret is the number of bytes written back */
817 
818 		cond_resched();
819 	} while (wbc->nr_to_write > 0);
820 
821 	*_next = start;
822 	_leave(" = 0 [%llx]", *_next);
823 	return 0;
824 }
825 
826 /*
827  * write some of the pending data back to the server
828  */
829 int afs_writepages(struct address_space *mapping,
830 		   struct writeback_control *wbc)
831 {
832 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
833 	loff_t start, next;
834 	int ret;
835 
836 	_enter("");
837 
838 	/* We have to be careful as we can end up racing with setattr()
839 	 * truncating the pagecache since the caller doesn't take a lock here
840 	 * to prevent it.
841 	 */
842 	if (wbc->sync_mode == WB_SYNC_ALL)
843 		down_read(&vnode->validate_lock);
844 	else if (!down_read_trylock(&vnode->validate_lock))
845 		return 0;
846 
847 	if (wbc->range_cyclic) {
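		/* Resume from where the last cyclical pass left off, then wrap
		 * round to the start of the file if there's still quota left
		 * in nr_to_write.
		 */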
848 		start = mapping->writeback_index * PAGE_SIZE;
849 		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
850 		if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
851 			ret = afs_writepages_region(mapping, wbc, 0, start,
852 						    &next);
853 		mapping->writeback_index = next / PAGE_SIZE;
854 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
855 		ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
856 		if (wbc->nr_to_write > 0)
857 			mapping->writeback_index = next / PAGE_SIZE;
858 	} else {
859 		ret = afs_writepages_region(mapping, wbc,
860 					    wbc->range_start, wbc->range_end, &next);
861 	}
862 
863 	up_read(&vnode->validate_lock);
864 	_leave(" = %d", ret);
865 	return ret;
866 }
867 
868 /*
869  * write to an AFS file
870  */
871 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
872 {
873 	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
874 	ssize_t result;
875 	size_t count = iov_iter_count(from);
876 
877 	_enter("{%llx:%llu},{%zu},",
878 	       vnode->fid.vid, vnode->fid.vnode, count);
879 
880 	if (IS_SWAPFILE(&vnode->vfs_inode)) {
881 		printk(KERN_INFO
882 		       "AFS: Attempt to write to active swap file!\n");
883 		return -EBUSY;
884 	}
885 
886 	if (!count)
887 		return 0;
888 
889 	result = generic_file_write_iter(iocb, from);
890 
891 	_leave(" = %zd", result);
892 	return result;
893 }
894 
895 /*
896  * flush any dirty pages for this process, and check for write errors.
897  * - the return status from this call provides a reliable indication of
898  *   whether any write errors occurred for this process.
899  */
900 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
901 {
902 	struct inode *inode = file_inode(file);
903 	struct afs_vnode *vnode = AFS_FS_I(inode);
904 
905 	_enter("{%llx:%llu},{n=%pD},%d",
906 	       vnode->fid.vid, vnode->fid.vnode, file,
907 	       datasync);
908 
909 	return file_write_and_wait_range(file, start, end);
910 }
911 
912 /*
913  * notification that a previously read-only page is about to become writable
914  * - if it returns an error, the caller will deliver a bus error signal
915  */
916 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
917 {
918 	struct page *page = thp_head(vmf->page);
919 	struct file *file = vmf->vma->vm_file;
920 	struct inode *inode = file_inode(file);
921 	struct afs_vnode *vnode = AFS_FS_I(inode);
922 	unsigned long priv;
923 
924 	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
925 
926 	sb_start_pagefault(inode->i_sb);
927 
928 	/* Wait for the page to be written to the cache before we allow it to
929 	 * be modified.  We then assume the entire page will need writing back.
930 	 */
931 #ifdef CONFIG_AFS_FSCACHE
932 	if (PageFsCache(page) &&
933 	    wait_on_page_fscache_killable(page) < 0)
934 		return VM_FAULT_RETRY;
935 #endif
936 
937 	if (wait_on_page_writeback_killable(page))
938 		return VM_FAULT_RETRY;
939 
940 	if (lock_page_killable(page) < 0)
941 		return VM_FAULT_RETRY;
942 
943 	/* We mustn't change page->private until writeback is complete as that
944 	 * details the portion of the page we need to write back and we might
945 	 * need to redirty the page if there's a problem.
946 	 */
947 	if (wait_on_page_writeback_killable(page) < 0) {
948 		unlock_page(page);
949 		return VM_FAULT_RETRY;
950 	}
951 
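	/* Assume the whole page will be dirtied and note that it was dirtied
	 * through a shared mapping.
	 */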
952 	priv = afs_page_dirty(page, 0, thp_size(page));
953 	priv = afs_page_dirty_mmapped(priv);
954 	if (PagePrivate(page)) {
955 		set_page_private(page, priv);
956 		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite+"), page);
957 	} else {
958 		attach_page_private(page, (void *)priv);
959 		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), page);
960 	}
961 	file_update_time(file);
962 
963 	sb_end_pagefault(inode->i_sb);
964 	return VM_FAULT_LOCKED;
965 }
966 
967 /*
968  * Prune the keys cached for writeback.  The vnode->wb_lock is taken here.
969  */
970 void afs_prune_wb_keys(struct afs_vnode *vnode)
971 {
972 	LIST_HEAD(graveyard);
973 	struct afs_wb_key *wbk, *tmp;
974 
975 	/* Discard unused keys */
976 	spin_lock(&vnode->wb_lock);
977 
978 	if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
979 	    !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
980 		list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
981 			if (refcount_read(&wbk->usage) == 1)
982 				list_move(&wbk->vnode_link, &graveyard);
983 		}
984 	}
985 
986 	spin_unlock(&vnode->wb_lock);
987 
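	/* Dispose of the unused keys outside of the lock. */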
988 	while (!list_empty(&graveyard)) {
989 		wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
990 		list_del(&wbk->vnode_link);
991 		afs_put_wb_key(wbk);
992 	}
993 }
994 
995 /*
996  * Clean up a page during invalidation.
997  */
998 int afs_launder_page(struct page *page)
999 {
1000 	struct address_space *mapping = page->mapping;
1001 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
1002 	struct iov_iter iter;
1003 	struct bio_vec bv[1];
1004 	unsigned long priv;
1005 	unsigned int f, t;
1006 	int ret = 0;
1007 
1008 	_enter("{%lx}", page->index);
1009 
1010 	priv = page_private(page);
1011 	if (clear_page_dirty_for_io(page)) {
1012 		f = 0;
1013 		t = thp_size(page);
1014 		if (PagePrivate(page)) {
1015 			f = afs_page_dirty_from(page, priv);
1016 			t = afs_page_dirty_to(page, priv);
1017 		}
1018 
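		/* Synchronously store just the dirty part of the page before
		 * it gets invalidated.
		 */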
1019 		bv[0].bv_page = page;
1020 		bv[0].bv_offset = f;
1021 		bv[0].bv_len = t - f;
1022 		iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
1023 
1024 		trace_afs_page_dirty(vnode, tracepoint_string("launder"), page);
1025 		ret = afs_store_data(vnode, &iter,
1026 				     (loff_t)page->index * PAGE_SIZE + f, true);
1027 	}
1028 
1029 	trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page);
1030 	detach_page_private(page);
1031 	wait_on_page_fscache(page);
1032 	return ret;
1033 }
1034