xref: /openbmc/linux/fs/gfs2/aops.c (revision 64bc06bb)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/mpage.h>
18 #include <linux/fs.h>
19 #include <linux/writeback.h>
20 #include <linux/swap.h>
21 #include <linux/gfs2_ondisk.h>
22 #include <linux/backing-dev.h>
23 #include <linux/uio.h>
24 #include <trace/events/writeback.h>
25 #include <linux/sched/signal.h>
26 
27 #include "gfs2.h"
28 #include "incore.h"
29 #include "bmap.h"
30 #include "glock.h"
31 #include "inode.h"
32 #include "log.h"
33 #include "meta_io.h"
34 #include "quota.h"
35 #include "trans.h"
36 #include "rgrp.h"
37 #include "super.h"
38 #include "util.h"
39 #include "glops.h"
40 #include "aops.h"
41 
42 
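/**
 * gfs2_page_add_databufs - Journal the data buffers of a page
 * @ip: The inode
 * @page: The (locked) page
 * @from: Offset within the page at which the write starts
 * @len: Length of the write, in bytes
 *
 * Marks each buffer head overlapping the byte range [@from, @from + @len)
 * uptodate and adds it to the current transaction as journaled data.
 */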
43 void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
44 			    unsigned int from, unsigned int len)
45 {
46 	struct buffer_head *head = page_buffers(page);
47 	unsigned int bsize = head->b_size;
48 	struct buffer_head *bh;
49 	unsigned int to = from + len;
50 	unsigned int start, end;
51 
52 	for (bh = head, start = 0; bh != head || !start;
53 	     bh = bh->b_this_page, start = end) {
54 		end = start + bsize;
55 		if (end <= from)
56 			continue;
57 		if (start >= to)
58 			break;
59 		set_buffer_uptodate(bh);
60 		gfs2_trans_add_data(ip->i_gl, bh);
61 	}
62 }
63 
64 /**
65  * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
66  * @inode: The inode
67  * @lblock: The block number to look up
68  * @bh_result: The buffer head to return the result in
69  * @create: Non-zero if we may add block to the file
70  *
71  * Returns: errno
72  */
73 
74 static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
75 				  struct buffer_head *bh_result, int create)
76 {
77 	int error;
78 
79 	error = gfs2_block_map(inode, lblock, bh_result, 0);
80 	if (error)
81 		return error;
82 	if (!buffer_mapped(bh_result))
83 		return -EIO;
84 	return 0;
85 }
86 
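/*
 * get_block_t callback for direct I/O. Blocks are only looked up, never
 * allocated; the @create argument is deliberately ignored.
 */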
87 static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
88 				 struct buffer_head *bh_result, int create)
89 {
90 	return gfs2_block_map(inode, lblock, bh_result, 0);
91 }
92 
93 /**
94  * gfs2_writepage_common - Common bits of writepage
95  * @page: The page to be written
96  * @wbc: The writeback control
97  *
98  * Returns: 1 if the caller should write the page out, or 0 if it has already been dealt with here (and unlocked).
99  */
100 
101 static int gfs2_writepage_common(struct page *page,
102 				 struct writeback_control *wbc)
103 {
104 	struct inode *inode = page->mapping->host;
105 	struct gfs2_inode *ip = GFS2_I(inode);
106 	struct gfs2_sbd *sdp = GFS2_SB(inode);
107 	loff_t i_size = i_size_read(inode);
108 	pgoff_t end_index = i_size >> PAGE_SHIFT;
109 	unsigned offset;
110 
111 	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
112 		goto out;
113 	if (current->journal_info)
114 		goto redirty;
115 	/* Is the page fully outside i_size? (truncate in progress) */
116 	offset = i_size & (PAGE_SIZE-1);
117 	if (page->index > end_index || (page->index == end_index && !offset)) {
118 		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
119 		goto out;
120 	}
121 	return 1;
122 redirty:
123 	redirty_page_for_writepage(wbc, page);
124 out:
125 	unlock_page(page);
126 	return 0;
127 }
128 
129 /**
130  * gfs2_writepage - Write page for writeback mappings
131  * @page: The page
132  * @wbc: The writeback control
133  *
134  */
135 
136 static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
137 {
138 	int ret;
139 
140 	ret = gfs2_writepage_common(page, wbc);
141 	if (ret <= 0)
142 		return ret;
143 
144 	return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
145 }
146 
147 /* This is the same as calling block_write_full_page, but it also
148  * writes pages outside of i_size
149  */
150 static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
151 				struct writeback_control *wbc)
152 {
153 	struct inode * const inode = page->mapping->host;
154 	loff_t i_size = i_size_read(inode);
155 	const pgoff_t end_index = i_size >> PAGE_SHIFT;
156 	unsigned offset;
157 
158 	/*
159 	 * The page straddles i_size.  It must be zeroed out on each and every
160 	 * writepage invocation because it may be mmapped.  "A file is mapped
161 	 * in multiples of the page size.  For a file that is not a multiple of
162 	 * the  page size, the remaining memory is zeroed when mapped, and
163 	 * writes to that region are not written out to the file."
164 	 */
165 	offset = i_size & (PAGE_SIZE-1);
166 	if (page->index == end_index && offset)
167 		zero_user_segment(page, offset, PAGE_SIZE);
168 
169 	return __block_write_full_page(inode, page, get_block, wbc,
170 				       end_buffer_async_write);
171 }
172 
173 /**
174  * __gfs2_jdata_writepage - The core of jdata writepage
175  * @page: The page to write
176  * @wbc: The writeback control
177  *
178  * This is shared between writepage and writepages and implements the
179  * core of the writepage operation. If a transaction is required then
180  * PageChecked will have been set and the transaction will have
181  * already been started before this is called.
182  */
183 
184 static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
185 {
186 	struct inode *inode = page->mapping->host;
187 	struct gfs2_inode *ip = GFS2_I(inode);
188 	struct gfs2_sbd *sdp = GFS2_SB(inode);
189 
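	/*
	 * PageChecked is set by jdata_set_page_dirty(): the page's data has
	 * not been journaled yet, so attach buffer heads if necessary and add
	 * them to the transaction that our caller has already started.
	 */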
190 	if (PageChecked(page)) {
191 		ClearPageChecked(page);
192 		if (!page_has_buffers(page)) {
193 			create_empty_buffers(page, inode->i_sb->s_blocksize,
194 					     BIT(BH_Dirty)|BIT(BH_Uptodate));
195 		}
196 		gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE);
197 	}
198 	return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
199 }
200 
201 /**
202  * gfs2_jdata_writepage - Write complete page
203  * @page: Page to write
204  * @wbc: The writeback control
205  *
206  * Returns: errno
207  *
208  */
209 
210 static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
211 {
212 	struct inode *inode = page->mapping->host;
213 	struct gfs2_inode *ip = GFS2_I(inode);
214 	struct gfs2_sbd *sdp = GFS2_SB(inode);
215 	int ret;
216 
217 	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
218 		goto out;
219 	if (PageChecked(page) || current->journal_info)
220 		goto out_ignore;
221 	ret = __gfs2_jdata_writepage(page, wbc);
222 	return ret;
223 
224 out_ignore:
225 	redirty_page_for_writepage(wbc, page);
226 out:
227 	unlock_page(page);
228 	return 0;
229 }
230 
231 /**
232  * gfs2_writepages - Write a bunch of dirty pages back to disk
233  * @mapping: The mapping to write
234  * @wbc: Write-back control
235  *
236  * Used for both ordered and writeback modes.
237  */
238 static int gfs2_writepages(struct address_space *mapping,
239 			   struct writeback_control *wbc)
240 {
241 	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
242 	int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
243 
244 	/*
245 	 * Even if we didn't write any pages here, we might still be holding
246 	 * dirty pages in the ail. We forcibly flush the ail because we don't
247 	 * want balance_dirty_pages() to loop indefinitely trying to write out
248 	 * pages held in the ail that it can't find.
249 	 */
250 	if (ret == 0)
251 		set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
252 
253 	return ret;
254 }
255 
256 /**
257  * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
258  * @mapping: The mapping
259  * @wbc: The writeback control
260  * @pvec: The vector of pages
261  * @nr_pages: The number of pages to write
262  * @done_index: Pointer used to return the page index to continue from
263  *
264  * Returns: non-zero if loop should terminate, zero otherwise
265  */
266 
267 static int gfs2_write_jdata_pagevec(struct address_space *mapping,
268 				    struct writeback_control *wbc,
269 				    struct pagevec *pvec,
270 				    int nr_pages,
271 				    pgoff_t *done_index)
272 {
273 	struct inode *inode = mapping->host;
274 	struct gfs2_sbd *sdp = GFS2_SB(inode);
275 	unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
276 	int i;
277 	int ret;
278 
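	/*
	 * Reserve journal space for the whole batch up front; the transaction
	 * must be started before any page locks are taken (see
	 * gfs2_write_cache_jdata()).
	 */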
279 	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
280 	if (ret < 0)
281 		return ret;
282 
283 	for(i = 0; i < nr_pages; i++) {
284 		struct page *page = pvec->pages[i];
285 
286 		*done_index = page->index;
287 
288 		lock_page(page);
289 
290 		if (unlikely(page->mapping != mapping)) {
291 continue_unlock:
292 			unlock_page(page);
293 			continue;
294 		}
295 
296 		if (!PageDirty(page)) {
297 			/* someone wrote it for us */
298 			goto continue_unlock;
299 		}
300 
301 		if (PageWriteback(page)) {
302 			if (wbc->sync_mode != WB_SYNC_NONE)
303 				wait_on_page_writeback(page);
304 			else
305 				goto continue_unlock;
306 		}
307 
308 		BUG_ON(PageWriteback(page));
309 		if (!clear_page_dirty_for_io(page))
310 			goto continue_unlock;
311 
312 		trace_wbc_writepage(wbc, inode_to_bdi(inode));
313 
314 		ret = __gfs2_jdata_writepage(page, wbc);
315 		if (unlikely(ret)) {
316 			if (ret == AOP_WRITEPAGE_ACTIVATE) {
317 				unlock_page(page);
318 				ret = 0;
319 			} else {
320 
321 				/*
322 				 * done_index is set past this page,
323 				 * so media errors will not choke
324 				 * background writeout for the entire
325 				 * file. This has consequences for
326 				 * range_cyclic semantics (ie. it may
327 				 * not be suitable for data integrity
328 				 * writeout).
329 				 */
330 				*done_index = page->index + 1;
331 				ret = 1;
332 				break;
333 			}
334 		}
335 
336 		/*
337 		 * We stop writing back only if we are not doing
338 		 * integrity sync. In case of integrity sync we have to
339 		 * keep going until we have written all the pages
340 		 * we tagged for writeback prior to entering this loop.
341 		 */
342 		if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
343 			ret = 1;
344 			break;
345 		}
346 
347 	}
348 	gfs2_trans_end(sdp);
349 	return ret;
350 }
351 
352 /**
353  * gfs2_write_cache_jdata - Like write_cache_pages but different
354  * @mapping: The mapping to write
355  * @wbc: The writeback control
356  *
357  * The reason that we use our own function here is that we need to
358  * start transactions before we grab page locks. This allows us
359  * to get the ordering right.
360  */
361 
362 static int gfs2_write_cache_jdata(struct address_space *mapping,
363 				  struct writeback_control *wbc)
364 {
365 	int ret = 0;
366 	int done = 0;
367 	struct pagevec pvec;
368 	int nr_pages;
369 	pgoff_t uninitialized_var(writeback_index);
370 	pgoff_t index;
371 	pgoff_t end;
372 	pgoff_t done_index;
373 	int cycled;
374 	int range_whole = 0;
375 	int tag;
376 
377 	pagevec_init(&pvec);
378 	if (wbc->range_cyclic) {
379 		writeback_index = mapping->writeback_index; /* prev offset */
380 		index = writeback_index;
381 		if (index == 0)
382 			cycled = 1;
383 		else
384 			cycled = 0;
385 		end = -1;
386 	} else {
387 		index = wbc->range_start >> PAGE_SHIFT;
388 		end = wbc->range_end >> PAGE_SHIFT;
389 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
390 			range_whole = 1;
391 		cycled = 1; /* ignore range_cyclic tests */
392 	}
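	/*
	 * For data integrity sync (and tagged writepages) work from the
	 * TOWRITE tag, set below by tag_pages_for_writeback(), so that pages
	 * dirtied after the sync has started cannot prevent it from finishing.
	 */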
393 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
394 		tag = PAGECACHE_TAG_TOWRITE;
395 	else
396 		tag = PAGECACHE_TAG_DIRTY;
397 
398 retry:
399 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
400 		tag_pages_for_writeback(mapping, index, end);
401 	done_index = index;
402 	while (!done && (index <= end)) {
403 		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
404 				tag);
405 		if (nr_pages == 0)
406 			break;
407 
408 		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
409 		if (ret)
410 			done = 1;
411 		if (ret > 0)
412 			ret = 0;
413 		pagevec_release(&pvec);
414 		cond_resched();
415 	}
416 
417 	if (!cycled && !done) {
418 		/*
419 		 * range_cyclic:
420 		 * We hit the last page and there is more work to be done: wrap
421 		 * back to the start of the file
422 		 */
423 		cycled = 1;
424 		index = 0;
425 		end = writeback_index - 1;
426 		goto retry;
427 	}
428 
429 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
430 		mapping->writeback_index = done_index;
431 
432 	return ret;
433 }
434 
435 
436 /**
437  * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
438  * @mapping: The mapping to write
439  * @wbc: The writeback control
440  *
441  */
442 
443 static int gfs2_jdata_writepages(struct address_space *mapping,
444 				 struct writeback_control *wbc)
445 {
446 	struct gfs2_inode *ip = GFS2_I(mapping->host);
447 	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
448 	int ret;
449 
450 	ret = gfs2_write_cache_jdata(mapping, wbc);
451 	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
452 		gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
453 			       GFS2_LFC_JDATA_WPAGES);
454 		ret = gfs2_write_cache_jdata(mapping, wbc);
455 	}
456 	return ret;
457 }
458 
459 /**
460  * stuffed_readpage - Fill in a Linux page with stuffed file data
461  * @ip: the inode
462  * @page: the page
463  *
464  * Returns: errno
465  */
466 
467 int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
468 {
469 	struct buffer_head *dibh;
470 	u64 dsize = i_size_read(&ip->i_inode);
471 	void *kaddr;
472 	int error;
473 
474 	/*
475 	 * Due to the order of unstuffing files and ->fault(), we can be
476 	 * asked for a zero page in the case of a stuffed file being extended,
477 	 * so we need to supply one here. It doesn't happen often.
478 	 */
479 	if (unlikely(page->index)) {
480 		zero_user(page, 0, PAGE_SIZE);
481 		SetPageUptodate(page);
482 		return 0;
483 	}
484 
485 	error = gfs2_meta_inode_buffer(ip, &dibh);
486 	if (error)
487 		return error;
488 
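	/*
	 * Copy the inline data that follows the on-disk dinode into the start
	 * of the page, and zero the remainder of the page.
	 */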
489 	kaddr = kmap_atomic(page);
490 	if (dsize > gfs2_max_stuffed_size(ip))
491 		dsize = gfs2_max_stuffed_size(ip);
492 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
493 	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
494 	kunmap_atomic(kaddr);
495 	flush_dcache_page(page);
496 	brelse(dibh);
497 	SetPageUptodate(page);
498 
499 	return 0;
500 }
501 
502 
503 /**
504  * __gfs2_readpage - readpage
505  * @file: The file to read a page for
506  * @page: The page to read
507  *
508  * This is the core of gfs2's readpage. It's used by the internal file
509  * reading code, since in that case we already hold the glock. Also it's
510  * called by gfs2_readpage() once the required lock has been granted.
511  */
512 
513 static int __gfs2_readpage(void *file, struct page *page)
514 {
515 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
516 	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
517 	int error;
518 
519 	if (gfs2_is_stuffed(ip)) {
520 		error = stuffed_readpage(ip, page);
521 		unlock_page(page);
522 	} else {
523 		error = mpage_readpage(page, gfs2_block_map);
524 	}
525 
526 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
527 		return -EIO;
528 
529 	return error;
530 }
531 
532 /**
533  * gfs2_readpage - read a page of a file
534  * @file: The file to read
535  * @page: The page of the file
536  *
537  * This deals with the locking required. We have to unlock and
538  * relock the page in order to get the locking in the right
539  * order.
540  */
541 
542 static int gfs2_readpage(struct file *file, struct page *page)
543 {
544 	struct address_space *mapping = page->mapping;
545 	struct gfs2_inode *ip = GFS2_I(mapping->host);
546 	struct gfs2_holder gh;
547 	int error;
548 
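	/*
	 * The page comes to us locked.  Drop the page lock so that the glock
	 * can be acquired first (glock before page lock), then retake it
	 * below.
	 */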
549 	unlock_page(page);
550 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
551 	error = gfs2_glock_nq(&gh);
552 	if (unlikely(error))
553 		goto out;
554 	error = AOP_TRUNCATED_PAGE;
555 	lock_page(page);
556 	if (page->mapping == mapping && !PageUptodate(page))
557 		error = __gfs2_readpage(file, page);
558 	else
559 		unlock_page(page);
560 	gfs2_glock_dq(&gh);
561 out:
562 	gfs2_holder_uninit(&gh);
563 	if (error && error != AOP_TRUNCATED_PAGE)
564 		lock_page(page);
565 	return error;
566 }
567 
568 /**
569  * gfs2_internal_read - read an internal file
570  * @ip: The gfs2 inode
571  * @buf: The buffer to fill
572  * @pos: The file position
573  * @size: The amount to read
574  *
575  */
576 
577 int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
578                        unsigned size)
579 {
580 	struct address_space *mapping = ip->i_inode.i_mapping;
581 	unsigned long index = *pos / PAGE_SIZE;
582 	unsigned offset = *pos & (PAGE_SIZE - 1);
583 	unsigned copied = 0;
584 	unsigned amt;
585 	struct page *page;
586 	void *p;
587 
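	/*
	 * __gfs2_readpage() is used directly rather than ->readpage because
	 * callers of gfs2_internal_read() already hold the necessary glock.
	 */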
588 	do {
589 		amt = size - copied;
590 		if (offset + size > PAGE_SIZE)
591 			amt = PAGE_SIZE - offset;
592 		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
593 		if (IS_ERR(page))
594 			return PTR_ERR(page);
595 		p = kmap_atomic(page);
596 		memcpy(buf + copied, p + offset, amt);
597 		kunmap_atomic(p);
598 		put_page(page);
599 		copied += amt;
600 		index++;
601 		offset = 0;
602 	} while(copied < size);
603 	(*pos) += size;
604 	return size;
605 }
606 
607 /**
608  * gfs2_readpages - Read a bunch of pages at once
609  * @file: The file to read from
610  * @mapping: Address space info
611  * @pages: List of pages to read
612  * @nr_pages: Number of pages to read
613  *
614  * Some notes:
615  * 1. This is only for readahead, so we can simply ignore anything
616  *    which is slightly inconvenient (such as locking conflicts between
617  *    the page lock and the glock) and return having done no I/O. It's
618  *    obviously not something we'd want to do on too regular a basis.
619  *    Any I/O we ignore at this time will be done via readpage later.
620  * 2. We don't handle stuffed files here; we let readpage do the honours.
621  * 3. mpage_readpages() does most of the heavy lifting in the common case.
622  * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
623  */
624 
625 static int gfs2_readpages(struct file *file, struct address_space *mapping,
626 			  struct list_head *pages, unsigned nr_pages)
627 {
628 	struct inode *inode = mapping->host;
629 	struct gfs2_inode *ip = GFS2_I(inode);
630 	struct gfs2_sbd *sdp = GFS2_SB(inode);
631 	struct gfs2_holder gh;
632 	int ret;
633 
634 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
635 	ret = gfs2_glock_nq(&gh);
636 	if (unlikely(ret))
637 		goto out_uninit;
638 	if (!gfs2_is_stuffed(ip))
639 		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
640 	gfs2_glock_dq(&gh);
641 out_uninit:
642 	gfs2_holder_uninit(&gh);
643 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
644 		ret = -EIO;
645 	return ret;
646 }
647 
648 /**
649  * gfs2_write_begin - Begin to write to a file
650  * @file: The file to write to
651  * @mapping: The mapping in which to write
652  * @pos: The file offset at which to start writing
653  * @len: Length of the write
654  * @flags: Various flags
655  * @pagep: Pointer to return the page
656  * @fsdata: Pointer to return fs data (unused by GFS2)
657  *
658  * Returns: errno
659  */
660 
661 static int gfs2_write_begin(struct file *file, struct address_space *mapping,
662 			    loff_t pos, unsigned len, unsigned flags,
663 			    struct page **pagep, void **fsdata)
664 {
665 	struct gfs2_inode *ip = GFS2_I(mapping->host);
666 	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
667 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
668 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
669 	unsigned requested = 0;
670 	int alloc_required;
671 	int error = 0;
672 	pgoff_t index = pos >> PAGE_SHIFT;
673 	unsigned from = pos & (PAGE_SIZE - 1);
674 	struct page *page;
675 
676 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
677 	error = gfs2_glock_nq(&ip->i_gh);
678 	if (unlikely(error))
679 		goto out_uninit;
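	/*
	 * Writes to the rindex (gfs2_grow) also adjust statfs (see
	 * adjust_fs_space()), so the statfs inode glock is taken as well.
	 */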
680 	if (&ip->i_inode == sdp->sd_rindex) {
681 		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
682 					   GL_NOCACHE, &m_ip->i_gh);
683 		if (unlikely(error)) {
684 			gfs2_glock_dq(&ip->i_gh);
685 			goto out_uninit;
686 		}
687 	}
688 
689 	alloc_required = gfs2_write_alloc_required(ip, pos, len);
690 
691 	if (alloc_required || gfs2_is_jdata(ip))
692 		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
693 
694 	if (alloc_required) {
695 		struct gfs2_alloc_parms ap = { .aflags = 0, };
696 		requested = data_blocks + ind_blocks;
697 		ap.target = requested;
698 		error = gfs2_quota_lock_check(ip, &ap);
699 		if (error)
700 			goto out_unlock;
701 
702 		error = gfs2_inplace_reserve(ip, &ap);
703 		if (error)
704 			goto out_qunlock;
705 	}
706 
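	/*
	 * Estimate the journal space this write may consume: the dinode, any
	 * indirect blocks, the data blocks themselves for jdata inodes,
	 * statfs and quota changes, and resource group bitmaps when
	 * allocating.
	 */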
707 	rblocks = RES_DINODE + ind_blocks;
708 	if (gfs2_is_jdata(ip))
709 		rblocks += data_blocks ? data_blocks : 1;
710 	if (ind_blocks || data_blocks)
711 		rblocks += RES_STATFS + RES_QUOTA;
712 	if (&ip->i_inode == sdp->sd_rindex)
713 		rblocks += 2 * RES_STATFS;
714 	if (alloc_required)
715 		rblocks += gfs2_rg_blocks(ip, requested);
716 
717 	error = gfs2_trans_begin(sdp, rblocks,
718 				 PAGE_SIZE/sdp->sd_sb.sb_bsize);
719 	if (error)
720 		goto out_trans_fail;
721 
722 	error = -ENOMEM;
723 	flags |= AOP_FLAG_NOFS;
724 	page = grab_cache_page_write_begin(mapping, index, flags);
725 	*pagep = page;
726 	if (unlikely(!page))
727 		goto out_endtrans;
728 
729 	if (gfs2_is_stuffed(ip)) {
730 		error = 0;
731 		if (pos + len > gfs2_max_stuffed_size(ip)) {
732 			error = gfs2_unstuff_dinode(ip, page);
733 			if (error == 0)
734 				goto prepare_write;
735 		} else if (!PageUptodate(page)) {
736 			error = stuffed_readpage(ip, page);
737 		}
738 		goto out;
739 	}
740 
741 prepare_write:
742 	error = __block_write_begin(page, from, len, gfs2_block_map);
743 out:
744 	if (error == 0)
745 		return 0;
746 
747 	unlock_page(page);
748 	put_page(page);
749 
750 	gfs2_trans_end(sdp);
751 	if (alloc_required) {
752 		gfs2_inplace_release(ip);
753 		if (pos + len > ip->i_inode.i_size)
754 			gfs2_trim_blocks(&ip->i_inode);
755 	}
756 	goto out_qunlock;
757 
758 out_endtrans:
759 	gfs2_trans_end(sdp);
760 out_trans_fail:
761 	if (alloc_required)
762 		gfs2_inplace_release(ip);
763 out_qunlock:
764 	if (alloc_required)
765 		gfs2_quota_unlock(ip);
766 out_unlock:
767 	if (&ip->i_inode == sdp->sd_rindex) {
768 		gfs2_glock_dq(&m_ip->i_gh);
769 		gfs2_holder_uninit(&m_ip->i_gh);
770 	}
771 	gfs2_glock_dq(&ip->i_gh);
772 out_uninit:
773 	gfs2_holder_uninit(&ip->i_gh);
774 	return error;
775 }
776 
777 /**
778  * adjust_fs_space - Adjusts the free space available due to gfs2_grow
779  * @inode: the rindex inode
780  */
781 void adjust_fs_space(struct inode *inode)
782 {
783 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
784 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
785 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
786 	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
787 	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
788 	struct buffer_head *m_bh, *l_bh;
789 	u64 fs_total, new_free;
790 
791 	/* Total up the file system space, according to the latest rindex. */
792 	fs_total = gfs2_ri_total(sdp);
793 	if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
794 		return;
795 
796 	spin_lock(&sdp->sd_statfs_spin);
797 	gfs2_statfs_change_in(m_sc, m_bh->b_data +
798 			      sizeof(struct gfs2_dinode));
799 	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
800 		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
801 	else
802 		new_free = 0;
803 	spin_unlock(&sdp->sd_statfs_spin);
804 	fs_warn(sdp, "File system extended by %llu blocks.\n",
805 		(unsigned long long)new_free);
806 	gfs2_statfs_change(sdp, new_free, new_free, 0);
807 
808 	if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
809 		goto out;
810 	update_statfs(sdp, m_bh, l_bh);
811 	brelse(l_bh);
812 out:
813 	brelse(m_bh);
814 }
815 
816 /**
817  * gfs2_stuffed_write_end - Write end for stuffed files
818  * @inode: The inode
819  * @dibh: The buffer_head containing the on-disk inode
820  * @pos: The file position
821  * @copied: How much was actually copied by the VFS
822  * @page: The page
823  *
824  * This copies the data from the page into the inode block after
825  * the inode data structure itself.
826  *
827  * Returns: copied bytes or errno
828  */
829 int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
830 			   loff_t pos, unsigned copied,
831 			   struct page *page)
832 {
833 	struct gfs2_inode *ip = GFS2_I(inode);
834 	u64 to = pos + copied;
835 	void *kaddr;
836 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
837 
838 	BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
839 
840 	kaddr = kmap_atomic(page);
841 	memcpy(buf + pos, kaddr + pos, copied);
842 	flush_dcache_page(page);
843 	kunmap_atomic(kaddr);
844 
845 	WARN_ON(!PageUptodate(page));
846 	unlock_page(page);
847 	put_page(page);
848 
849 	if (copied) {
850 		if (inode->i_size < to)
851 			i_size_write(inode, to);
852 		mark_inode_dirty(inode);
853 	}
854 	return copied;
855 }
856 
857 /**
858  * gfs2_write_end
859  * @file: The file to write to
860  * @mapping: The address space to write to
861  * @pos: The file position
862  * @len: The length of the data
863  * @copied: How much was actually copied by the VFS
864  * @page: The page that has been written
865  * @fsdata: The fsdata (unused in GFS2)
866  *
867  * The main write_end function for GFS2. We just put our locking around the VFS
868  * provided functions.
869  *
870  * Returns: copied bytes or errno
871  */
872 
873 static int gfs2_write_end(struct file *file, struct address_space *mapping,
874 			  loff_t pos, unsigned len, unsigned copied,
875 			  struct page *page, void *fsdata)
876 {
877 	struct inode *inode = page->mapping->host;
878 	struct gfs2_inode *ip = GFS2_I(inode);
879 	struct gfs2_sbd *sdp = GFS2_SB(inode);
880 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
881 	struct buffer_head *dibh;
882 	int ret;
883 	struct gfs2_trans *tr = current->journal_info;
884 	BUG_ON(!tr);
885 
886 	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
887 
888 	ret = gfs2_meta_inode_buffer(ip, &dibh);
889 	if (unlikely(ret))
890 		goto out;
891 
892 	if (gfs2_is_stuffed(ip)) {
893 		ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
894 		page = NULL;
895 		goto out2;
896 	}
897 
898 	if (gfs2_is_jdata(ip))
899 		gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
900 	else
901 		gfs2_ordered_add_inode(ip);
902 
903 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
904 	page = NULL;
905 	if (tr->tr_num_buf_new)
906 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
907 	else
908 		gfs2_trans_add_meta(ip->i_gl, dibh);
909 
910 out2:
911 	if (inode == sdp->sd_rindex) {
912 		adjust_fs_space(inode);
913 		sdp->sd_rindex_uptodate = 0;
914 	}
915 
916 	brelse(dibh);
917 out:
918 	if (page) {
919 		unlock_page(page);
920 		put_page(page);
921 	}
922 	gfs2_trans_end(sdp);
923 	gfs2_inplace_release(ip);
924 	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
925 		gfs2_quota_unlock(ip);
926 	if (inode == sdp->sd_rindex) {
927 		gfs2_glock_dq(&m_ip->i_gh);
928 		gfs2_holder_uninit(&m_ip->i_gh);
929 	}
930 	gfs2_glock_dq(&ip->i_gh);
931 	gfs2_holder_uninit(&ip->i_gh);
932 	return ret;
933 }
934 
935 /**
936  * jdata_set_page_dirty - Page dirtying function
937  * @page: The page to dirty
938  *
939  * Returns: 1 if it dirtied the page, or 0 otherwise
940  */
941 
942 static int jdata_set_page_dirty(struct page *page)
943 {
944 	SetPageChecked(page);
945 	return __set_page_dirty_buffers(page);
946 }
947 
948 /**
949  * gfs2_bmap - Block map function
950  * @mapping: Address space info
951  * @lblock: The block to map
952  *
953  * Returns: The disk address for the block or 0 on hole or error
954  */
955 
956 static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
957 {
958 	struct gfs2_inode *ip = GFS2_I(mapping->host);
959 	struct gfs2_holder i_gh;
960 	sector_t dblock = 0;
961 	int error;
962 
963 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
964 	if (error)
965 		return 0;
966 
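	/*
	 * bmap is meaningless for a stuffed file since its data lives in the
	 * inode block, so report a hole (0) in that case.
	 */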
967 	if (!gfs2_is_stuffed(ip))
968 		dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
969 
970 	gfs2_glock_dq_uninit(&i_gh);
971 
972 	return dblock;
973 }
974 
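/*
 * Called from gfs2_invalidatepage() for each buffer in the range being
 * invalidated: clean the buffer and take it off the journal / AIL lists so
 * that it will not be written back.
 */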
975 static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
976 {
977 	struct gfs2_bufdata *bd;
978 
979 	lock_buffer(bh);
980 	gfs2_log_lock(sdp);
981 	clear_buffer_dirty(bh);
982 	bd = bh->b_private;
983 	if (bd) {
984 		if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
985 			list_del_init(&bd->bd_list);
986 		else
987 			gfs2_remove_from_journal(bh, REMOVE_JDATA);
988 	}
989 	bh->b_bdev = NULL;
990 	clear_buffer_mapped(bh);
991 	clear_buffer_req(bh);
992 	clear_buffer_new(bh);
993 	gfs2_log_unlock(sdp);
994 	unlock_buffer(bh);
995 }
996 
997 static void gfs2_invalidatepage(struct page *page, unsigned int offset,
998 				unsigned int length)
999 {
1000 	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
1001 	unsigned int stop = offset + length;
1002 	int partial_page = (offset || length < PAGE_SIZE);
1003 	struct buffer_head *bh, *head;
1004 	unsigned long pos = 0;
1005 
1006 	BUG_ON(!PageLocked(page));
1007 	if (!partial_page)
1008 		ClearPageChecked(page);
1009 	if (!page_has_buffers(page))
1010 		goto out;
1011 
1012 	bh = head = page_buffers(page);
1013 	do {
1014 		if (pos + bh->b_size > stop)
1015 			return;
1016 
1017 		if (offset <= pos)
1018 			gfs2_discard(sdp, bh);
1019 		pos += bh->b_size;
1020 		bh = bh->b_this_page;
1021 	} while (bh != head);
1022 out:
1023 	if (!partial_page)
1024 		try_to_release_page(page, 0);
1025 }
1026 
1027 /**
1028  * gfs2_ok_for_dio - check that dio is valid on this file
1029  * @ip: The inode
1030  * @offset: The offset at which we are reading or writing
1031  *
1032  * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
1033  *          1 (to accept the i/o request)
1034  */
1035 static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
1036 {
1037 	/*
1038 	 * Should we return an error here? I can't see that O_DIRECT for
1039 	 * a stuffed file makes any sense. For now we'll silently fall
1040 	 * back to buffered I/O
1041 	 */
1042 	if (gfs2_is_stuffed(ip))
1043 		return 0;
1044 
1045 	if (offset >= i_size_read(&ip->i_inode))
1046 		return 0;
1047 	return 1;
1048 }
1049 
1050 
1051 
1052 static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1053 {
1054 	struct file *file = iocb->ki_filp;
1055 	struct inode *inode = file->f_mapping->host;
1056 	struct address_space *mapping = inode->i_mapping;
1057 	struct gfs2_inode *ip = GFS2_I(inode);
1058 	loff_t offset = iocb->ki_pos;
1059 	struct gfs2_holder gh;
1060 	int rv;
1061 
1062 	/*
1063 	 * Deferred lock, even if it's a write, since we do no allocation
1064 	 * on this path. All we need change is atime, and this lock mode
1065 	 * ensures that other nodes have flushed their buffered read caches
1066 	 * (i.e. their page cache entries for this inode). We do not,
1067 	 * unfortunately have the option of only flushing a range like
1068 	 * the VFS does.
1069 	 */
1070 	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
1071 	rv = gfs2_glock_nq(&gh);
1072 	if (rv)
1073 		goto out_uninit;
1074 	rv = gfs2_ok_for_dio(ip, offset);
1075 	if (rv != 1)
1076 		goto out; /* dio not valid, fall back to buffered i/o */
1077 
1078 	/*
1079 	 * Now since we are holding a deferred (CW) lock at this point, you
1080 	 * might be wondering why this is ever needed. There is a case however
1081 	 * where we've granted a deferred local lock against a cached exclusive
1082 	 * glock. That is ok provided all granted local locks are deferred, but
1083 	 * it also means that it is possible to encounter pages which are
1084 	 * cached and possibly also mapped. So here we check for that and sort
1085 	 * them out ahead of the dio. The glock state machine will take care of
1086 	 * everything else.
1087 	 *
1088 	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
1089 	 * the first place, mapping->nrpages will always be zero.
1090 	 */
1091 	if (mapping->nrpages) {
1092 		loff_t lstart = offset & ~(PAGE_SIZE - 1);
1093 		loff_t len = iov_iter_count(iter);
1094 		loff_t end = PAGE_ALIGN(offset + len) - 1;
1095 
1096 		rv = 0;
1097 		if (len == 0)
1098 			goto out;
1099 		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
1100 			unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
1101 		rv = filemap_write_and_wait_range(mapping, lstart, end);
1102 		if (rv)
1103 			goto out;
1104 		if (iov_iter_rw(iter) == WRITE)
1105 			truncate_inode_pages_range(mapping, lstart, end);
1106 	}
1107 
1108 	rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
1109 				  gfs2_get_block_direct, NULL, NULL, 0);
1110 out:
1111 	gfs2_glock_dq(&gh);
1112 out_uninit:
1113 	gfs2_holder_uninit(&gh);
1114 	return rv;
1115 }
1116 
1117 /**
1118  * gfs2_releasepage - free the metadata associated with a page
1119  * @page: the page that's being released
1120  * @gfp_mask: passed from Linux VFS, ignored by us
1121  *
1122  * Call try_to_free_buffers() if the buffers in this page can be
1123  * released.
1124  *
1125  * Returns: 1 if the buffers were released, 0 otherwise
1126  */
1127 
1128 int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1129 {
1130 	struct address_space *mapping = page->mapping;
1131 	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
1132 	struct buffer_head *bh, *head;
1133 	struct gfs2_bufdata *bd;
1134 
1135 	if (!page_has_buffers(page))
1136 		return 0;
1137 
1138 	/*
1139 	 * From xfs_vm_releasepage: mm accommodates an old ext3 case where
1140 	 * clean pages might not have had the dirty bit cleared.  Thus, it can
1141 	 * send actual dirty pages to ->releasepage() via shrink_active_list().
1142 	 *
1143 	 * As a workaround, we skip pages that contain dirty buffers below.
1144 	 * Once ->releasepage isn't called on dirty pages anymore, we can warn
1145 	 * on dirty buffers like we used to here again.
1146 	 */
1147 
1148 	gfs2_log_lock(sdp);
1149 	spin_lock(&sdp->sd_ail_lock);
1150 	head = bh = page_buffers(page);
1151 	do {
1152 		if (atomic_read(&bh->b_count))
1153 			goto cannot_release;
1154 		bd = bh->b_private;
1155 		if (bd && bd->bd_tr)
1156 			goto cannot_release;
1157 		if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
1158 			goto cannot_release;
1159 		bh = bh->b_this_page;
1160 	} while(bh != head);
1161 	spin_unlock(&sdp->sd_ail_lock);
1162 
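	/*
	 * Nothing is busy or pinned; detach the bufdata from each buffer so
	 * that try_to_free_buffers() can release them.
	 */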
1163 	head = bh = page_buffers(page);
1164 	do {
1165 		bd = bh->b_private;
1166 		if (bd) {
1167 			gfs2_assert_warn(sdp, bd->bd_bh == bh);
1168 			if (!list_empty(&bd->bd_list))
1169 				list_del_init(&bd->bd_list);
1170 			bd->bd_bh = NULL;
1171 			bh->b_private = NULL;
1172 			kmem_cache_free(gfs2_bufdata_cachep, bd);
1173 		}
1174 
1175 		bh = bh->b_this_page;
1176 	} while (bh != head);
1177 	gfs2_log_unlock(sdp);
1178 
1179 	return try_to_free_buffers(page);
1180 
1181 cannot_release:
1182 	spin_unlock(&sdp->sd_ail_lock);
1183 	gfs2_log_unlock(sdp);
1184 	return 0;
1185 }
1186 
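/*
 * GFS2 has one set of address space operations per data journaling mode:
 * "writeback" writes data back with no ordering against metadata, "ordered"
 * writes data out before the metadata that refers to it commits, and "jdata"
 * journals the data itself.  gfs2_set_aops() below picks the right table
 * for an inode.
 */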
1187 static const struct address_space_operations gfs2_writeback_aops = {
1188 	.writepage = gfs2_writepage,
1189 	.writepages = gfs2_writepages,
1190 	.readpage = gfs2_readpage,
1191 	.readpages = gfs2_readpages,
1192 	.write_begin = gfs2_write_begin,
1193 	.write_end = gfs2_write_end,
1194 	.bmap = gfs2_bmap,
1195 	.invalidatepage = gfs2_invalidatepage,
1196 	.releasepage = gfs2_releasepage,
1197 	.direct_IO = gfs2_direct_IO,
1198 	.migratepage = buffer_migrate_page,
1199 	.is_partially_uptodate = block_is_partially_uptodate,
1200 	.error_remove_page = generic_error_remove_page,
1201 };
1202 
1203 static const struct address_space_operations gfs2_ordered_aops = {
1204 	.writepage = gfs2_writepage,
1205 	.writepages = gfs2_writepages,
1206 	.readpage = gfs2_readpage,
1207 	.readpages = gfs2_readpages,
1208 	.write_begin = gfs2_write_begin,
1209 	.write_end = gfs2_write_end,
1210 	.set_page_dirty = __set_page_dirty_buffers,
1211 	.bmap = gfs2_bmap,
1212 	.invalidatepage = gfs2_invalidatepage,
1213 	.releasepage = gfs2_releasepage,
1214 	.direct_IO = gfs2_direct_IO,
1215 	.migratepage = buffer_migrate_page,
1216 	.is_partially_uptodate = block_is_partially_uptodate,
1217 	.error_remove_page = generic_error_remove_page,
1218 };
1219 
1220 static const struct address_space_operations gfs2_jdata_aops = {
1221 	.writepage = gfs2_jdata_writepage,
1222 	.writepages = gfs2_jdata_writepages,
1223 	.readpage = gfs2_readpage,
1224 	.readpages = gfs2_readpages,
1225 	.write_begin = gfs2_write_begin,
1226 	.write_end = gfs2_write_end,
1227 	.set_page_dirty = jdata_set_page_dirty,
1228 	.bmap = gfs2_bmap,
1229 	.invalidatepage = gfs2_invalidatepage,
1230 	.releasepage = gfs2_releasepage,
1231 	.is_partially_uptodate = block_is_partially_uptodate,
1232 	.error_remove_page = generic_error_remove_page,
1233 };
1234 
1235 void gfs2_set_aops(struct inode *inode)
1236 {
1237 	struct gfs2_inode *ip = GFS2_I(inode);
1238 
1239 	if (gfs2_is_writeback(ip))
1240 		inode->i_mapping->a_ops = &gfs2_writeback_aops;
1241 	else if (gfs2_is_ordered(ip))
1242 		inode->i_mapping->a_ops = &gfs2_ordered_aops;
1243 	else if (gfs2_is_jdata(ip))
1244 		inode->i_mapping->a_ops = &gfs2_jdata_aops;
1245 	else
1246 		BUG();
1247 }
1248 
1249