xref: /openbmc/linux/fs/btrfs/file.c (revision 6da6abae027e2dbc59bca5f4168b0760f25068c7)
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/version.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"

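/*
 * Copy data from userspace into the locked pages that cover
 * [pos, pos + write_bytes).  Returns -EFAULT if any part of the copy
 * faults, 0 otherwise.
 */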
static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
				struct page **prepared_pages,
				const char __user * buf)
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

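/*
 * Unlock and release the page references taken by prepare_pages().  The
 * array may only be partially filled, so stop at the first NULL entry.
 */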
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

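/*
 * Write the data in 'pages' directly into an inline extent item in the
 * btree.  If an inline extent already covers part of the range it is
 * extended (and zero filled up to 'offset' if there is a gap); otherwise
 * a new inline item is inserted at 'offset'.
 */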
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode,
				u64 offset, size_t size,
				struct page **pages, size_t page_offset,
				int num_pages)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct page *page;
	u32 datasize;
	int err = 0;
	int ret;
	int i;
	ssize_t cur_size;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	btrfs_set_trans_block_group(trans, inode);

	key.objectid = inode->i_ino;
	key.offset = offset;
	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}
	if (ret == 1) {
		struct btrfs_key found_key;

		if (path->slots[0] == 0)
			goto insert;

		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid != inode->i_ino)
			goto insert;

		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto insert;
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			goto insert;
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		ret = 0;
	}
	if (ret == 0) {
		u32 found_size;
		u64 found_end;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, ei) !=
		    BTRFS_FILE_EXTENT_INLINE) {
			err = ret;
			btrfs_print_leaf(root, leaf);
			printk("found wasn't inline offset %Lu inode %lu\n",
			       offset, inode->i_ino);
			goto fail;
		}
		found_size = btrfs_file_extent_inline_len(leaf,
					  btrfs_item_nr(leaf, path->slots[0]));
		found_end = key.offset + found_size;

		if (found_end < offset + size) {
			btrfs_release_path(root, path);
			ret = btrfs_search_slot(trans, root, &key, path,
						offset + size - found_end, 1);
			BUG_ON(ret != 0);

			ret = btrfs_extend_item(trans, root, path,
						offset + size - found_end);
			if (ret) {
				err = ret;
				goto fail;
			}
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
		}
		if (found_end < offset) {
			ptr = btrfs_file_extent_inline_start(ei) + found_size;
			memset_extent_buffer(leaf, 0, ptr, offset - found_end);
		}
	} else {
insert:
		btrfs_release_path(root, path);
		datasize = offset + size - key.offset;
		datasize = btrfs_file_extent_calc_inline_size(datasize);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret) {
			err = ret;
			printk("got bad ret %d\n", ret);
			goto fail;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, ei, trans->transid);
		btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	}
	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;

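	/*
	 * copy the page data, one page at a time, into the inline item
	 * that now starts at 'ptr' inside the leaf
	 */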
	cur_size = size;
	i = 0;
	while (size > 0) {
		page = pages[i];
		kaddr = kmap_atomic(page, KM_USER0);
		cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
		kunmap_atomic(kaddr, KM_USER0);
		page_offset = 0;
		ptr += cur_size;
		size -= cur_size;
		if (i >= num_pages) {
			printk("i %d num_pages %d\n", i, num_pages);
		}
		i++;
	}
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_free_path(path);
	return err;
}

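/*
 * After data has been copied into the locked pages, decide how it gets to
 * disk: small writes near the front of the file are stored as inline
 * extents directly in the btree, everything else is marked delalloc so the
 * extents are allocated at writeback time.  Also fills in any hole between
 * the old i_size and the start of this write, and updates i_size if the
 * write extends the file.
 */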
static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int err = 0;
	int i;
	struct inode *inode = fdentry(file)->d_inode;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hint_byte;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	u64 inline_size;
	loff_t isize = i_size_read(inode);
	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;

	em->bdev = inode->i_sb->s_bdev;

	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = (write_bytes + pos - start_pos +
		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	down_read(&BTRFS_I(inode)->root->snap_sem);
	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	inode->i_blocks += num_bytes >> 9;
	hint_byte = 0;

	if ((end_of_last_block & 4095) == 0) {
		printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
	}
	set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);

	/* FIXME...EIEIO, ENOSPC and more */

	/* insert any holes we need to create */
	if (inode->i_size < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->sectorsize - 1;
		last_pos_in_file = (isize + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;

		if (last_pos_in_file < start_pos) {
			err = btrfs_drop_extents(trans, root, inode,
						 last_pos_in_file,
						 last_pos_in_file + hole_size,
						 last_pos_in_file,
						 &hint_byte);
			if (err)
				goto failed;

			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size);
		}
		if (err)
			goto failed;
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	inline_size = end_pos;
	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    inline_size > 32768 ||
	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		u64 last_end;

		for (i = 0; i < num_pages; i++) {
			struct page *p = pages[i];
			SetPageUptodate(p);
			set_page_dirty(p);
		}
		last_end = (u64)(pages[num_pages -1]->index) <<
				PAGE_CACHE_SHIFT;
		last_end += PAGE_CACHE_SIZE - 1;
		set_extent_delalloc(em_tree, start_pos, end_of_last_block,
				 GFP_NOFS);
	} else {
		u64 aligned_end;
		/* step one, delete the existing extents in this range */
		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		err = btrfs_drop_extents(trans, root, inode, start_pos,
					 aligned_end, aligned_end, &hint_byte);
		if (err)
			goto failed;
		if (isize > inline_size)
			inline_size = min_t(u64, isize, aligned_end);
		inline_size -= start_pos;
		err = insert_inline_extent(trans, root, inode, start_pos,
					   inline_size, pages, 0, num_pages);
		BUG_ON(err);
	}
	if (end_pos > isize) {
		i_size_write(inode, end_pos);
		btrfs_update_inode(trans, root, inode);
	}
failed:
	err = btrfs_end_transaction(trans, root);
out_unlock:
	mutex_unlock(&root->fs_info->fs_mutex);
	unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
	free_extent_map(em);
	up_read(&BTRFS_I(inode)->root->snap_sem);
	return err;
}

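/*
 * Remove any cached extent mappings that overlap [start, end] so later
 * readers rebuild them from the (just modified) btree items.
 */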
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
	struct extent_map *em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;

	while(1) {
		em = lookup_extent_mapping(em_tree, start, end);
		if (!em)
			break;
		remove_extent_mapping(em_tree, em);
		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
	return 0;
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_byte is filled in with a byte offset
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
{
	u64 extent_end = 0;
	u64 search_start = start;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item old;
	int keep;
	int slot;
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;
	int recow;
	int ret;

	btrfs_drop_extent_cache(inode, start, end - 1);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while(1) {
		recow = 0;
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
next_slot:
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		extent = NULL;
		leaf = path->nodes[0];
		slot = path->slots[0];
		ret = 0;
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.offset >= end || key.objectid != inode->i_ino) {
			goto out;
		}
		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) {
			goto out;
		}
		if (recow) {
			search_start = key.offset;
			continue;
		}
		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			found_type = btrfs_file_extent_type(leaf, extent);
			if (found_type == BTRFS_FILE_EXTENT_REG) {
				extent_end =
				     btrfs_file_extent_disk_bytenr(leaf,
								   extent);
				if (extent_end)
					*hint_byte = extent_end;

				extent_end = key.offset +
				     btrfs_file_extent_num_bytes(leaf, extent);
				found_extent = 1;
			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				struct btrfs_item *item;
				item = btrfs_item_nr(leaf, slot);
				found_inline = 1;
				extent_end = key.offset +
				     btrfs_file_extent_inline_len(leaf, item);
			}
		} else {
			extent_end = search_start;
		}

		/* we found nothing we can drop */
		if ((!found_extent && !found_inline) ||
		    search_start >= extent_end) {
			int nextret;
			u32 nritems;
			nritems = btrfs_header_nritems(leaf);
			if (slot >= nritems - 1) {
				nextret = btrfs_next_leaf(root, path);
				if (nextret)
					goto out;
				recow = 1;
			} else {
				path->slots[0]++;
			}
			goto next_slot;
		}

		if (found_inline) {
			u64 mask = root->sectorsize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;
		if (end <= extent_end && start >= key.offset && found_inline) {
			*hint_byte = EXTENT_MAP_INLINE;
			continue;
		}
		if (end < extent_end && end >= key.offset) {
			if (found_extent) {
				u64 disk_bytenr =
				    btrfs_file_extent_disk_bytenr(leaf, extent);
				u64 disk_num_bytes =
				    btrfs_file_extent_disk_num_bytes(leaf,
								      extent);
				read_extent_buffer(leaf, &old,
						   (unsigned long)extent,
						   sizeof(old));
				if (disk_bytenr != 0) {
					ret = btrfs_inc_extent_ref(trans, root,
					         disk_bytenr, disk_num_bytes,
						 root->root_key.objectid,
						 trans->transid,
						 key.objectid, end);
					BUG_ON(ret);
				}
			}
			bookend = 1;
			if (found_inline && start <= key.offset &&
			    inline_limit < extent_end)
				keep = 1;
		}
		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
			WARN_ON(start & (root->sectorsize - 1));
			if (found_extent) {
				new_num = start - key.offset;
				old_num = btrfs_file_extent_num_bytes(leaf,
								      extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				if (btrfs_file_extent_disk_bytenr(leaf,
								  extent)) {
					inode->i_blocks -=
						(old_num - new_num) >> 9;
				}
				btrfs_set_file_extent_num_bytes(leaf, extent,
								new_num);
				btrfs_mark_buffer_dirty(leaf);
			} else if (key.offset < inline_limit &&
				   (end > extent_end) &&
				   (inline_limit < extent_end)) {
				u32 new_size;
				new_size = btrfs_file_extent_calc_inline_size(
						   inline_limit - key.offset);
				btrfs_truncate_item(trans, root, path,
						    new_size, 1);
			}
		}
		/* delete the entire extent */
		if (!keep) {
			u64 disk_bytenr = 0;
			u64 disk_num_bytes = 0;
			u64 extent_num_bytes = 0;
			u64 root_gen;
			u64 root_owner;

			root_gen = btrfs_header_generation(leaf);
			root_owner = btrfs_header_owner(leaf);
			if (found_extent) {
				disk_bytenr =
				      btrfs_file_extent_disk_bytenr(leaf,
								     extent);
				disk_num_bytes =
				      btrfs_file_extent_disk_num_bytes(leaf,
								       extent);
				extent_num_bytes =
				      btrfs_file_extent_num_bytes(leaf, extent);
				*hint_byte =
					btrfs_file_extent_disk_bytenr(leaf,
								      extent);
			}
			ret = btrfs_del_item(trans, root, path);
			/* TODO update progress marker and return */
			BUG_ON(ret);
			btrfs_release_path(root, path);
			extent = NULL;
			if (found_extent && disk_bytenr != 0) {
				inode->i_blocks -= extent_num_bytes >> 9;
				ret = btrfs_free_extent(trans, root,
						disk_bytenr,
						disk_num_bytes,
						root_owner,
						root_gen, inode->i_ino,
						key.offset, 0);
			}

			BUG_ON(ret);
			if (!bookend && search_start >= end) {
				ret = 0;
				goto out;
			}
			if (!bookend)
				continue;
		}
		if (bookend && found_inline && start <= key.offset &&
		    inline_limit < extent_end && key.offset <= inline_limit) {
			u32 new_size;
			new_size = btrfs_file_extent_calc_inline_size(
						   extent_end - inline_limit);
			btrfs_truncate_item(trans, root, path, new_size, 0);
		}
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));

			leaf = path->nodes[0];
			if (ret) {
				btrfs_print_leaf(root, leaf);
				printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret, ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep);
			}
			BUG_ON(ret);
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
			write_extent_buffer(leaf, &old,
					    (unsigned long)extent, sizeof(old));

			btrfs_set_file_extent_offset(leaf, extent,
				    le64_to_cpu(old.offset) + end - key.offset);
			WARN_ON(le64_to_cpu(old.num_bytes) <
				(extent_end - end));
			btrfs_set_file_extent_num_bytes(leaf, extent,
							extent_end - end);
			btrfs_set_file_extent_type(leaf, extent,
						   BTRFS_FILE_EXTENT_REG);

			btrfs_mark_buffer_dirty(path->nodes[0]);
			if (le64_to_cpu(old.disk_bytenr) != 0) {
				inode->i_blocks +=
				      btrfs_file_extent_num_bytes(leaf,
								  extent) >> 9;
			}
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * this gets pages into the page cache and locks them down
 */
static int prepare_pages(struct btrfs_root *root,
			 struct file *file,
			 struct page **pages,
			 size_t num_pages,
			 loff_t pos,
			 unsigned long first_index,
			 unsigned long last_index,
			 size_t write_bytes)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = fdentry(file)->d_inode;
	int err = 0;
	u64 start_pos;

	start_pos = pos & ~((u64)root->sectorsize - 1);

	memset(pages, 0, num_pages * sizeof(struct page *));

	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
			BUG_ON(1);
		}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
		ClearPageDirty(pages[i]);
#else
		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
#endif
		wait_on_page_writeback(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}
	return 0;
}

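/*
 * write(2) path for btrfs files.  Data is copied into locked page cache
 * pages in chunks, and dirty_and_release_pages() decides per chunk whether
 * it becomes an inline extent or delalloc pages.  O_DIRECT is not supported
 * here and returns -EINVAL.
 */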
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	loff_t start_pos;
	ssize_t num_written = 0;
	ssize_t err = 0;
	int ret = 0;
	struct inode *inode = fdentry(file)->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	int nrptrs;
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;

	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;
	if (file->f_flags & O_DIRECT)
		return -EINVAL;

	pos = *ppos;
	start_pos = pos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;
	if (count == 0)
		goto out;
	err = remove_suid(fdentry(file));
	if (err)
		goto out;
	file_update_time(file);

	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		err = -ENOMEM;
		goto out;
	}

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
			ret = btrfs_readpage(NULL, pinned[0]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
			ret = btrfs_readpage(NULL, pinned[1]);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

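	/*
	 * main loop: copy at most nrptrs pages worth of data from userspace
	 * per iteration, then hand the pages to dirty_and_release_pages()
	 */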
	while(count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count, nrptrs *
					(size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		WARN_ON(num_pages > nrptrs);
		memset(pages, 0, num_pages * sizeof(struct page *));
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes);
		if (ret)
			goto out_unlock;

		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			goto out_unlock;
		}

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		btrfs_drop_pages(pages, num_pages);
		if (ret)
			goto out_unlock;

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
		if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root, 1);
		cond_resched();
	}
out_unlock:
	mutex_unlock(&inode->i_mutex);
out:
	kfree(pages);
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;

	if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
		err = sync_page_range(inode, inode->i_mapping,
				      start_pos, num_written);
		if (err < 0)
			num_written = err;
	}
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

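/*
 * fsync for btrfs: if the transaction that last touched this inode has
 * already been committed there is nothing to do, otherwise commit the
 * current transaction to get the file data and metadata on disk.
 */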
static int btrfs_sync_file(struct file *file,
			   struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	struct btrfs_trans_handle *trans;

	/*
	 * check the transaction that last modified this inode
	 * and see if it's already been committed
	 */
	mutex_lock(&root->fs_info->fs_mutex);
	if (!BTRFS_I(inode)->last_trans)
		goto out;
	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;
		mutex_unlock(&root->fs_info->trans_mutex);
		goto out;
	}
	mutex_unlock(&root->fs_info->trans_mutex);

	/*
	 * ok we haven't committed the transaction yet, let's do a commit
	 */
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_commit_transaction(trans, root);
out:
	mutex_unlock(&root->fs_info->fs_mutex);
	return ret > 0 ? -EIO : ret;
}

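/*
 * mmap support: page faults are served by the generic filemap code, and
 * btrfs_page_mkwrite is called before a shared writable mapping may dirty
 * a page so btrfs can do its own preparation first.
 */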
static struct vm_operations_struct btrfs_file_vm_ops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
	.nopage         = filemap_nopage,
	.populate       = filemap_populate,
#else
	.fault		= filemap_fault,
#endif
	.page_mkwrite	= btrfs_page_mkwrite,
};

static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &btrfs_file_vm_ops;
	file_accessed(filp);
	return 0;
}

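/*
 * file operations for regular btrfs files: reads go through the generic
 * aio/splice helpers, while write, fsync, mmap and ioctl are btrfs specific
 */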
struct file_operations btrfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read       = generic_file_aio_read,
	.splice_read	= generic_file_splice_read,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
	.sendfile	= generic_file_sendfile,
#endif
	.write		= btrfs_file_write,
	.mmap		= btrfs_file_mmap,
	.open		= generic_file_open,
	.fsync		= btrfs_sync_file,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_ioctl,
#endif
};