fs/ext4/inode.c: 017ed8012e74ca15748863f45d2c078453026a0a vs. 1bce63d1a2a2c8929442b79acd4eab2e3db10a0b
1/*
2 * linux/fs/ext4/inode.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *

--- 28 unchanged lines hidden (view full) ---

37#include <linux/kernel.h>
38#include <linux/printk.h>
39#include <linux/slab.h>
40#include <linux/ratelimit.h>
41
42#include "ext4_jbd2.h"
43#include "xattr.h"
44#include "acl.h"
45#include "ext4_extents.h"
46#include "truncate.h"
47
48#include <trace/events/ext4.h>
49
50#define MPAGE_DA_EXTENT_TAIL 0x01
51
52static inline int ext4_begin_ordered_truncate(struct inode *inode,
53 loff_t new_size)

--- 209 unchanged lines hidden (view full) ---

263 */
264void ext4_da_update_reserve_space(struct inode *inode,
265 int used, int quota_claim)
266{
267 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
268 struct ext4_inode_info *ei = EXT4_I(inode);
269
270 spin_lock(&ei->i_block_reservation_lock);
45#include "truncate.h"
46
47#include <trace/events/ext4.h>
48
49#define MPAGE_DA_EXTENT_TAIL 0x01
50
51static inline int ext4_begin_ordered_truncate(struct inode *inode,
52 loff_t new_size)

--- 209 unchanged lines hidden (view full) ---

262 */
263void ext4_da_update_reserve_space(struct inode *inode,
264 int used, int quota_claim)
265{
266 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
267 struct ext4_inode_info *ei = EXT4_I(inode);
268
269 spin_lock(&ei->i_block_reservation_lock);
271 trace_ext4_da_update_reserve_space(inode, used);
270 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
272 if (unlikely(used > ei->i_reserved_data_blocks)) {
273 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
274 "with only %d reserved data blocks\n",
275 __func__, inode->i_ino, used,
276 ei->i_reserved_data_blocks);
277 WARN_ON(1);
278 used = ei->i_reserved_data_blocks;
279 }
280
281 /* Update per-inode reservations */
282 ei->i_reserved_data_blocks -= used;
283 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
284 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
283 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
285 used + ei->i_allocated_meta_blocks);
286 ei->i_allocated_meta_blocks = 0;
287
288 if (ei->i_reserved_data_blocks == 0) {
289 /*
290 * We can release all of the reserved metadata blocks
291 * only when we have written all of the delayed
292 * allocation blocks.
293 */
294 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
293 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
295 ei->i_reserved_meta_blocks);
296 ei->i_reserved_meta_blocks = 0;
297 ei->i_da_metadata_calc_len = 0;
298 }
299 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
300
301 /* Update quota subsystem for data blocks */
302 if (quota_claim)
303 dquot_claim_block(inode, used);
302 dquot_claim_block(inode, EXT4_C2B(sbi, used));
304 else {
305 /*
306 * We did fallocate with an offset that is already delayed
307 * allocated. So on delayed allocated writeback we should
308 * not re-claim the quota for fallocated blocks.
309 */
310 dquot_release_reservation_block(inode, used);
309 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
311 }
312
313 /*
314 * If we have done all the pending block allocations and if
315 * there aren't any writers on the inode, we can discard the
316 * inode's preallocations.
317 */
318 if ((ei->i_reserved_data_blocks == 0) &&

--- 75 unchanged lines hidden (view full) ---

394 }
395 }
396 pagevec_release(&pvec);
397 }
398 return num;
399}
400
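The change that recurs throughout the 1bce63d1a2a2 side of this diff is the bigalloc unit split: the per-inode reservation counters are kept in clusters, while quota and the free-space percpu counters still speak blocks, so EXT4_C2B() and EXT4_NUM_B2C() conversions appear at every such boundary (dquot_claim_block() and dquot_release_reservation_block() above, ext4_da_reserve_space() below). A minimal stand-alone sketch of the two conversions, equivalent in shape to the kernel macros; the struct is a toy stand-in for ext4_sb_info and s_cluster_bits = 4 is only an example value:

#include <stdio.h>

/* Toy stand-in for ext4_sb_info: only the field the macros need. */
struct sb_info { int s_cluster_bits; };   /* log2(blocks per cluster) */

/* Clusters -> blocks. */
#define EXT4_C2B(sbi, cluster)  ((cluster) << (sbi)->s_cluster_bits)
/* Blocks -> clusters, rounding up so a partial cluster costs a whole one. */
#define EXT4_NUM_B2C(sbi, blks) \
        (((blks) + (1 << (sbi)->s_cluster_bits) - 1) >> (sbi)->s_cluster_bits)

int main(void)
{
        struct sb_info sbi = { .s_cluster_bits = 4 };   /* 16 blocks/cluster */

        printf("3 clusters = %d blocks\n", EXT4_C2B(&sbi, 3));        /* 48 */
        printf("17 blocks = %d clusters\n", EXT4_NUM_B2C(&sbi, 17));  /* 2 */
        return 0;
}

With 16 blocks per cluster, reserving one cluster charges quota for 16 blocks, which is why the new code passes EXT4_C2B(sbi, 1) to dquot_reserve_block() rather than 1.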
400/*
401 * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
402 */
403static void set_buffers_da_mapped(struct inode *inode,
404 struct ext4_map_blocks *map)
405{
406 struct address_space *mapping = inode->i_mapping;
407 struct pagevec pvec;
408 int i, nr_pages;
409 pgoff_t index, end;
410
411 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
412 end = (map->m_lblk + map->m_len - 1) >>
413 (PAGE_CACHE_SHIFT - inode->i_blkbits);
414
415 pagevec_init(&pvec, 0);
416 while (index <= end) {
417 nr_pages = pagevec_lookup(&pvec, mapping, index,
418 min(end - index + 1,
419 (pgoff_t)PAGEVEC_SIZE));
420 if (nr_pages == 0)
421 break;
422 for (i = 0; i < nr_pages; i++) {
423 struct page *page = pvec.pages[i];
424 struct buffer_head *bh, *head;
425
426 if (unlikely(page->mapping != mapping) ||
427 !PageDirty(page))
428 break;
429
430 if (page_has_buffers(page)) {
431 bh = head = page_buffers(page);
432 do {
433 set_buffer_da_mapped(bh);
434 bh = bh->b_this_page;
435 } while (bh != head);
436 }
437 index++;
438 }
439 pagevec_release(&pvec);
440 }
441}
442
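set_buffers_da_mapped() above visits every buffer head of each dirty page in the mapped range. A page's buffer heads form a circular singly linked list chained through b_this_page, which is why the do/while terminates when it wraps back around to the head. A stand-alone sketch of that ring traversal, with a toy stand-in for struct buffer_head (allocation-failure checks and freeing elided):

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for struct buffer_head: just the ring link and one flag. */
struct buffer_head {
        struct buffer_head *b_this_page; /* circular list of a page's buffers */
        int da_mapped;                   /* stand-in for the BH_Da_Mapped bit */
};

/* Build a ring of n buffers, as page_buffers(page) would hand back. */
static struct buffer_head *make_ring(int n)
{
        struct buffer_head *head = NULL, *prev = NULL;

        for (int i = 0; i < n; i++) {
                struct buffer_head *bh = calloc(1, sizeof(*bh));

                if (!head)
                        head = bh;
                else
                        prev->b_this_page = bh;
                prev = bh;
        }
        prev->b_this_page = head;        /* close the ring */
        return head;
}

int main(void)
{
        struct buffer_head *head = make_ring(4), *bh = head;
        int visited = 0;

        do {                             /* same shape as the kernel loop */
                bh->da_mapped = 1;       /* set_buffer_da_mapped(bh) */
                bh = bh->b_this_page;
                visited++;
        } while (bh != head);

        printf("visited %d buffers\n", visited);   /* 4 */
        return 0;
}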
443/*
402 * The ext4_map_blocks() function tries to look up the requested blocks,
403 * and returns if the blocks are already mapped.
404 *
405 * Otherwise it takes the write lock of i_data_sem, allocates blocks,
406 * stores the allocated blocks in the result buffer head, and marks it
407 * mapped.
408 *
409 * If the file is extent-based, it will call ext4_ext_map_blocks();
410 * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapped
411 * files.
412 *
413 * On success, it returns the number of blocks mapped or allocated.
414 * If create == 0 and the blocks are pre-allocated and uninitialized,
415 * the result buffer head is unmapped. If create == 1, it will make sure
416 * the buffer head is mapped.
417 *
418 * It returns 0 if plain look up failed (blocks have not been allocated), in
419 * that casem, buffer head is unmapped
461 * that case, buffer head is unmapped
420 *
421 * It returns the error in case of allocation failure.
422 */
423int ext4_map_blocks(handle_t *handle, struct inode *inode,
424 struct ext4_map_blocks *map, int flags)
425{
426 int retval;
427

--- 22 unchanged lines hidden (view full) ---

450 /* If it is only a block(s) look up */
451 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
452 return retval;
453
454 /*
455 * Returns if the blocks have already been allocated
456 *
457 * Note that if blocks have been preallocated
458 * ext4_ext_get_block() returns th create = 0
500 * ext4_ext_get_block() returns the create = 0
459 * with buffer head unmapped.
460 */
461 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
462 return retval;
463
464 /*
465 * When we call get_blocks without the create flag, the
466 * BH_Unwritten flag could have gotten set if the blocks

--- 45 unchanged lines hidden (view full) ---

512 * block allocation which had been deferred till now. We don't
513 * support fallocate for non extent files. So we can update
514 * reserve space here.
515 */
516 if ((retval > 0) &&
517 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
518 ext4_da_update_reserve_space(inode, retval, 1);
519 }
520 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
562 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
521 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
522
563 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
564
565 /* If we have successfully mapped the delayed allocated blocks,
566 * set the BH_Da_Mapped bit on them. It's important to do this
567 * under the protection of i_data_sem.
568 */
569 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
570 set_buffers_da_mapped(inode, map);
571 }
572
523 up_write((&EXT4_I(inode)->i_data_sem));
524 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
525 int ret = check_block_validity(inode, map);
526 if (ret != 0)
527 return ret;
528 }
529 return retval;
530}
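Compressed to its control flow, ext4_map_blocks() behaves as the comment above describes: a lookup under the read lock first, an early return for pure lookups or already-mapped extents, and allocation under the write lock only when the caller asked for it. The sketch below mirrors only that flow; the types, flag values, and helpers are stand-ins, not the kernel API:

#include <stdio.h>

#define GET_BLOCKS_CREATE 0x01  /* stand-in for EXT4_GET_BLOCKS_CREATE */
#define MAP_MAPPED        0x02  /* stand-in for EXT4_MAP_MAPPED */

struct map { unsigned int flags; int len; };

/* Pretend read-locked lookup: finds nothing (a hole). */
static int lookup(struct map *m) { m->flags = 0; return 0; }
/* Pretend write-locked allocator: maps m->len blocks. */
static int allocate(struct map *m) { m->flags |= MAP_MAPPED; return m->len; }

static int map_blocks(struct map *m, int flags)
{
        int ret = lookup(m);                    /* phase 1: look up */

        if ((flags & GET_BLOCKS_CREATE) == 0)
                return ret;                     /* lookup only */
        if (ret > 0 && (m->flags & MAP_MAPPED))
                return ret;                     /* already mapped */
        return allocate(m);                     /* phase 2: allocate */
}

int main(void)
{
        struct map m = { 0, 8 };

        printf("lookup only -> %d\n", map_blocks(&m, 0));                 /* 0 */
        printf("with create -> %d\n", map_blocks(&m, GET_BLOCKS_CREATE)); /* 8 */
        return 0;
}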

--- 111 unchanged lines hidden (view full) ---

642{
643 struct buffer_head *bh;
644
645 bh = ext4_getblk(handle, inode, block, create, err);
646 if (!bh)
647 return bh;
648 if (buffer_uptodate(bh))
649 return bh;
650 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
700 ll_rw_block(READ_META, 1, &bh);
651 wait_on_buffer(bh);
652 if (buffer_uptodate(bh))
653 return bh;
654 put_bh(bh);
655 *err = -EIO;
656 return NULL;
657}
658

--- 373 unchanged lines hidden (view full) ---

1032 if (inode->i_nlink)
1033 ext4_orphan_del(NULL, inode);
1034 }
1035
1036 return ret ? ret : copied;
1037}
1038
1039/*
1040 * Reserve a single block located at lblock
1090 * Reserve a single cluster located at lblock
1041 */
1042static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1043{
1044 int retries = 0;
1045 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1046 struct ext4_inode_info *ei = EXT4_I(inode);
1047 unsigned long md_needed;
1097 unsigned int md_needed;
1048 int ret;
1049
1050 /*
1051 * recalculate the amount of metadata blocks to reserve
1052 * in order to allocate nrblocks
1053 * worst case is one extent per block
1054 */
1055repeat:
1056 spin_lock(&ei->i_block_reservation_lock);
1057 md_needed = ext4_calc_metadata_amount(inode, lblock);
1107 md_needed = EXT4_NUM_B2C(sbi,
1108 ext4_calc_metadata_amount(inode, lblock));
1058 trace_ext4_da_reserve_space(inode, md_needed);
1059 spin_unlock(&ei->i_block_reservation_lock);
1060
1061 /*
1062 * We will charge metadata quota at writeout time; this saves
1063 * us from metadata over-estimation, though we may go over by
1064 * a small amount in the end. Here we just reserve for data.
1065 */
1066 ret = dquot_reserve_block(inode, 1);
1117 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1067 if (ret)
1068 return ret;
1069 /*
1070 * We do still charge estimated metadata to the sb though;
1071 * we cannot afford to run out of free blocks.
1072 */
1073 if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
1074 dquot_release_reservation_block(inode, 1);
1124 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1125 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1075 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1076 yield();
1077 goto repeat;
1078 }
1079 return -ENOSPC;
1080 }
1081 spin_lock(&ei->i_block_reservation_lock);
1082 ei->i_reserved_data_blocks++;

--- 30 unchanged lines hidden (view full) ---

1113 }
1114 ei->i_reserved_data_blocks -= to_free;
1115
1116 if (ei->i_reserved_data_blocks == 0) {
1117 /*
1118 * We can release all of the reserved metadata blocks
1119 * only when we have written all of the delayed
1120 * allocation blocks.
1172 * Note that in case of bigalloc, i_reserved_meta_blocks,
1173 * i_reserved_data_blocks, etc. refer to the number of clusters.
1174 */
1122 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
1175 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1123 ei->i_reserved_meta_blocks);
1124 ei->i_reserved_meta_blocks = 0;
1125 ei->i_da_metadata_calc_len = 0;
1126 }
1127
1128 /* update fs dirty data blocks counter */
1129 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1182 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1130
1131 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1132
1133 dquot_release_reservation_block(inode, to_free);
1186 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1134}
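ext4_da_reserve_space(), whose tail is visible above, handles ENOSPC by releasing the quota reservation it just took, asking ext4_should_retry_alloc() whether a pending journal commit might free space, yielding, and jumping back to repeat. A stand-alone sketch of that retry shape; both helpers are stand-ins, with the claim succeeding on the third attempt as if a commit had freed space:

#include <stdio.h>
#include <errno.h>

/* Stand-in for ext4_claim_free_clusters(): fails twice, then succeeds. */
static int claim_free_clusters(int attempt)
{
        return attempt >= 3 ? 0 : -ENOSPC;
}

/* Stand-in for ext4_should_retry_alloc(): permit a few retries. */
static int should_retry(int *retries)
{
        return (*retries)++ < 3;
}

static int reserve_space(void)
{
        int retries = 0, attempt = 0;
repeat:
        attempt++;
        /* ... take the quota reservation for this attempt ... */
        if (claim_free_clusters(attempt)) {
                /* ... release the quota reservation again ... */
                if (should_retry(&retries))
                        goto repeat;    /* yield() elided in this sketch */
                return -ENOSPC;
        }
        return 0;                       /* reservation accounted */
}

int main(void)
{
        printf("reserve_space() = %d\n", reserve_space());      /* 0 */
        return 0;
}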
1135
1136static void ext4_da_page_release_reservation(struct page *page,
1137 unsigned long offset)
1138{
1139 int to_release = 0;
1140 struct buffer_head *head, *bh;
1141 unsigned int curr_off = 0;
1195 struct inode *inode = page->mapping->host;
1196 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1197 int num_clusters;
1142
1143 head = page_buffers(page);
1144 bh = head;
1145 do {
1146 unsigned int next_off = curr_off + bh->b_size;
1147
1148 if ((offset <= curr_off) && (buffer_delay(bh))) {
1149 to_release++;
1150 clear_buffer_delay(bh);
1207 clear_buffer_da_mapped(bh);
1151 }
1152 curr_off = next_off;
1153 } while ((bh = bh->b_this_page) != head);
1154 ext4_da_release_space(page->mapping->host, to_release);
1211
1212 /* If we have released all the blocks belonging to a cluster, then we
1213 * need to release the reserved space for that cluster. */
1214 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1215 while (num_clusters > 0) {
1216 ext4_fsblk_t lblk;
1217 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1218 ((num_clusters - 1) << sbi->s_cluster_bits);
1219 if (sbi->s_cluster_ratio == 1 ||
1220 !ext4_find_delalloc_cluster(inode, lblk, 1))
1221 ext4_da_release_space(inode, 1);
1222
1223 num_clusters--;
1224 }
1155}
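The new cluster-release loop above probes each candidate cluster at a logical block derived from the page's first block, page->index << (PAGE_CACHE_SHIFT - i_blkbits), plus (num_clusters - 1) << s_cluster_bits, and only gives the reservation back when no delayed-allocation block remains in that cluster. A stand-alone sketch of just that arithmetic, with illustrative geometry (4K pages, 1K blocks, 16-block clusters):

#include <stdio.h>

int main(void)
{
        const int page_shift = 12;       /* stand-in PAGE_CACHE_SHIFT: 4K pages */
        const int blkbits = 10;          /* 1K filesystem blocks */
        const int cluster_bits = 4;      /* 16 blocks per cluster */
        const unsigned long page_index = 7;

        unsigned long first_blk = page_index << (page_shift - blkbits);

        printf("page %lu covers blocks %lu..%lu\n", page_index, first_blk,
               first_blk + (1UL << (page_shift - blkbits)) - 1);

        /* Walk candidate clusters the way the release loop does. */
        for (int num_clusters = 2; num_clusters > 0; num_clusters--) {
                unsigned long lblk = first_blk +
                        ((unsigned long)(num_clusters - 1) << cluster_bits);
                printf("candidate %d probes at lblk %lu\n", num_clusters, lblk);
        }
        return 0;
}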
1156
1157/*
1158 * Delayed allocation stuff
1159 */
1160
1161/*
1162 * mpage_da_submit_io - walks through extent of pages and tries to write

--- 85 unchanged lines hidden (view full) ---

1248 goto skip_page;
1249 if (map && (cur_logical >= map->m_lblk) &&
1250 (cur_logical <= (map->m_lblk +
1251 (map->m_len - 1)))) {
1252 if (buffer_delay(bh)) {
1253 clear_buffer_delay(bh);
1254 bh->b_blocknr = pblock;
1255 }
1326 if (buffer_da_mapped(bh))
1327 clear_buffer_da_mapped(bh);
1256 if (buffer_unwritten(bh) ||
1257 buffer_mapped(bh))
1258 BUG_ON(bh->b_blocknr != pblock);
1259 if (map->m_flags & EXT4_MAP_UNINIT)
1260 set_buffer_uninit(bh);
1261 clear_buffer_unwritten(bh);
1262 }
1263

--- 77 unchanged lines hidden (view full) ---

1341 }
1342 return;
1343}
1344
1345static void ext4_print_free_blocks(struct inode *inode)
1346{
1347 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1348 printk(KERN_CRIT "Total free blocks count %lld\n",
1349 ext4_count_free_blocks(inode->i_sb));
1421 EXT4_C2B(EXT4_SB(inode->i_sb),
1422 ext4_count_free_clusters(inode->i_sb)));
1350 printk(KERN_CRIT "Free/Dirty block details\n");
1351 printk(KERN_CRIT "free_blocks=%lld\n",
1423 printk(KERN_CRIT "Free/Dirty block details\n");
1424 printk(KERN_CRIT "free_blocks=%lld\n",
1352 (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
1425 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1426 percpu_counter_sum(&sbi->s_freeclusters_counter)));
1353 printk(KERN_CRIT "dirty_blocks=%lld\n",
1427 printk(KERN_CRIT "dirty_blocks=%lld\n",
1354 (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
1428 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1429 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1355 printk(KERN_CRIT "Block reservation details\n");
1356 printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
1357 EXT4_I(inode)->i_reserved_data_blocks);
1358 printk(KERN_CRIT "i_reserved_meta_blocks=%u\n",
1359 EXT4_I(inode)->i_reserved_meta_blocks);
1360 return;
1361}
1362

--- 62 unchanged lines hidden (view full) ---

1425 /*
1426 * If get block returns EAGAIN or ENOSPC and there
1427 * appear to be free blocks we will just let
1428 * mpage_da_submit_io() unlock all of the pages.
1429 */
1430 if (err == -EAGAIN)
1431 goto submit_io;
1432
1430 printk(KERN_CRIT "Block reservation details\n");
1431 printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
1432 EXT4_I(inode)->i_reserved_data_blocks);
1433 printk(KERN_CRIT "i_reserved_meta_blocks=%u\n",
1434 EXT4_I(inode)->i_reserved_meta_blocks);
1435 return;
1436}
1437

--- 62 unchanged lines hidden (view full) ---

1500 /*
1501 * If get block returns EAGAIN or ENOSPC and there
1502 * appears to be free blocks we will just let
1503 * mpage_da_submit_io() unlock all of the pages.
1504 */
1505 if (err == -EAGAIN)
1506 goto submit_io;
1507
1433 if (err == -ENOSPC &&
1434 ext4_count_free_blocks(sb)) {
1508 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1435 mpd->retval = err;
1436 goto submit_io;
1437 }
1438
1439 /*
1440 * get block failure will cause us to loop in
1441 * writepages, because a_ops->writepage won't be able
1442 * to make progress. The page will be redirtied by

--- 23 unchanged lines hidden (view full) ---

1466
1467 mapp = &map;
1468 if (map.m_flags & EXT4_MAP_NEW) {
1469 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
1470 int i;
1471
1472 for (i = 0; i < map.m_len; i++)
1473 unmap_underlying_metadata(bdev, map.m_pblk + i);
1474 }
1475
1548
1476 if (ext4_should_order_data(mpd->inode)) {
1477 err = ext4_jbd2_file_inode(handle, mpd->inode);
1478 if (err)
1479 /* This only happens if the journal is aborted */
1480 return;
1549 if (ext4_should_order_data(mpd->inode)) {
1550 err = ext4_jbd2_file_inode(handle, mpd->inode);
1551 if (err)
1552 /* Only if the journal is aborted */
1553 return;
1554 }
1481 }
1482
1483 /*
1484 * Update on-disk size along with block allocation.
1485 */
1486 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
1487 if (disksize > i_size_read(mpd->inode))
1488 disksize = i_size_read(mpd->inode);

--- 90 unchanged lines hidden (view full) ---

1579}
1580
1581static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1582{
1583 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
1584}
1585
1586/*
1661 * This function grabs code from the very beginning of
1662 * ext4_map_blocks, but assumes that the caller is from delayed write
1663 * time. This function looks up the requested blocks and sets the
1664 * buffer delay bit under the protection of i_data_sem.
1665 */
1666static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1667 struct ext4_map_blocks *map,
1668 struct buffer_head *bh)
1669{
1670 int retval;
1671 sector_t invalid_block = ~((sector_t) 0xffff);
1672
1673 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1674 invalid_block = ~0;
1675
1676 map->m_flags = 0;
1677 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1678 "logical block %lu\n", inode->i_ino, map->m_len,
1679 (unsigned long) map->m_lblk);
1680 /*
1681 * Try to see if we can get the block without requesting a new
1682 * file system block.
1683 */
1684 down_read((&EXT4_I(inode)->i_data_sem));
1685 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1686 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1687 else
1688 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
1689
1690 if (retval == 0) {
1691 /*
1692 * XXX: __block_prepare_write() unmaps passed block,
1693 * is it OK?
1694 */
1695 /* If the block was allocated from a previously allocated cluster,
1696 * then we don't need to reserve it again. */
1697 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1698 retval = ext4_da_reserve_space(inode, iblock);
1699 if (retval)
1700 /* not enough space to reserve */
1701 goto out_unlock;
1702 }
1703
1704 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1705 * and it should not appear on the bh->b_state.
1706 */
1707 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1708
1709 map_bh(bh, inode->i_sb, invalid_block);
1710 set_buffer_new(bh);
1711 set_buffer_delay(bh);
1712 }
1713
1714out_unlock:
1715 up_read((&EXT4_I(inode)->i_data_sem));
1716
1717 return retval;
1718}
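ext4_da_map_blocks() maps a delayed buffer to a sentinel block number so that later code treats it as mapped without a real disk address: it prefers ~0xffff, and falls back to all ones if the filesystem is large enough that ~0xffff could be a valid block. A stand-alone sketch of that selection; sector_t here is a local typedef, not the kernel's:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;      /* stand-in for the kernel typedef */

/* Pick a block number that can never name a real block, mirroring the
 * selection at the top of ext4_da_map_blocks(). */
static sector_t pick_invalid_block(sector_t blocks_count)
{
        sector_t invalid_block = ~((sector_t)0xffff);

        /* If ~0xffff could be a valid block number on this fs,
         * fall back to the all-ones sentinel. */
        if (invalid_block < blocks_count)
                invalid_block = ~(sector_t)0;
        return invalid_block;
}

int main(void)
{
        printf("small fs: %#llx\n",
               (unsigned long long)pick_invalid_block(1ULL << 20));
        printf("huge fs:  %#llx\n",
               (unsigned long long)pick_invalid_block(~0ULL - 1));
        return 0;
}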
1719
1720/*
1587 * This is a special get_blocks_t callback which is used by
1588 * ext4_da_write_begin(). It will either return mapped block or
1589 * reserve space for a single block.
1590 *
1591 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
1592 * We also have b_blocknr = -1 and b_bdev initialized properly
1593 *
1594 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
1595 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
1596 * initialized properly.
1597 */
1598static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1599 struct buffer_head *bh, int create)
1600{
1601 struct ext4_map_blocks map;
1602 int ret = 0;
1603 sector_t invalid_block = ~((sector_t) 0xffff);
1604
1737
1605 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1606 invalid_block = ~0;
1607
1608 BUG_ON(create == 0);
1609 BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
1610
1611 map.m_lblk = iblock;
1612 map.m_len = 1;
1613
1614 /*
1615 * first, we need to know whether the block is allocated already
1616 * preallocated blocks are unmapped but should treated
1617 * the same as allocated blocks.
1618 */
1619 ret = ext4_map_blocks(NULL, inode, &map, 0);
1620 if (ret < 0)
1749 ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1750 if (ret <= 0)
1621 return ret;
1622 if (ret == 0) {
1623 if (buffer_delay(bh))
1624 return 0; /* Not sure this could or should happen */
1625 /*
1626 * XXX: __block_write_begin() unmaps passed block, is it OK?
1627 */
1628 ret = ext4_da_reserve_space(inode, iblock);
1629 if (ret)
1630 /* not enough space to reserve */
1631 return ret;
1632
1633 map_bh(bh, inode->i_sb, invalid_block);
1634 set_buffer_new(bh);
1635 set_buffer_delay(bh);
1636 return 0;
1637 }
1638
1639 map_bh(bh, inode->i_sb, map.m_pblk);
1640 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
1641
1642 if (buffer_unwritten(bh)) {
1643 /* A delayed write to unwritten bh should be marked
1644 * new and mapped. Mapped ensures that we don't do
1645 * get_block multiple times when we write to the same
1646 * offset and new ensures that we do proper zero out

--- 394 unchanged lines hidden (view full) ---

2041 unsigned int max_pages;
2042 int range_cyclic, cycled = 1, io_done = 0;
2043 int needed_blocks, ret = 0;
2044 long desired_nr_to_write, nr_to_writebump = 0;
2045 loff_t range_start = wbc->range_start;
2046 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2047 pgoff_t done_index = 0;
2048 pgoff_t end;
2163 struct blk_plug plug;
2049
2050 trace_ext4_da_writepages(inode, wbc);
2051
2052 /*
2053 * No pages to write? This is mainly a kludge to avoid starting
2054 * a transaction for special inodes like journal inode on last iput()
2055 * because that could violate lock ordering on umount
2056 */

--- 62 unchanged lines hidden (view full) ---

2119 nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
2120 wbc->nr_to_write = desired_nr_to_write;
2121 }
2122
2123retry:
2124 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2125 tag_pages_for_writeback(mapping, index, end);
2126
2242 blk_start_plug(&plug);
2127 while (!ret && wbc->nr_to_write > 0) {
2128
2129 /*
2130 * we insert one extent at a time, so we need the
2131 * credits needed for a single extent allocation.
2132 * journalled mode is currently not supported
2133 * by delalloc
2134 */

--- 48 unchanged lines hidden (view full) ---

2183 } else if (wbc->nr_to_write)
2184 /*
2185 * There is no more writeout needed
2186 * or we requested a non-blocking writeout
2187 * and we found the device congested
2188 */
2189 break;
2190 }
2307 blk_finish_plug(&plug);
2191 if (!io_done && !cycled) {
2192 cycled = 1;
2193 index = 0;
2194 wbc->range_start = index << PAGE_CACHE_SHIFT;
2195 wbc->range_end = mapping->writeback_index - 1;
2196 goto retry;
2197 }
2198
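On the 1bce63d1a2a2 side, the writeback loop is now bracketed by blk_start_plug()/blk_finish_plug(), so bios submitted while mapping and writing extents are queued per task and handed to the block layer in one batch when the plug is released. The sketch below shows only that bracketing shape; struct blk_plug and all three functions are toy stand-ins, not the kernel implementations:

#include <stdio.h>

struct blk_plug { int queued; };        /* toy stand-in */

static void blk_start_plug(struct blk_plug *plug)  { plug->queued = 0; }
static void submit_io(struct blk_plug *plug)       { plug->queued++; }
static void blk_finish_plug(struct blk_plug *plug)
{
        printf("flushing %d queued requests in one batch\n", plug->queued);
}

int main(void)
{
        struct blk_plug plug;
        int nr_to_write = 3;

        blk_start_plug(&plug);
        while (nr_to_write-- > 0)       /* mirrors the writepages loop */
                submit_io(&plug);
        blk_finish_plug(&plug);
        return 0;
}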

--- 22 unchanged lines hidden (view full) ---

2221 /*
2222 * switch to non delalloc mode if we are running low
2223 * on free blocks. The free block accounting via percpu
2224 * counters can get slightly wrong with percpu_counter_batch getting
2225 * accumulated on each CPU without updating global counters
2226 * Delalloc needs accurate free block accounting. So switch
2227 * to non-delalloc when we are near the error range.
2228 */
2229 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
2230 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
2346 free_blocks = EXT4_C2B(sbi,
2347 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2348 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2231 if (2 * free_blocks < 3 * dirty_blocks ||
2232 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2350 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2233 /*
2234 * free block count is less than 150% of dirty blocks
2235 * or free blocks are less than the watermark
2236 */
2237 return 1;
2238 }
2239 /*
2240 * Even if we don't switch but are nearing capacity,

--- 9 unchanged lines hidden (view full) ---
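The check above encodes "free space below 150% of dirty space" as 2 * free_blocks < 3 * dirty_blocks, avoiding a division, with an additive watermark as the second trigger. The same test worked through stand-alone; the watermark value is illustrative, not the real EXT4_FREECLUSTERS_WATERMARK:

#include <stdio.h>

/* Mirrors the ext4_nonda_switch() heuristic: fall back to non-delalloc
 * writeback when free < 1.5 * dirty, or free is under dirty + watermark. */
static int nonda_switch(long long free_blocks, long long dirty_blocks)
{
        const long long watermark = 1024;       /* illustrative */

        return 2 * free_blocks < 3 * dirty_blocks ||
               free_blocks < dirty_blocks + watermark;
}

int main(void)
{
        printf("%d\n", nonda_switch(3000, 1000)); /* 0: comfortably above both */
        printf("%d\n", nonda_switch(1400, 1000)); /* 1: 1400 < 1500 */
        printf("%d\n", nonda_switch(1900, 1000)); /* 1: under the watermark */
        return 0;
}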

2250 loff_t pos, unsigned len, unsigned flags,
2251 struct page **pagep, void **fsdata)
2252{
2253 int ret, retries = 0;
2254 struct page *page;
2255 pgoff_t index;
2256 struct inode *inode = mapping->host;
2257 handle_t *handle;
2376 loff_t page_len;
2258
2259 index = pos >> PAGE_CACHE_SHIFT;
2260
2261 if (ext4_nonda_switch(inode->i_sb)) {
2262 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2263 return ext4_write_begin(file, mapping, pos,
2264 len, flags, pagep, fsdata);
2265 }

--- 30 unchanged lines hidden (view full) ---

2296 page_cache_release(page);
2297 /*
2298 * block_write_begin may have instantiated a few blocks
2299 * outside i_size. Trim these off again. Don't need
2300 * i_size_read because we hold i_mutex.
2301 */
2302 if (pos + len > inode->i_size)
2303 ext4_truncate_failed_write(inode);
2423 } else {
2424 page_len = pos & (PAGE_CACHE_SIZE - 1);
2425 if (page_len > 0) {
2426 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2427 inode, page, pos - page_len, page_len,
2428 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2429 }
2304 }
2305
2306 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2307 goto retry;
2308out:
2309 return ret;
2310}
2311

--- 26 unchanged lines hidden (view full) ---

2338 struct page *page, void *fsdata)
2339{
2340 struct inode *inode = mapping->host;
2341 int ret = 0, ret2;
2342 handle_t *handle = ext4_journal_current_handle();
2343 loff_t new_i_size;
2344 unsigned long start, end;
2345 int write_mode = (int)(unsigned long)fsdata;
2472 loff_t page_len;
2346
2347 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2348 if (ext4_should_order_data(inode)) {
2349 return ext4_ordered_write_end(file, mapping, pos,
2350 len, copied, page, fsdata);
2351 } else if (ext4_should_writeback_data(inode)) {
2352 return ext4_writeback_write_end(file, mapping, pos,
2353 len, copied, page, fsdata);

--- 32 unchanged lines hidden (view full) ---

2386 * new_i_size is less than inode->i_size
2387 * but greater than i_disksize (hint: delalloc)
2388 */
2389 ext4_mark_inode_dirty(handle, inode);
2390 }
2391 }
2392 ret2 = generic_write_end(file, mapping, pos, len, copied,
2393 page, fsdata);
2521
2522 page_len = PAGE_CACHE_SIZE -
2523 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2524
2525 if (page_len > 0) {
2526 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2527 inode, page, pos + copied - 1, page_len,
2528 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2529 }
2530
2394 copied = ret2;
2395 if (ret2 < 0)
2396 ret = ret2;
2397 ret2 = ext4_journal_stop(handle);
2398 if (!ret)
2399 ret = ret2;
2400
2401 return ret ? ret : copied;
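The new ext4_da_write_end() tail above computes how many bytes lie between the last copied byte and the end of its page; that length is what gets handed to the partial-page discard helper. The same arithmetic stand-alone, with example numbers:

#include <stdio.h>

int main(void)
{
        const unsigned long page_size = 4096;   /* PAGE_CACHE_SIZE stand-in */
        unsigned long pos = 8192, copied = 100;

        /* Offset of the last copied byte within its page... */
        unsigned long last = (pos + copied - 1) & (page_size - 1);   /* 99 */
        /* ...and the tail length passed to the discard helper. */
        unsigned long page_len = page_size - last;                   /* 3997 */

        printf("last byte at page offset %lu, tail of %lu bytes\n",
               last, page_len);
        return 0;
}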

--- 447 unchanged lines hidden (view full) ---

2849static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
2850 const struct iovec *iov, loff_t offset,
2851 unsigned long nr_segs)
2852{
2853 struct file *file = iocb->ki_filp;
2854 struct inode *inode = file->f_mapping->host;
2855 ssize_t ret;
2856
2994 /*
2995 * If we are doing data journalling we don't support O_DIRECT
2996 */
2997 if (ext4_should_journal_data(inode))
2998 return 0;
2999
2857 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
2858 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
2859 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
2860 else
2861 ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
2862 trace_ext4_direct_IO_exit(inode, offset,
2863 iov_length(iov, nr_segs), rw, ret);
2864 return ret;

--- 53 unchanged lines hidden (view full) ---

2918 .readpages = ext4_readpages,
2919 .writepage = ext4_writepage,
2920 .write_begin = ext4_write_begin,
2921 .write_end = ext4_journalled_write_end,
2922 .set_page_dirty = ext4_journalled_set_page_dirty,
2923 .bmap = ext4_bmap,
2924 .invalidatepage = ext4_invalidatepage,
2925 .releasepage = ext4_releasepage,
3069 .direct_IO = ext4_direct_IO,
2926 .is_partially_uptodate = block_is_partially_uptodate,
2927 .error_remove_page = generic_error_remove_page,
2928};
2929
2930static const struct address_space_operations ext4_da_aops = {
2931 .readpage = ext4_readpage,
2932 .readpages = ext4_readpages,
2933 .writepage = ext4_writepage,

--- 20 unchanged lines hidden (view full) ---

2954 test_opt(inode->i_sb, DELALLOC))
2955 inode->i_mapping->a_ops = &ext4_da_aops;
2956 else if (ext4_should_writeback_data(inode))
2957 inode->i_mapping->a_ops = &ext4_writeback_aops;
2958 else
2959 inode->i_mapping->a_ops = &ext4_journalled_aops;
2960}
2961
3107/*
3108 * ext4_discard_partial_page_buffers()
3109 * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
3110 * This function finds and locks the page containing the offset
3111 * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
3112 * Calling functions that already have the page locked should call
3113 * ext4_discard_partial_page_buffers_no_lock directly.
3114 */
3115int ext4_discard_partial_page_buffers(handle_t *handle,
3116 struct address_space *mapping, loff_t from,
3117 loff_t length, int flags)
3118{
3119 struct inode *inode = mapping->host;
3120 struct page *page;
3121 int err = 0;
3122
3123 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3124 mapping_gfp_mask(mapping) & ~__GFP_FS);
3125 if (!page)
3126 return -EINVAL;
3127
3128 err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
3129 from, length, flags);
3130
3131 unlock_page(page);
3132 page_cache_release(page);
3133 return err;
3134}
3135
3136/*
3137 * ext4_discard_partial_page_buffers_no_lock()
3138 * Zeros a page range of length 'length' starting from offset 'from'.
3139 * Buffer heads that correspond to the block aligned regions of the
3140 * zeroed range will be unmapped. Non-block-aligned regions
3141 * will have the corresponding buffer head mapped if needed so that
3142 * that region of the page can be updated with the partial zero out.
3143 *
3144 * This function assumes that the page has already been locked.
3145 * The range to be discarded must be contained within the given page.
3146 * If the specified range exceeds the end of the page it will be shortened
3147 * to the end of the page that corresponds to 'from'. This function is
3148 * appropriate for updating a page and its buffer heads to be unmapped and
3149 * zeroed for blocks that have been either released, or are going to be
3150 * released.
3151 *
3152 * handle: The journal handle
3153 * inode: The files inode
3154 * page: A locked page that contains the offset "from"
3155 * from: The starting byte offset (from the beginning of the file)
3156 * to begin discarding
3157 * len: The length of bytes to discard
3158 * flags: Optional flags that may be used:
3159 *
3160 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3161 * Only zero the regions of the page whose buffer heads
3162 * have already been unmapped. This flag is appropriate
3163 * for updating the contents of a page whose blocks may
3164 * have already been released, and we only want to zero
3165 * out the regions that correspond to those released blocks.
3166 *
3167 * Returns zero on success or negative on failure.
3168 */
3169int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3170 struct inode *inode, struct page *page, loff_t from,
3171 loff_t length, int flags)
3172{
3173 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3174 unsigned int offset = from & (PAGE_CACHE_SIZE-1);
3175 unsigned int blocksize, max, pos;
3176 unsigned int end_of_block, range_to_discard;
3177 ext4_lblk_t iblock;
3178 struct buffer_head *bh;
3179 int err = 0;
3180
3181 blocksize = inode->i_sb->s_blocksize;
3182 max = PAGE_CACHE_SIZE - offset;
3183
3184 if (index != page->index)
3185 return -EINVAL;
3186
3187 /*
3188 * correct length if it does not fall between
3189 * 'from' and the end of the page
3190 */
3191 if (length > max || length < 0)
3192 length = max;
3193
3194 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3195
3196 if (!page_has_buffers(page)) {
3197 /*
3198 * If the range to be discarded covers a partial block
3199 * we need to get the page buffers. This is because
3200 * partial blocks cannot be released and the page needs
3201 * to be updated with the contents of the block before
3202 * we write the zeros on top of it.
3203 */
3204 if (!(from & (blocksize - 1)) ||
3205 !((from + length) & (blocksize - 1))) {
3206 create_empty_buffers(page, blocksize, 0);
3207 } else {
3208 /*
3209 * If there are no partial blocks,
3210 * there is nothing to update,
3211 * so we can return now
3212 */
3213 return 0;
3214 }
3215 }
3216
3217 /* Find the buffer that contains "offset" */
3218 bh = page_buffers(page);
3219 pos = blocksize;
3220 while (offset >= pos) {
3221 bh = bh->b_this_page;
3222 iblock++;
3223 pos += blocksize;
3224 }
3225
3226 pos = offset;
3227 while (pos < offset + length) {
3228 err = 0;
3229
3230 /* The length of space left to zero and unmap */
3231 range_to_discard = offset + length - pos;
3232
3233 /* The length of space until the end of the block */
3234 end_of_block = blocksize - (pos & (blocksize-1));
3235
3236 /*
3237 * Do not unmap or zero past end of block
3238 * for this buffer head
3239 */
3240 if (range_to_discard > end_of_block)
3241 range_to_discard = end_of_block;
3242
3243
3244 /*
3245 * Skip this buffer head if we are only zeroing unmapped
3246 * regions of the page
3247 */
3248 if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
3249 buffer_mapped(bh))
3250 goto next;
3251
3252 /* If the range is block aligned, unmap */
3253 if (range_to_discard == blocksize) {
3254 clear_buffer_dirty(bh);
3255 bh->b_bdev = NULL;
3256 clear_buffer_mapped(bh);
3257 clear_buffer_req(bh);
3258 clear_buffer_new(bh);
3259 clear_buffer_delay(bh);
3260 clear_buffer_unwritten(bh);
3261 clear_buffer_uptodate(bh);
3262 zero_user(page, pos, range_to_discard);
3263 BUFFER_TRACE(bh, "Buffer discarded");
3264 goto next;
3265 }
3266
3267 /*
3268 * If this block is not completely contained in the range
3269 * to be discarded, then it is not going to be released. Because
3270 * we need to keep this block, we need to make sure this part
3271 * of the page is uptodate before we modify it by writing
3272 * partial zeros on it.
3273 */
3274 if (!buffer_mapped(bh)) {
3275 /*
3276 * Buffer head must be mapped before we can read
3277 * from the block
3278 */
3279 BUFFER_TRACE(bh, "unmapped");
3280 ext4_get_block(inode, iblock, bh, 0);
3281 /* unmapped? It's a hole - nothing to do */
3282 if (!buffer_mapped(bh)) {
3283 BUFFER_TRACE(bh, "still unmapped");
3284 goto next;
3285 }
3286 }
3287
3288 /* Ok, it's mapped. Make sure it's up-to-date */
3289 if (PageUptodate(page))
3290 set_buffer_uptodate(bh);
3291
3292 if (!buffer_uptodate(bh)) {
3293 err = -EIO;
3294 ll_rw_block(READ, 1, &bh);
3295 wait_on_buffer(bh);
3296 /* Uhhuh. Read error. Complain and punt.*/
3297 if (!buffer_uptodate(bh))
3298 goto next;
3299 }
3300
3301 if (ext4_should_journal_data(inode)) {
3302 BUFFER_TRACE(bh, "get write access");
3303 err = ext4_journal_get_write_access(handle, bh);
3304 if (err)
3305 goto next;
3306 }
3307
3308 zero_user(page, pos, range_to_discard);
3309
3310 err = 0;
3311 if (ext4_should_journal_data(inode)) {
3312 err = ext4_handle_dirty_metadata(handle, inode, bh);
3313 } else
3314 mark_buffer_dirty(bh);
3315
3316 BUFFER_TRACE(bh, "Partial buffer zeroed");
3317next:
3318 bh = bh->b_this_page;
3319 iblock++;
3320 pos += range_to_discard;
3321 }
3322
3323 return err;
3324}
3325
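The per-buffer loop in ext4_discard_partial_page_buffers_no_lock() advances through the requested byte range in steps that never cross a block boundary: range_to_discard is what remains of the request, and end_of_block caps it at the current block's end; whether the capped step equals a whole block decides between unmapping the buffer and partially zeroing it. A stand-alone sketch of that walk (1K blocks; the offsets are illustrative):

#include <stdio.h>

int main(void)
{
        const unsigned int blocksize = 1024;
        unsigned int offset = 700, length = 2000;  /* range inside the page */
        unsigned int pos = offset;

        while (pos < offset + length) {
                /* bytes left in the requested range */
                unsigned int range_to_discard = offset + length - pos;
                /* bytes left until the end of the current block */
                unsigned int end_of_block =
                        blocksize - (pos & (blocksize - 1));

                if (range_to_discard > end_of_block)
                        range_to_discard = end_of_block;

                printf("zero %u bytes at %u (%s block)\n", range_to_discard,
                       pos, range_to_discard == blocksize ? "whole" : "partial");
                pos += range_to_discard;
        }
        return 0;
}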
3326/*
2963 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
2964 * up to the end of the block which corresponds to `from'.
2965 * This is required during truncate. We need to physically zero the tail end
2966 * of that block so it doesn't yield old data if the file is later grown.
2967 */
2968int ext4_block_truncate_page(handle_t *handle,
2969 struct address_space *mapping, loff_t from)
2970{

--- 94 unchanged lines hidden (view full) ---

3065
3066 zero_user(page, offset, length);
3067
3068 BUFFER_TRACE(bh, "zeroed end of block");
3069
3070 err = 0;
3071 if (ext4_should_journal_data(inode)) {
3072 err = ext4_handle_dirty_metadata(handle, inode, bh);
3073 } else {
3074 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
3075 err = ext4_jbd2_file_inode(handle, inode);
3437 } else
3076 mark_buffer_dirty(bh);
3438 mark_buffer_dirty(bh);
3077 }
3078
3079unlock:
3080 unlock_page(page);
3081 page_cache_release(page);
3082 return err;
3083}
3084
3085int ext4_can_truncate(struct inode *inode)

--- 24 unchanged lines hidden (view full) ---

3110 if (!S_ISREG(inode->i_mode))
3111 return -ENOTSUPP;
3112
3113 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
3114 /* TODO: Add support for non extent hole punching */
3115 return -ENOTSUPP;
3116 }
3117
3479 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3480 /* TODO: Add support for bigalloc file systems */
3481 return -ENOTSUPP;
3482 }
3483
3118 return ext4_ext_punch_hole(file, offset, length);
3119}
3120
3121/*
3122 * ext4_truncate()
3123 *
3124 * We block out ext4_get_block() block instantiations across the entire
3125 * transaction, and VFS/VM ensures that ext4_truncate() cannot run

--- 167 unchanged lines hidden (view full) ---

3293 /*
3294 * There are other valid inodes in the buffer, this inode
3295 * has in-inode xattrs, or we don't have this inode in memory.
3296 * Read the block from disk.
3297 */
3298 trace_ext4_load_inode(inode);
3299 get_bh(bh);
3300 bh->b_end_io = end_buffer_read_sync;
3301 submit_bh(READ | REQ_META | REQ_PRIO, bh);
3667 submit_bh(READ_META, bh);
3302 wait_on_buffer(bh);
3303 if (!buffer_uptodate(bh)) {
3304 EXT4_ERROR_INODE_BLOCK(inode, block,
3305 "unable to read itable block");
3306 brelse(bh);
3307 return -EIO;
3308 }
3309 }

--- 1121 unchanged lines hidden ---
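Both sides read the inode table block with the same synchronous idiom seen above: take a reference, point b_end_io at end_buffer_read_sync, submit, then wait_on_buffer() and test buffer_uptodate(); only the submit flags differ between the two commits. The sketch below shows just that completion shape, with every type and helper as a toy stand-in (the toy submit "completes" inline, where the real one completes from interrupt context):

#include <stdio.h>

struct buffer_head {
        int uptodate;
        void (*b_end_io)(struct buffer_head *);
};

static void end_buffer_read_sync(struct buffer_head *bh) { bh->uptodate = 1; }

/* Toy submit: invokes the completion hook immediately. */
static void submit_bh(int flags, struct buffer_head *bh)
{
        (void)flags;    /* READ_META vs. READ | REQ_META | REQ_PRIO */
        bh->b_end_io(bh);
}

/* Toy wait: the real one sleeps until the I/O completion runs. */
static void wait_on_buffer(struct buffer_head *bh) { (void)bh; }

int main(void)
{
        struct buffer_head bh = { 0, end_buffer_read_sync };

        submit_bh(0, &bh);
        wait_on_buffer(&bh);
        puts(bh.uptodate ? "itable block read" : "read failed: -EIO");
        return 0;
}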