xref: /openbmc/linux/fs/reiserfs/bitmap.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 /* Reiserfs block (de)allocator, bitmap-based. */
5 
6 #include <linux/config.h>
7 #include <linux/time.h>
8 #include <linux/reiserfs_fs.h>
9 #include <linux/errno.h>
10 #include <linux/buffer_head.h>
11 #include <linux/kernel.h>
12 #include <linux/pagemap.h>
13 #include <linux/reiserfs_fs_sb.h>
14 #include <linux/reiserfs_fs_i.h>
15 #include <linux/quotaops.h>
16 
17 #define PREALLOCATION_SIZE 9
18 
19 /* different reiserfs block allocator options */
20 
21 #define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
22 
23 #define  _ALLOC_concentrating_formatted_nodes 0
24 #define  _ALLOC_displacing_large_files 1
25 #define  _ALLOC_displacing_new_packing_localities 2
26 #define  _ALLOC_old_hashed_relocation 3
27 #define  _ALLOC_new_hashed_relocation 4
28 #define  _ALLOC_skip_busy 5
29 #define  _ALLOC_displace_based_on_dirid 6
30 #define  _ALLOC_hashed_formatted_nodes 7
31 #define  _ALLOC_old_way 8
32 #define  _ALLOC_hundredth_slices 9
33 #define  _ALLOC_dirid_groups 10
34 #define  _ALLOC_oid_groups 11
35 #define  _ALLOC_packing_groups 12
36 
37 #define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
38 #define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
39 #define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
40 
41 #define SET_OPTION(optname) \
42    do { \
43         reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \
44         set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45     } while(0)
46 #define TEST_OPTION(optname, s) \
47     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 
49 static inline void get_bit_address (struct super_block * s,
50 				    b_blocknr_t block, int * bmap_nr, int * offset)
51 {
52     /* It is in the bitmap block number equal to the block
53      * number divided by the number of bits in a block. */
54     *bmap_nr = block / (s->s_blocksize << 3);
55     /* Within that bitmap block it is located at bit offset *offset. */
56     *offset = block & ((s->s_blocksize << 3) - 1 );
57     return;
58 }
59 
60 #ifdef CONFIG_REISERFS_CHECK
61 int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value)
62 {
63     int i, j;
64 
65     if (block == 0 || block >= SB_BLOCK_COUNT (s)) {
66 	reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)",
67 			  block, SB_BLOCK_COUNT (s));
68 	return 0;
69     }
70 
71     /* it can't be one of the bitmap blocks */
72     for (i = 0; i < SB_BMAP_NR (s); i ++)
73 	if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
74 	    reiserfs_warning (s, "vs: 4020: is_reusable: "
75 			      "bitmap block %lu(%u) can't be freed or reused",
76 			      block, SB_BMAP_NR (s));
77 	    return 0;
78 	}
79 
80     get_bit_address (s, block, &i, &j);
81 
82     if (i >= SB_BMAP_NR (s)) {
83 	reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: "
84 			  "block=%lu, bitmap_nr=%d", block, i);
85 	return 0;
86     }
87 
88     if ((bit_value == 0 &&
89          reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
90 	(bit_value == 1 &&
91 	 reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
92 	reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not "
93 			  "match required value (i==%d, j==%d) test_bit==%d",
94 		block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
95 
96 	return 0;
97     }
98 
99     if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) {
100 	reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), "
101 			  "it must be busy", SB_ROOT_BLOCK (s));
102 	return 0;
103     }
104 
105     return 1;
106 }
107 #endif /* CONFIG_REISERFS_CHECK */
108 
109 /* searches in journal structures for a given block number (bmap, off). If block
110    is found in reiserfs journal it suggests next free block candidate to test. */
111 static inline  int is_block_in_journal (struct super_block * s, int bmap, int
112 off, int *next)
113 {
114     b_blocknr_t tmp;
115 
116     if (reiserfs_in_journal (s, bmap, off, 1, &tmp)) {
117 	if (tmp) {              /* hint supplied */
118 	    *next = tmp;
119 	    PROC_INFO_INC( s, scan_bitmap.in_journal_hint );
120 	} else {
121 	    (*next) = off + 1;          /* inc offset to avoid looping. */
122 	    PROC_INFO_INC( s, scan_bitmap.in_journal_nohint );
123 	}
124 	PROC_INFO_INC( s, scan_bitmap.retry );
125 	return 1;
126     }
127     return 0;
128 }
129 
130 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
131  * block; */
132 static int scan_bitmap_block (struct reiserfs_transaction_handle *th,
133 			      int bmap_n, int *beg, int boundary, int min, int max, int unfm)
134 {
135     struct super_block *s = th->t_super;
136     struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n];
137     int end, next;
138     int org = *beg;
139 
140     BUG_ON (!th->t_trans_id);
141 
142     RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)",bmap_n, SB_BMAP_NR (s) - 1);
143     PROC_INFO_INC( s, scan_bitmap.bmap );
144 /* this is unclear and lacks comments, explain how journal bitmaps
145    work here for the reader.  Convey a sense of the design here. What
146    is a window? */
147 /* - I mean `a window of zero bits' as in description of this function - Zam. */
148 
149     if ( !bi ) {
150 	reiserfs_warning (s, "NULL bitmap info pointer for bitmap %d", bmap_n);
151 	return 0;
152     }
153     if (buffer_locked (bi->bh)) {
154        PROC_INFO_INC( s, scan_bitmap.wait );
155        __wait_on_buffer (bi->bh);
156     }
157 
158     while (1) {
159 	cont:
160 	if (bi->free_count < min)
161 		return 0; // No free blocks in this bitmap
162 
163 	/* search for a first zero bit -- beggining of a window */
164 	*beg = reiserfs_find_next_zero_le_bit
165 	        ((unsigned long*)(bi->bh->b_data), boundary, *beg);
166 
167 	if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
168 				      * cannot contain a zero window of minimum size */
169 	    return 0;
170 	}
171 
172 	if (unfm && is_block_in_journal(s,bmap_n, *beg, beg))
173 	    continue;
174 	/* first zero bit found; we check next bits */
175 	for (end = *beg + 1;; end ++) {
176 	    if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) {
177 		next = end;
178 		break;
179 	    }
180 	    /* finding the other end of zero bit window requires looking into journal structures (in
181 	     * case of searching for free blocks for unformatted nodes) */
182 	    if (unfm && is_block_in_journal(s, bmap_n, end, &next))
183 		break;
184 	}
185 
186 	/* now (*beg) points to beginning of zero bits window,
187 	 * (end) points to one bit after the window end */
188 	if (end - *beg >= min) { /* it seems we have found window of proper size */
189 	    int i;
190 	    reiserfs_prepare_for_journal (s, bi->bh, 1);
191 	    /* try to set all blocks used checking are they still free */
192 	    for (i = *beg; i < end; i++) {
193 		/* It seems that we should not check in journal again. */
194 		if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) {
195 		    /* bit was set by another process
196 		     * while we slept in prepare_for_journal() */
197 		    PROC_INFO_INC( s, scan_bitmap.stolen );
198 		    if (i >= *beg + min)	{ /* we can continue with smaller set of allocated blocks,
199 					   * if length of this set is more or equal to `min' */
200 			end = i;
201 			break;
202 		    }
203 		    /* otherwise we clear all bit were set ... */
204 		    while (--i >= *beg)
205 			reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
206 		    reiserfs_restore_prepared_buffer (s, bi->bh);
207 		    *beg = org;
208 		    /* ... and search again in current block from beginning */
209 		    goto cont;
210 		}
211 	    }
212 	    bi->free_count -= (end - *beg);
213 	    journal_mark_dirty (th, s, bi->bh);
214 
215 	    /* free block count calculation */
216 	    reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1);
217 	    PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
218 	    journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s));
219 
220 	    return end - (*beg);
221 	} else {
222 	    *beg = next;
223 	}
224     }
225 }
226 
227 static int bmap_hash_id(struct super_block *s, u32 id) {
228     char * hash_in = NULL;
229     unsigned long hash;
230     unsigned bm;
231 
232     if (id <= 2) {
233 	bm = 1;
234     } else {
235         hash_in = (char *)(&id);
236         hash = keyed_hash(hash_in, 4);
237 	bm = hash % SB_BMAP_NR(s);
238 	if (!bm)
239 	    bm = 1;
240     }
241     /* this can only be true when SB_BMAP_NR = 1 */
242     if (bm >= SB_BMAP_NR(s))
243     	bm = 0;
244     return bm;
245 }
246 
247 /*
248  * hashes the id and then returns > 0 if the block group for the
249  * corresponding hash is full
250  */
251 static inline int block_group_used(struct super_block *s, u32 id) {
252     int bm;
253     bm = bmap_hash_id(s, id);
254     if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100) ) {
255         return 0;
256     }
257     return 1;
258 }
259 
260 /*
261  * the packing is returned in disk byte order
262  */
263 u32 reiserfs_choose_packing(struct inode *dir) {
264     u32 packing;
265     if (TEST_OPTION(packing_groups, dir->i_sb)) {
266 	u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
267 	/*
268 	 * some versions of reiserfsck expect packing locality 1 to be
269 	 * special
270 	 */
271 	if (parent_dir == 1 || block_group_used(dir->i_sb,parent_dir))
272             packing = INODE_PKEY(dir)->k_objectid;
273         else
274             packing = INODE_PKEY(dir)->k_dir_id;
275     } else
276         packing = INODE_PKEY(dir)->k_objectid;
277     return packing;
278 }
279 
280 /* Tries to find contiguous zero bit window (given size) in given region of
281  * bitmap and place new blocks there. Returns number of allocated blocks. */
282 static int scan_bitmap (struct reiserfs_transaction_handle *th,
283 			b_blocknr_t *start, b_blocknr_t finish,
284 			int min, int max, int unfm, unsigned long file_block)
285 {
286     int nr_allocated=0;
287     struct super_block * s = th->t_super;
288     /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
289      * - Hans, it is not a block number - Zam. */
290 
291     int bm, off;
292     int end_bm, end_off;
293     int off_max = s->s_blocksize << 3;
294 
295     BUG_ON (!th->t_trans_id);
296 
297     PROC_INFO_INC( s, scan_bitmap.call );
298     if ( SB_FREE_BLOCKS(s) <= 0)
299 	return 0; // No point in looking for more free blocks
300 
301     get_bit_address (s, *start, &bm, &off);
302     get_bit_address (s, finish, &end_bm, &end_off);
303     if (bm > SB_BMAP_NR(s))
304         return 0;
305     if (end_bm > SB_BMAP_NR(s))
306         end_bm = SB_BMAP_NR(s);
307 
308     /* When the bitmap is more than 10% free, anyone can allocate.
309      * When it's less than 10% free, only files that already use the
310      * bitmap are allowed. Once we pass 80% full, this restriction
311      * is lifted.
312      *
313      * We do this so that files that grow later still have space close to
314      * their original allocation. This improves locality, and presumably
315      * performance as a result.
316      *
317      * This is only an allocation policy and does not make up for getting a
318      * bad hint. Decent hinting must be implemented for this to work well.
319      */
320     if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) {
321 	for (;bm < end_bm; bm++, off = 0) {
322 	    if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 )
323 		nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
324 	    if (nr_allocated)
325 		goto ret;
326         }
327 	/* we know from above that start is a reasonable number */
328 	get_bit_address (s, *start, &bm, &off);
329     }
330 
331     for (;bm < end_bm; bm++, off = 0) {
332 	nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
333 	if (nr_allocated)
334 	    goto ret;
335     }
336 
337     nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
338 
339  ret:
340     *start = bm * off_max + off;
341     return nr_allocated;
342 
343 }
344 
345 static void _reiserfs_free_block (struct reiserfs_transaction_handle *th,
346 				  struct inode *inode, b_blocknr_t block,
347 				  int for_unformatted)
348 {
349     struct super_block * s = th->t_super;
350     struct reiserfs_super_block * rs;
351     struct buffer_head * sbh;
352     struct reiserfs_bitmap_info *apbi;
353     int nr, offset;
354 
355     BUG_ON (!th->t_trans_id);
356 
357     PROC_INFO_INC( s, free_block );
358 
359     rs = SB_DISK_SUPER_BLOCK (s);
360     sbh = SB_BUFFER_WITH_SB (s);
361     apbi = SB_AP_BITMAP(s);
362 
363     get_bit_address (s, block, &nr, &offset);
364 
365     if (nr >= sb_bmap_nr (rs)) {
366 	reiserfs_warning (s, "vs-4075: reiserfs_free_block: "
367 			  "block %lu is out of range on %s",
368 			  block, reiserfs_bdevname (s));
369 	return;
370     }
371 
372     reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ;
373 
374     /* clear bit for the given block in bit map */
375     if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) {
376 	reiserfs_warning (s, "vs-4080: reiserfs_free_block: "
377 			  "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
378 			  reiserfs_bdevname (s), block);
379     }
380     apbi[nr].free_count ++;
381     journal_mark_dirty (th, s, apbi[nr].bh);
382 
383     reiserfs_prepare_for_journal(s, sbh, 1) ;
384     /* update super block */
385     set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 );
386 
387     journal_mark_dirty (th, s, sbh);
388     if (for_unformatted)
389         DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
390 }
391 
392 void reiserfs_free_block (struct reiserfs_transaction_handle *th,
393 			  struct inode *inode, b_blocknr_t block,
394 			  int for_unformatted)
395 {
396     struct super_block * s = th->t_super;
397 
398     BUG_ON (!th->t_trans_id);
399 
400     RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
401     RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block");
402     /* mark it before we clear it, just in case */
403     journal_mark_freed(th, s, block) ;
404     _reiserfs_free_block(th, inode, block, for_unformatted) ;
405 }
406 
407 /* preallocated blocks don't need to be run through journal_mark_freed */
408 static void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th,
409 			  struct inode *inode, b_blocknr_t block) {
410     RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device");
411     RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block");
412     BUG_ON (!th->t_trans_id);
413     _reiserfs_free_block(th, inode, block, 1) ;
414 }
415 
416 static void __discard_prealloc (struct reiserfs_transaction_handle * th,
417 				struct reiserfs_inode_info *ei)
418 {
419     unsigned long save = ei->i_prealloc_block ;
420     int dirty = 0;
421     struct inode *inode = &ei->vfs_inode;
422     BUG_ON (!th->t_trans_id);
423 #ifdef CONFIG_REISERFS_CHECK
424     if (ei->i_prealloc_count < 0)
425 	reiserfs_warning (th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.", __FUNCTION__ );
426 #endif
427     while (ei->i_prealloc_count > 0) {
428 	reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
429 	ei->i_prealloc_block++;
430 	ei->i_prealloc_count --;
431 	dirty = 1;
432     }
433     if (dirty)
434     	reiserfs_update_sd(th, inode);
435     ei->i_prealloc_block = save;
436     list_del_init(&(ei->i_prealloc_list));
437 }
438 
439 /* FIXME: It should be inline function */
440 void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
441 				struct inode *inode)
442 {
443     struct reiserfs_inode_info *ei = REISERFS_I(inode);
444     BUG_ON (!th->t_trans_id);
445     if (ei->i_prealloc_count)
446 	__discard_prealloc(th, ei);
447 }
448 
449 void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th)
450 {
451     struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
452 
453     BUG_ON (!th->t_trans_id);
454 
455     while (!list_empty(plist)) {
456 	struct reiserfs_inode_info *ei;
457 	ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list);
458 #ifdef CONFIG_REISERFS_CHECK
459 	if (!ei->i_prealloc_count) {
460 	    reiserfs_warning (th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", __FUNCTION__);
461 	}
462 #endif
463 	__discard_prealloc(th, ei);
464     }
465 }
466 
467 void reiserfs_init_alloc_options (struct super_block *s)
468 {
469     set_bit (_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
470     set_bit (_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
471     set_bit (_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
472 }
473 
474 /* block allocator related options are parsed here */
475 int reiserfs_parse_alloc_options(struct super_block * s, char * options)
476 {
477     char * this_char, * value;
478 
479     REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
480 
481     while ( (this_char = strsep (&options, ":")) != NULL ) {
482 	if ((value = strchr (this_char, '=')) != NULL)
483 	    *value++ = 0;
484 
485 	if (!strcmp(this_char, "concentrating_formatted_nodes")) {
486 	    int temp;
487 	    SET_OPTION(concentrating_formatted_nodes);
488 	    temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10;
489 	    if (temp <= 0 || temp > 100) {
490 		REISERFS_SB(s)->s_alloc_options.border = 10;
491 	    } else {
492 		REISERFS_SB(s)->s_alloc_options.border = 100 / temp;
493 	   }
494 	    continue;
495 	}
496 	if (!strcmp(this_char, "displacing_large_files")) {
497 	    SET_OPTION(displacing_large_files);
498 	    REISERFS_SB(s)->s_alloc_options.large_file_size =
499 		(value && *value) ? simple_strtoul (value, &value, 0) : 16;
500 	    continue;
501 	}
502 	if (!strcmp(this_char, "displacing_new_packing_localities")) {
503 	    SET_OPTION(displacing_new_packing_localities);
504 	    continue;
505 	};
506 
507 	if (!strcmp(this_char, "old_hashed_relocation")) {
508 	    SET_OPTION(old_hashed_relocation);
509 	    continue;
510 	}
511 
512 	if (!strcmp(this_char, "new_hashed_relocation")) {
513 	    SET_OPTION(new_hashed_relocation);
514 	    continue;
515 	}
516 
517         if (!strcmp(this_char, "dirid_groups")) {
518 	    SET_OPTION(dirid_groups);
519 	    continue;
520         }
521         if (!strcmp(this_char, "oid_groups")) {
522 	    SET_OPTION(oid_groups);
523 	    continue;
524         }
525         if (!strcmp(this_char, "packing_groups")) {
526 	    SET_OPTION(packing_groups);
527 	    continue;
528         }
529 	if (!strcmp(this_char, "hashed_formatted_nodes")) {
530 	    SET_OPTION(hashed_formatted_nodes);
531 	    continue;
532 	}
533 
534 	if (!strcmp(this_char, "skip_busy")) {
535 	    SET_OPTION(skip_busy);
536 	    continue;
537 	}
538 
539 	if (!strcmp(this_char, "hundredth_slices")) {
540 	    SET_OPTION(hundredth_slices);
541 	    continue;
542 	}
543 
544 	if (!strcmp(this_char, "old_way")) {
545 	    SET_OPTION(old_way);
546 	    continue;
547 	}
548 
549 	if (!strcmp(this_char, "displace_based_on_dirid")) {
550 	    SET_OPTION(displace_based_on_dirid);
551 	    continue;
552 	}
553 
554 	if (!strcmp(this_char, "preallocmin")) {
555 	    REISERFS_SB(s)->s_alloc_options.preallocmin =
556 		(value && *value) ? simple_strtoul (value, &value, 0) : 4;
557 	    continue;
558 	}
559 
560 	if (!strcmp(this_char, "preallocsize")) {
561 	    REISERFS_SB(s)->s_alloc_options.preallocsize =
562 		(value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE;
563 	    continue;
564 	}
565 
566 	reiserfs_warning (s, "zam-4001: %s : unknown option - %s",
567 			  __FUNCTION__ , this_char);
568 	return 1;
569       }
570 
571     reiserfs_warning (s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
572     return 0;
573 }
574 
575 static inline void new_hashed_relocation (reiserfs_blocknr_hint_t * hint)
576 {
577     char * hash_in;
578     if (hint->formatted_node) {
579 	    hash_in = (char*)&hint->key.k_dir_id;
580     } else {
581 	if (!hint->inode) {
582 	    //hint->search_start = hint->beg;
583 	    hash_in = (char*)&hint->key.k_dir_id;
584 	} else
585 	    if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
586 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
587 	    else
588 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
589       }
590 
591     hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
592 }
593 
594 /*
595  * Relocation based on dirid, hashing them into a given bitmap block
596  * files. Formatted nodes are unaffected, a seperate policy covers them
597  */
598 static void
599 dirid_groups (reiserfs_blocknr_hint_t *hint)
600 {
601     unsigned long hash;
602     __u32 dirid = 0;
603     int bm = 0;
604     struct super_block *sb = hint->th->t_super;
605     if (hint->inode)
606 	dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
607     else if (hint->formatted_node)
608         dirid = hint->key.k_dir_id;
609 
610     if (dirid) {
611 	bm = bmap_hash_id(sb, dirid);
612 	hash = bm * (sb->s_blocksize << 3);
613 	/* give a portion of the block group to metadata */
614 	if (hint->inode)
615 	    hash += sb->s_blocksize/2;
616 	hint->search_start = hash;
617     }
618 }
619 
620 /*
621  * Relocation based on oid, hashing them into a given bitmap block
622  * files. Formatted nodes are unaffected, a seperate policy covers them
623  */
624 static void
625 oid_groups (reiserfs_blocknr_hint_t *hint)
626 {
627     if (hint->inode) {
628 	unsigned long hash;
629 	__u32 oid;
630 	__u32 dirid;
631 	int bm;
632 
633 	dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
634 
635 	/* keep the root dir and it's first set of subdirs close to
636 	 * the start of the disk
637 	 */
638 	if (dirid <= 2)
639 	    hash = (hint->inode->i_sb->s_blocksize << 3);
640 	else {
641 	    oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
642 	    bm = bmap_hash_id(hint->inode->i_sb, oid);
643 	    hash = bm * (hint->inode->i_sb->s_blocksize << 3);
644 	}
645 	hint->search_start = hash;
646     }
647 }
648 
649 /* returns 1 if it finds an indirect item and gets valid hint info
650  * from it, otherwise 0
651  */
652 static int get_left_neighbor(reiserfs_blocknr_hint_t *hint)
653 {
654     struct path * path;
655     struct buffer_head * bh;
656     struct item_head * ih;
657     int pos_in_item;
658     __u32 * item;
659     int ret = 0;
660 
661     if (!hint->path)		/* reiserfs code can call this function w/o pointer to path
662 				 * structure supplied; then we rely on supplied search_start */
663 	return 0;
664 
665     path = hint->path;
666     bh = get_last_bh(path);
667     RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor");
668     ih = get_ih(path);
669     pos_in_item = path->pos_in_item;
670     item = get_item (path);
671 
672     hint->search_start = bh->b_blocknr;
673 
674     if (!hint->formatted_node && is_indirect_le_ih (ih)) {
675 	/* for indirect item: go to left and look for the first non-hole entry
676 	   in the indirect item */
677 	if (pos_in_item == I_UNFM_NUM (ih))
678 	    pos_in_item--;
679 //	    pos_in_item = I_UNFM_NUM (ih) - 1;
680 	while (pos_in_item >= 0) {
681 	    int t=get_block_num(item,pos_in_item);
682 	    if (t) {
683 		hint->search_start = t;
684 		ret = 1;
685 		break;
686 	    }
687 	    pos_in_item --;
688 	}
689     }
690 
691     /* does result value fit into specified region? */
692     return ret;
693 }
694 
695 /* should be, if formatted node, then try to put on first part of the device
696    specified as number of percent with mount option device, else try to put
697    on last of device.  This is not to say it is good code to do so,
698    but the effect should be measured.  */
699 static inline void set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint)
700 {
701     b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
702 
703     if (hint->formatted_node)
704 	hint->end = border - 1;
705     else
706 	hint->beg = border;
707 }
708 
709 static inline void displace_large_file(reiserfs_blocknr_hint_t *hint)
710 {
711     if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
712 	hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg);
713     else
714 	hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg);
715 }
716 
717 static inline void hash_formatted_node(reiserfs_blocknr_hint_t *hint)
718 {
719    char * hash_in;
720 
721    if (!hint->inode)
722 	hash_in = (char*)&hint->key.k_dir_id;
723     else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
724 	hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
725     else
726 	hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
727 
728 	hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
729 }
730 
731 static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint)
732 {
733     return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
734 }
735 
736 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
737 static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
738 {
739     struct reiserfs_key * key = &hint->key;
740 
741     hint->th->displace_new_blocks = 0;
742     hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
743 }
744   #endif
745 
746 static inline int old_hashed_relocation (reiserfs_blocknr_hint_t * hint)
747 {
748     b_blocknr_t border;
749     u32 hash_in;
750 
751     if (hint->formatted_node || hint->inode == NULL) {
752 	return 0;
753       }
754 
755     hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
756     border = hint->beg + (u32) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1);
757     if (border > hint->search_start)
758 	hint->search_start = border;
759 
760     return 1;
761   }
762 
763 static inline int old_way (reiserfs_blocknr_hint_t * hint)
764 {
765     b_blocknr_t border;
766 
767     if (hint->formatted_node || hint->inode == NULL) {
768 	return 0;
769     }
770 
771       border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end  - hint->beg);
772     if (border > hint->search_start)
773 	hint->search_start = border;
774 
775     return 1;
776 }
777 
778 static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint)
779 {
780     struct reiserfs_key * key = &hint->key;
781     b_blocknr_t slice_start;
782 
783     slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
784     if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) {
785 	hint->search_start = slice_start;
786     }
787 }
788 
789 static void determine_search_start(reiserfs_blocknr_hint_t *hint,
790 					  int amount_needed)
791 {
792     struct super_block *s = hint->th->t_super;
793     int unfm_hint;
794 
795     hint->beg = 0;
796     hint->end = SB_BLOCK_COUNT(s) - 1;
797 
798     /* This is former border algorithm. Now with tunable border offset */
799     if (concentrating_formatted_nodes(s))
800 	set_border_in_hint(s, hint);
801 
802 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
803     /* whenever we create a new directory, we displace it.  At first we will
804        hash for location, later we might look for a moderately empty place for
805        it */
806     if (displacing_new_packing_localities(s)
807 	&& hint->th->displace_new_blocks) {
808 	displace_new_packing_locality(hint);
809 
810 	/* we do not continue determine_search_start,
811 	 * if new packing locality is being displaced */
812 	return;
813     }
814 #endif
815 
816     /* all persons should feel encouraged to add more special cases here and
817      * test them */
818 
819     if (displacing_large_files(s) && !hint->formatted_node
820 	&& this_blocknr_allocation_would_make_it_a_large_file(hint)) {
821 	displace_large_file(hint);
822 	return;
823     }
824 
825     /* if none of our special cases is relevant, use the left neighbor in the
826        tree order of the new node we are allocating for */
827     if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
828         hash_formatted_node(hint);
829 	return;
830     }
831 
832     unfm_hint = get_left_neighbor(hint);
833 
834     /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
835        new blocks are displaced based on directory ID. Also, if suggested search_start
836        is less than last preallocated block, we start searching from it, assuming that
837        HDD dataflow is faster in forward direction */
838     if ( TEST_OPTION(old_way, s)) {
839 	if (!hint->formatted_node) {
840 	    if ( !reiserfs_hashed_relocation(s))
841 		old_way(hint);
842 	    else if (!reiserfs_no_unhashed_relocation(s))
843 		old_hashed_relocation(hint);
844 
845 	    if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block)
846 		hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block;
847 	}
848 	return;
849     }
850 
851     /* This is an approach proposed by Hans */
852     if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) {
853 	hundredth_slices(hint);
854 	return;
855     }
856 
857     /* old_hashed_relocation only works on unformatted */
858     if (!unfm_hint && !hint->formatted_node &&
859         TEST_OPTION(old_hashed_relocation, s))
860     {
861 	old_hashed_relocation(hint);
862     }
863     /* new_hashed_relocation works with both formatted/unformatted nodes */
864     if ((!unfm_hint || hint->formatted_node) &&
865         TEST_OPTION(new_hashed_relocation, s))
866     {
867 	new_hashed_relocation(hint);
868     }
869     /* dirid grouping works only on unformatted nodes */
870     if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups,s))
871     {
872         dirid_groups(hint);
873     }
874 
875 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
876     if (hint->formatted_node && TEST_OPTION(dirid_groups,s))
877     {
878         dirid_groups(hint);
879     }
880 #endif
881 
882     /* oid grouping works only on unformatted nodes */
883     if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups,s))
884     {
885         oid_groups(hint);
886     }
887     return;
888 }
889 
890 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
891 {
892     /* make minimum size a mount option and benchmark both ways */
893     /* we preallocate blocks only for regular files, specific size */
894     /* benchmark preallocating always and see what happens */
895 
896     hint->prealloc_size = 0;
897 
898     if (!hint->formatted_node && hint->preallocate) {
899 	if (S_ISREG(hint->inode->i_mode)
900 	    && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize)
901 	    hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1;
902     }
903     return CARRY_ON;
904 }
905 
906 /* XXX I know it could be merged with upper-level function;
907    but may be result function would be too complex. */
908 static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint,
909 					 b_blocknr_t * new_blocknrs,
910 					 b_blocknr_t start, b_blocknr_t finish,
911 					 int min,
912 					 int amount_needed, int prealloc_size)
913 {
914     int rest = amount_needed;
915     int nr_allocated;
916 
917     while (rest > 0 && start <= finish) {
918 	nr_allocated = scan_bitmap (hint->th, &start, finish, min,
919 				    rest + prealloc_size, !hint->formatted_node,
920 				    hint->block);
921 
922 	if (nr_allocated == 0)	/* no new blocks allocated, return */
923 	    break;
924 
925 	/* fill free_blocknrs array first */
926 	while (rest > 0 && nr_allocated > 0) {
927 	    * new_blocknrs ++ = start ++;
928 	    rest --; nr_allocated --;
929 	}
930 
931 	/* do we have something to fill prealloc. array also ? */
932 	if (nr_allocated > 0) {
933 	    /* it means prealloc_size was greater that 0 and we do preallocation */
934 	    list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
935 		     &SB_JOURNAL(hint->th->t_super)->j_prealloc_list);
936 	    REISERFS_I(hint->inode)->i_prealloc_block = start;
937 	    REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated;
938 	    break;
939 	}
940     }
941 
942     return (amount_needed - rest);
943 }
944 
945 static inline int blocknrs_and_prealloc_arrays_from_search_start
946     (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed)
947 {
948     struct super_block *s = hint->th->t_super;
949     b_blocknr_t start = hint->search_start;
950     b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
951     int passno = 0;
952     int nr_allocated = 0;
953     int bigalloc = 0;
954 
955     determine_prealloc_size(hint);
956     if (!hint->formatted_node) {
957         int quota_ret;
958 #ifdef REISERQUOTA_DEBUG
959 	reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid);
960 #endif
961 	quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
962 	if (quota_ret)    /* Quota exceeded? */
963 	    return QUOTA_EXCEEDED;
964 	if (hint->preallocate && hint->prealloc_size ) {
965 #ifdef REISERQUOTA_DEBUG
966 	    reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid);
967 #endif
968 	    quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size);
969 	    if (quota_ret)
970 		hint->preallocate=hint->prealloc_size=0;
971 	}
972 	/* for unformatted nodes, force large allocations */
973 	bigalloc = amount_needed;
974     }
975 
976     do {
977 	/* in bigalloc mode, nr_allocated should stay zero until
978 	 * the entire allocation is filled
979 	 */
980 	if (unlikely(bigalloc && nr_allocated)) {
981 	    reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
982 	    bigalloc, nr_allocated);
983 	    /* reset things to a sane value */
984 	    bigalloc = amount_needed - nr_allocated;
985 	}
986 	/*
987 	 * try pass 0 and pass 1 looking for a nice big
988 	 * contiguous allocation.  Then reset and look
989 	 * for anything you can find.
990 	 */
991 	if (passno == 2 && bigalloc) {
992 	    passno = 0;
993 	    bigalloc = 0;
994 	}
995 	switch (passno++) {
996         case 0: /* Search from hint->search_start to end of disk */
997 	    start = hint->search_start;
998 	    finish = SB_BLOCK_COUNT(s) - 1;
999 	    break;
1000         case 1: /* Search from hint->beg to hint->search_start */
1001 	    start = hint->beg;
1002 	    finish = hint->search_start;
1003 	    break;
1004 	case 2: /* Last chance: Search from 0 to hint->beg */
1005 	    start = 0;
1006 	    finish = hint->beg;
1007 	    break;
1008 	default: /* We've tried searching everywhere, not enough space */
1009 	    /* Free the blocks */
1010 	    if (!hint->formatted_node) {
1011 #ifdef REISERQUOTA_DEBUG
1012 		reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (nospace) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid);
1013 #endif
1014 		DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated);     /* Free not allocated blocks */
1015 	    }
1016   	    while (nr_allocated --)
1017 		reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node);
1018 
1019 	    return NO_DISK_SPACE;
1020 	}
1021     } while ((nr_allocated += allocate_without_wrapping_disk (hint,
1022 			    new_blocknrs + nr_allocated, start, finish,
1023 			    bigalloc ? bigalloc : 1,
1024 			    amount_needed - nr_allocated,
1025 			    hint->prealloc_size))
1026 			< amount_needed);
1027     if ( !hint->formatted_node &&
1028          amount_needed + hint->prealloc_size >
1029 	 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1030     /* Some of preallocation blocks were not allocated */
1031 #ifdef REISERQUOTA_DEBUG
1032 	reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (failed prealloc) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid);
1033 #endif
1034 	DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
1035 	                         hint->prealloc_size - nr_allocated -
1036 				 REISERFS_I(hint->inode)->i_prealloc_count);
1037     }
1038 
1039     return CARRY_ON;
1040 }
1041 
1042 /* grab new blocknrs from preallocated list */
1043 /* return amount still needed after using them */
1044 static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint,
1045 					       b_blocknr_t *new_blocknrs, int amount_needed)
1046 {
1047     struct inode * inode = hint->inode;
1048 
1049     if (REISERFS_I(inode)->i_prealloc_count > 0) {
1050 	while (amount_needed) {
1051 
1052 	    *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++;
1053 	    REISERFS_I(inode)->i_prealloc_count --;
1054 
1055 	    amount_needed --;
1056 
1057 	    if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1058 		list_del(&REISERFS_I(inode)->i_prealloc_list);
1059 		break;
1060 	    }
1061 	}
1062       }
1063     /* return amount still needed after using preallocated blocks */
1064     return amount_needed;
1065 }
1066 
1067 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1068 			       b_blocknr_t * new_blocknrs, int amount_needed,
1069 			       int reserved_by_us /* Amount of blocks we have
1070 						      already reserved */)
1071 {
1072     int initial_amount_needed = amount_needed;
1073     int ret;
1074     struct super_block *s = hint->th->t_super;
1075 
1076     /* Check if there is enough space, taking into account reserved space */
1077     if ( SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1078 	 amount_needed - reserved_by_us)
1079         return NO_DISK_SPACE;
1080     /* should this be if !hint->inode &&  hint->preallocate? */
1081     /* do you mean hint->formatted_node can be removed ? - Zam */
1082     /* hint->formatted_node cannot be removed because we try to access
1083        inode information here, and there is often no inode assotiated with
1084        metadata allocations - green */
1085 
1086     if (!hint->formatted_node && hint->preallocate) {
1087 	amount_needed = use_preallocated_list_if_available
1088 	    (hint, new_blocknrs, amount_needed);
1089 	if (amount_needed == 0)	/* all blocknrs we need we got from
1090                                    prealloc. list */
1091 	    return CARRY_ON;
1092 	new_blocknrs += (initial_amount_needed - amount_needed);
1093     }
1094 
1095     /* find search start and save it in hint structure */
1096     determine_search_start(hint, amount_needed);
1097     if (hint->search_start >= SB_BLOCK_COUNT(s))
1098         hint->search_start = SB_BLOCK_COUNT(s) - 1;
1099 
1100     /* allocation itself; fill new_blocknrs and preallocation arrays */
1101     ret = blocknrs_and_prealloc_arrays_from_search_start
1102 	(hint, new_blocknrs, amount_needed);
1103 
1104     /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1105      * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1106      * variant) */
1107 
1108     if (ret != CARRY_ON) {
1109 	while (amount_needed ++ < initial_amount_needed) {
1110 	    reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1);
1111 	}
1112     }
1113     return ret;
1114 }
1115 
1116 /* These 2 functions are here to provide blocks reservation to the rest of kernel */
1117 /* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
1118    there are actually this much blocks on the FS available */
1119 void reiserfs_claim_blocks_to_be_allocated(
1120 				      struct super_block *sb, /* super block of
1121 							        filesystem where
1122 								blocks should be
1123 								reserved */
1124 				      int blocks /* How much to reserve */
1125 					  )
1126 {
1127 
1128     /* Fast case, if reservation is zero - exit immediately. */
1129     if ( !blocks )
1130 	return;
1131 
1132     spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1133     REISERFS_SB(sb)->reserved_blocks += blocks;
1134     spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1135 }
1136 
1137 /* Unreserve @blocks amount of blocks in fs pointed by @sb */
1138 void reiserfs_release_claimed_blocks(
1139 				struct super_block *sb, /* super block of
1140 							  filesystem where
1141 							  blocks should be
1142 							  reserved */
1143 				int blocks /* How much to unreserve */
1144 					  )
1145 {
1146 
1147     /* Fast case, if unreservation is zero - exit immediately. */
1148     if ( !blocks )
1149 	return;
1150 
1151     spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1152     REISERFS_SB(sb)->reserved_blocks -= blocks;
1153     spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1154     RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?");
1155 }
1156 
1157 /* This function estimates how much pages we will be able to write to FS
1158    used for reiserfs_file_write() purposes for now. */
1159 int reiserfs_can_fit_pages ( struct super_block *sb /* superblock of filesystem
1160 						       to estimate space */ )
1161 {
1162 	int space;
1163 
1164 	spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1165 	space = (SB_FREE_BLOCKS(sb) - REISERFS_SB(sb)->reserved_blocks) >> ( PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
1166 	spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1167 
1168 	return space>0?space:0;
1169 }
1170