xref: /openbmc/linux/fs/ocfs2/localalloc.c (revision a1e58bbd)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * localalloc.c
5  *
6  * Node local data allocation
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
26 #include <linux/fs.h>
27 #include <linux/types.h>
28 #include <linux/slab.h>
29 #include <linux/highmem.h>
30 #include <linux/bitops.h>
31 
32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
33 #include <cluster/masklog.h>
34 
35 #include "ocfs2.h"
36 
37 #include "alloc.h"
38 #include "dlmglue.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "localalloc.h"
42 #include "suballoc.h"
43 #include "super.h"
44 #include "sysfile.h"
45 
46 #include "buffer_head_io.h"
47 
/* Address of the local alloc structure (id2.i_lab) inside a dinode. */
#define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))

/*
 * Forward declarations of the file-local helpers defined further down.
 */

/* Size of a local alloc window, in clusters (bits). */
static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb);

/* Number of bits currently set in the local alloc bitmap. */
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);

/* Find a run of numbits clear bits; returns its start bit or -1. */
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
					     struct ocfs2_dinode *alloc,
					     u32 numbits);

/* Reset the window counters, offset and bitmap of a local alloc. */
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);

/* Free the unused bits of a local alloc back to the main bitmap. */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh);

/* Reserve main bitmap space for a new window. */
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh);

/* Claim clusters from the main bitmap and install them as the window. */
static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac);

/* Return the old window's unused bits and allocate a fresh window. */
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode);
77 
78 static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
79 {
80 	BUG_ON(osb->s_clustersize_bits > 20);
81 
82 	/* Size local alloc windows by the megabyte */
83 	return osb->local_alloc_size << (20 - osb->s_clustersize_bits);
84 }
85 
86 /*
87  * Tell us whether a given allocation should use the local alloc
88  * file. Otherwise, it has to go to the main bitmap.
89  */
90 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
91 {
92 	int la_bits = ocfs2_local_alloc_window_bits(osb);
93 	int ret = 0;
94 
95 	if (osb->local_alloc_state != OCFS2_LA_ENABLED)
96 		goto bail;
97 
98 	/* la_bits should be at least twice the size (in clusters) of
99 	 * a new block group. We want to be sure block group
100 	 * allocations go through the local alloc, so allow an
101 	 * allocation to take up to half the bitmap. */
102 	if (bits > (la_bits / 2))
103 		goto bail;
104 
105 	ret = 1;
106 bail:
107 	mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
108 	     osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
109 	return ret;
110 }
111 
112 int ocfs2_load_local_alloc(struct ocfs2_super *osb)
113 {
114 	int status = 0;
115 	struct ocfs2_dinode *alloc = NULL;
116 	struct buffer_head *alloc_bh = NULL;
117 	u32 num_used;
118 	struct inode *inode = NULL;
119 	struct ocfs2_local_alloc *la;
120 
121 	mlog_entry_void();
122 
123 	if (osb->local_alloc_size == 0)
124 		goto bail;
125 
126 	if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) {
127 		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
128 		     "than max possible %u. Using defaults.\n",
129 		     ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1));
130 		osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
131 	}
132 
133 	/* read the alloc off disk */
134 	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
135 					    osb->slot_num);
136 	if (!inode) {
137 		status = -EINVAL;
138 		mlog_errno(status);
139 		goto bail;
140 	}
141 
142 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
143 				  &alloc_bh, 0, inode);
144 	if (status < 0) {
145 		mlog_errno(status);
146 		goto bail;
147 	}
148 
149 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
150 	la = OCFS2_LOCAL_ALLOC(alloc);
151 
152 	if (!(le32_to_cpu(alloc->i_flags) &
153 	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
154 		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
155 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
156 		status = -EINVAL;
157 		goto bail;
158 	}
159 
160 	if ((la->la_size == 0) ||
161 	    (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
162 		mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
163 		     le16_to_cpu(la->la_size));
164 		status = -EINVAL;
165 		goto bail;
166 	}
167 
168 	/* do a little verification. */
169 	num_used = ocfs2_local_alloc_count_bits(alloc);
170 
171 	/* hopefully the local alloc has always been recovered before
172 	 * we load it. */
173 	if (num_used
174 	    || alloc->id1.bitmap1.i_used
175 	    || alloc->id1.bitmap1.i_total
176 	    || la->la_bm_off)
177 		mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
178 		     "found = %u, set = %u, taken = %u, off = %u\n",
179 		     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
180 		     le32_to_cpu(alloc->id1.bitmap1.i_total),
181 		     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
182 
183 	osb->local_alloc_bh = alloc_bh;
184 	osb->local_alloc_state = OCFS2_LA_ENABLED;
185 
186 bail:
187 	if (status < 0)
188 		if (alloc_bh)
189 			brelse(alloc_bh);
190 	if (inode)
191 		iput(inode);
192 
193 	mlog(0, "Local alloc window bits = %d\n",
194 	     ocfs2_local_alloc_window_bits(osb));
195 
196 	mlog_exit(status);
197 	return status;
198 }
199 
200 /*
201  * return any unused bits to the bitmap and write out a clean
202  * local_alloc.
203  *
204  * local_alloc_bh is optional. If not passed, we will simply use the
205  * one off osb. If you do pass it however, be warned that it *will* be
206  * returned brelse'd and NULL'd out.*/
207 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
208 {
209 	int status;
210 	handle_t *handle;
211 	struct inode *local_alloc_inode = NULL;
212 	struct buffer_head *bh = NULL;
213 	struct buffer_head *main_bm_bh = NULL;
214 	struct inode *main_bm_inode = NULL;
215 	struct ocfs2_dinode *alloc_copy = NULL;
216 	struct ocfs2_dinode *alloc = NULL;
217 
218 	mlog_entry_void();
219 
220 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
221 		goto out;
222 
223 	local_alloc_inode =
224 		ocfs2_get_system_file_inode(osb,
225 					    LOCAL_ALLOC_SYSTEM_INODE,
226 					    osb->slot_num);
227 	if (!local_alloc_inode) {
228 		status = -ENOENT;
229 		mlog_errno(status);
230 		goto out;
231 	}
232 
233 	osb->local_alloc_state = OCFS2_LA_DISABLED;
234 
235 	main_bm_inode = ocfs2_get_system_file_inode(osb,
236 						    GLOBAL_BITMAP_SYSTEM_INODE,
237 						    OCFS2_INVALID_SLOT);
238 	if (!main_bm_inode) {
239 		status = -EINVAL;
240 		mlog_errno(status);
241 		goto out;
242 	}
243 
244 	mutex_lock(&main_bm_inode->i_mutex);
245 
246 	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
247 	if (status < 0) {
248 		mlog_errno(status);
249 		goto out_mutex;
250 	}
251 
252 	/* WINDOW_MOVE_CREDITS is a bit heavy... */
253 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
254 	if (IS_ERR(handle)) {
255 		mlog_errno(PTR_ERR(handle));
256 		handle = NULL;
257 		goto out_unlock;
258 	}
259 
260 	bh = osb->local_alloc_bh;
261 	alloc = (struct ocfs2_dinode *) bh->b_data;
262 
263 	alloc_copy = kmalloc(bh->b_size, GFP_KERNEL);
264 	if (!alloc_copy) {
265 		status = -ENOMEM;
266 		goto out_commit;
267 	}
268 	memcpy(alloc_copy, alloc, bh->b_size);
269 
270 	status = ocfs2_journal_access(handle, local_alloc_inode, bh,
271 				      OCFS2_JOURNAL_ACCESS_WRITE);
272 	if (status < 0) {
273 		mlog_errno(status);
274 		goto out_commit;
275 	}
276 
277 	ocfs2_clear_local_alloc(alloc);
278 
279 	status = ocfs2_journal_dirty(handle, bh);
280 	if (status < 0) {
281 		mlog_errno(status);
282 		goto out_commit;
283 	}
284 
285 	brelse(bh);
286 	osb->local_alloc_bh = NULL;
287 	osb->local_alloc_state = OCFS2_LA_UNUSED;
288 
289 	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
290 					  main_bm_inode, main_bm_bh);
291 	if (status < 0)
292 		mlog_errno(status);
293 
294 out_commit:
295 	ocfs2_commit_trans(osb, handle);
296 
297 out_unlock:
298 	if (main_bm_bh)
299 		brelse(main_bm_bh);
300 
301 	ocfs2_inode_unlock(main_bm_inode, 1);
302 
303 out_mutex:
304 	mutex_unlock(&main_bm_inode->i_mutex);
305 	iput(main_bm_inode);
306 
307 out:
308 	if (local_alloc_inode)
309 		iput(local_alloc_inode);
310 
311 	if (alloc_copy)
312 		kfree(alloc_copy);
313 
314 	mlog_exit_void();
315 }
316 
317 /*
318  * We want to free the bitmap bits outside of any recovery context as
319  * we'll need a cluster lock to do so, but we must clear the local
320  * alloc before giving up the recovered nodes journal. To solve this,
321  * we kmalloc a copy of the local alloc before it's change for the
322  * caller to process with ocfs2_complete_local_alloc_recovery
323  */
324 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
325 				     int slot_num,
326 				     struct ocfs2_dinode **alloc_copy)
327 {
328 	int status = 0;
329 	struct buffer_head *alloc_bh = NULL;
330 	struct inode *inode = NULL;
331 	struct ocfs2_dinode *alloc;
332 
333 	mlog_entry("(slot_num = %d)\n", slot_num);
334 
335 	*alloc_copy = NULL;
336 
337 	inode = ocfs2_get_system_file_inode(osb,
338 					    LOCAL_ALLOC_SYSTEM_INODE,
339 					    slot_num);
340 	if (!inode) {
341 		status = -EINVAL;
342 		mlog_errno(status);
343 		goto bail;
344 	}
345 
346 	mutex_lock(&inode->i_mutex);
347 
348 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
349 				  &alloc_bh, 0, inode);
350 	if (status < 0) {
351 		mlog_errno(status);
352 		goto bail;
353 	}
354 
355 	*alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
356 	if (!(*alloc_copy)) {
357 		status = -ENOMEM;
358 		goto bail;
359 	}
360 	memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
361 
362 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
363 	ocfs2_clear_local_alloc(alloc);
364 
365 	status = ocfs2_write_block(osb, alloc_bh, inode);
366 	if (status < 0)
367 		mlog_errno(status);
368 
369 bail:
370 	if ((status < 0) && (*alloc_copy)) {
371 		kfree(*alloc_copy);
372 		*alloc_copy = NULL;
373 	}
374 
375 	if (alloc_bh)
376 		brelse(alloc_bh);
377 
378 	if (inode) {
379 		mutex_unlock(&inode->i_mutex);
380 		iput(inode);
381 	}
382 
383 	mlog_exit(status);
384 	return status;
385 }
386 
387 /*
388  * Step 2: By now, we've completed the journal recovery, we've stamped
389  * a clean local alloc on disk and dropped the node out of the
390  * recovery map. Dlm locks will no longer stall, so lets clear out the
391  * main bitmap.
392  */
393 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
394 					struct ocfs2_dinode *alloc)
395 {
396 	int status;
397 	handle_t *handle;
398 	struct buffer_head *main_bm_bh = NULL;
399 	struct inode *main_bm_inode;
400 
401 	mlog_entry_void();
402 
403 	main_bm_inode = ocfs2_get_system_file_inode(osb,
404 						    GLOBAL_BITMAP_SYSTEM_INODE,
405 						    OCFS2_INVALID_SLOT);
406 	if (!main_bm_inode) {
407 		status = -EINVAL;
408 		mlog_errno(status);
409 		goto out;
410 	}
411 
412 	mutex_lock(&main_bm_inode->i_mutex);
413 
414 	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
415 	if (status < 0) {
416 		mlog_errno(status);
417 		goto out_mutex;
418 	}
419 
420 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
421 	if (IS_ERR(handle)) {
422 		status = PTR_ERR(handle);
423 		handle = NULL;
424 		mlog_errno(status);
425 		goto out_unlock;
426 	}
427 
428 	/* we want the bitmap change to be recorded on disk asap */
429 	handle->h_sync = 1;
430 
431 	status = ocfs2_sync_local_to_main(osb, handle, alloc,
432 					  main_bm_inode, main_bm_bh);
433 	if (status < 0)
434 		mlog_errno(status);
435 
436 	ocfs2_commit_trans(osb, handle);
437 
438 out_unlock:
439 	ocfs2_inode_unlock(main_bm_inode, 1);
440 
441 out_mutex:
442 	mutex_unlock(&main_bm_inode->i_mutex);
443 
444 	if (main_bm_bh)
445 		brelse(main_bm_bh);
446 
447 	iput(main_bm_inode);
448 
449 out:
450 	mlog_exit(status);
451 	return status;
452 }
453 
454 /*
455  * make sure we've got at least bitswanted contiguous bits in the
456  * local alloc. You lose them when you drop i_mutex.
457  *
458  * We will add ourselves to the transaction passed in, but may start
459  * our own in order to shift windows.
460  */
461 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
462 				   u32 bits_wanted,
463 				   struct ocfs2_alloc_context *ac)
464 {
465 	int status;
466 	struct ocfs2_dinode *alloc;
467 	struct inode *local_alloc_inode;
468 	unsigned int free_bits;
469 
470 	mlog_entry_void();
471 
472 	BUG_ON(!ac);
473 
474 	local_alloc_inode =
475 		ocfs2_get_system_file_inode(osb,
476 					    LOCAL_ALLOC_SYSTEM_INODE,
477 					    osb->slot_num);
478 	if (!local_alloc_inode) {
479 		status = -ENOENT;
480 		mlog_errno(status);
481 		goto bail;
482 	}
483 
484 	mutex_lock(&local_alloc_inode->i_mutex);
485 
486 	if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
487 		status = -ENOSPC;
488 		goto bail;
489 	}
490 
491 	if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) {
492 		mlog(0, "Asking for more than my max window size!\n");
493 		status = -ENOSPC;
494 		goto bail;
495 	}
496 
497 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
498 
499 #ifdef OCFS2_DEBUG_FS
500 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
501 	    ocfs2_local_alloc_count_bits(alloc)) {
502 		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
503 			    "%u free bits, but a count shows %u",
504 			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
505 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
506 			    ocfs2_local_alloc_count_bits(alloc));
507 		status = -EIO;
508 		goto bail;
509 	}
510 #endif
511 
512 	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
513 		le32_to_cpu(alloc->id1.bitmap1.i_used);
514 	if (bits_wanted > free_bits) {
515 		/* uhoh, window change time. */
516 		status =
517 			ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
518 		if (status < 0) {
519 			if (status != -ENOSPC)
520 				mlog_errno(status);
521 			goto bail;
522 		}
523 	}
524 
525 	ac->ac_inode = local_alloc_inode;
526 	ac->ac_which = OCFS2_AC_USE_LOCAL;
527 	get_bh(osb->local_alloc_bh);
528 	ac->ac_bh = osb->local_alloc_bh;
529 	status = 0;
530 bail:
531 	if (status < 0 && local_alloc_inode) {
532 		mutex_unlock(&local_alloc_inode->i_mutex);
533 		iput(local_alloc_inode);
534 	}
535 
536 	mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
537 	     status);
538 
539 	mlog_exit(status);
540 	return status;
541 }
542 
543 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
544 				 handle_t *handle,
545 				 struct ocfs2_alloc_context *ac,
546 				 u32 bits_wanted,
547 				 u32 *bit_off,
548 				 u32 *num_bits)
549 {
550 	int status, start;
551 	struct inode *local_alloc_inode;
552 	void *bitmap;
553 	struct ocfs2_dinode *alloc;
554 	struct ocfs2_local_alloc *la;
555 
556 	mlog_entry_void();
557 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
558 
559 	local_alloc_inode = ac->ac_inode;
560 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
561 	la = OCFS2_LOCAL_ALLOC(alloc);
562 
563 	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
564 	if (start == -1) {
565 		/* TODO: Shouldn't we just BUG here? */
566 		status = -ENOSPC;
567 		mlog_errno(status);
568 		goto bail;
569 	}
570 
571 	bitmap = la->la_bitmap;
572 	*bit_off = le32_to_cpu(la->la_bm_off) + start;
573 	/* local alloc is always contiguous by nature -- we never
574 	 * delete bits from it! */
575 	*num_bits = bits_wanted;
576 
577 	status = ocfs2_journal_access(handle, local_alloc_inode,
578 				      osb->local_alloc_bh,
579 				      OCFS2_JOURNAL_ACCESS_WRITE);
580 	if (status < 0) {
581 		mlog_errno(status);
582 		goto bail;
583 	}
584 
585 	while(bits_wanted--)
586 		ocfs2_set_bit(start++, bitmap);
587 
588 	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
589 
590 	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
591 	if (status < 0) {
592 		mlog_errno(status);
593 		goto bail;
594 	}
595 
596 	status = 0;
597 bail:
598 	mlog_exit(status);
599 	return status;
600 }
601 
602 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
603 {
604 	int i;
605 	u8 *buffer;
606 	u32 count = 0;
607 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
608 
609 	mlog_entry_void();
610 
611 	buffer = la->la_bitmap;
612 	for (i = 0; i < le16_to_cpu(la->la_size); i++)
613 		count += hweight8(buffer[i]);
614 
615 	mlog_exit(count);
616 	return count;
617 }
618 
619 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
620 					     struct ocfs2_dinode *alloc,
621 					     u32 numbits)
622 {
623 	int numfound, bitoff, left, startoff, lastzero;
624 	void *bitmap = NULL;
625 
626 	mlog_entry("(numbits wanted = %u)\n", numbits);
627 
628 	if (!alloc->id1.bitmap1.i_total) {
629 		mlog(0, "No bits in my window!\n");
630 		bitoff = -1;
631 		goto bail;
632 	}
633 
634 	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
635 
636 	numfound = bitoff = startoff = 0;
637 	lastzero = -1;
638 	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
639 	while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
640 		if (bitoff == left) {
641 			/* mlog(0, "bitoff (%d) == left", bitoff); */
642 			break;
643 		}
644 		/* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
645 		   "numfound = %d\n", bitoff, startoff, numfound);*/
646 
647 		/* Ok, we found a zero bit... is it contig. or do we
648 		 * start over?*/
649 		if (bitoff == startoff) {
650 			/* we found a zero */
651 			numfound++;
652 			startoff++;
653 		} else {
654 			/* got a zero after some ones */
655 			numfound = 1;
656 			startoff = bitoff+1;
657 		}
658 		/* we got everything we needed */
659 		if (numfound == numbits) {
660 			/* mlog(0, "Found it all!\n"); */
661 			break;
662 		}
663 	}
664 
665 	mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
666 	     numfound);
667 
668 	if (numfound == numbits)
669 		bitoff = startoff - numfound;
670 	else
671 		bitoff = -1;
672 
673 bail:
674 	mlog_exit(bitoff);
675 	return bitoff;
676 }
677 
678 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
679 {
680 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
681 	int i;
682 	mlog_entry_void();
683 
684 	alloc->id1.bitmap1.i_total = 0;
685 	alloc->id1.bitmap1.i_used = 0;
686 	la->la_bm_off = 0;
687 	for(i = 0; i < le16_to_cpu(la->la_size); i++)
688 		la->la_bitmap[i] = 0;
689 
690 	mlog_exit_void();
691 }
692 
#if 0
/*
 * Debugging aid for window shifts (compiled out): BUG() if any of the
 * 'count' bits starting at 'start' is set. Bits are checked from the
 * highest down to the lowest.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int idx;

	for (idx = count; idx > 0; idx--) {
		unsigned int bit = start + idx - 1;

		if (!ocfs2_test_bit(bit, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = "
		       "%u\n", start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       bit);
		BUG();
	}
}
#endif
711 
712 /*
713  * sync the local alloc to main bitmap.
714  *
715  * assumes you've already locked the main bitmap -- the bitmap inode
716  * passed is used for caching.
717  */
718 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
719 				    handle_t *handle,
720 				    struct ocfs2_dinode *alloc,
721 				    struct inode *main_bm_inode,
722 				    struct buffer_head *main_bm_bh)
723 {
724 	int status = 0;
725 	int bit_off, left, count, start;
726 	u64 la_start_blk;
727 	u64 blkno;
728 	void *bitmap;
729 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
730 
731 	mlog_entry("total = %u, used = %u\n",
732 		   le32_to_cpu(alloc->id1.bitmap1.i_total),
733 		   le32_to_cpu(alloc->id1.bitmap1.i_used));
734 
735 	if (!alloc->id1.bitmap1.i_total) {
736 		mlog(0, "nothing to sync!\n");
737 		goto bail;
738 	}
739 
740 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
741 	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
742 		mlog(0, "all bits were taken!\n");
743 		goto bail;
744 	}
745 
746 	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
747 						le32_to_cpu(la->la_bm_off));
748 	bitmap = la->la_bitmap;
749 	start = count = bit_off = 0;
750 	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
751 
752 	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
753 	       != -1) {
754 		if ((bit_off < left) && (bit_off == start)) {
755 			count++;
756 			start++;
757 			continue;
758 		}
759 		if (count) {
760 			blkno = la_start_blk +
761 				ocfs2_clusters_to_blocks(osb->sb,
762 							 start - count);
763 
764 			mlog(0, "freeing %u bits starting at local alloc bit "
765 			     "%u (la_start_blk = %llu, blkno = %llu)\n",
766 			     count, start - count,
767 			     (unsigned long long)la_start_blk,
768 			     (unsigned long long)blkno);
769 
770 			status = ocfs2_free_clusters(handle, main_bm_inode,
771 						     main_bm_bh, blkno, count);
772 			if (status < 0) {
773 				mlog_errno(status);
774 				goto bail;
775 			}
776 		}
777 		if (bit_off >= left)
778 			break;
779 		count = 1;
780 		start = bit_off + 1;
781 	}
782 
783 bail:
784 	mlog_exit(status);
785 	return status;
786 }
787 
788 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
789 						struct ocfs2_alloc_context **ac,
790 						struct inode **bitmap_inode,
791 						struct buffer_head **bitmap_bh)
792 {
793 	int status;
794 
795 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
796 	if (!(*ac)) {
797 		status = -ENOMEM;
798 		mlog_errno(status);
799 		goto bail;
800 	}
801 
802 	(*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb);
803 
804 	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
805 	if (status < 0) {
806 		if (status != -ENOSPC)
807 			mlog_errno(status);
808 		goto bail;
809 	}
810 
811 	*bitmap_inode = (*ac)->ac_inode;
812 	igrab(*bitmap_inode);
813 	*bitmap_bh = (*ac)->ac_bh;
814 	get_bh(*bitmap_bh);
815 	status = 0;
816 bail:
817 	if ((status < 0) && *ac) {
818 		ocfs2_free_alloc_context(*ac);
819 		*ac = NULL;
820 	}
821 
822 	mlog_exit(status);
823 	return status;
824 }
825 
826 /*
827  * pass it the bitmap lock in lock_bh if you have it.
828  */
829 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
830 					handle_t *handle,
831 					struct ocfs2_alloc_context *ac)
832 {
833 	int status = 0;
834 	u32 cluster_off, cluster_count;
835 	struct ocfs2_dinode *alloc = NULL;
836 	struct ocfs2_local_alloc *la;
837 
838 	mlog_entry_void();
839 
840 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
841 	la = OCFS2_LOCAL_ALLOC(alloc);
842 
843 	if (alloc->id1.bitmap1.i_total)
844 		mlog(0, "asking me to alloc a new window over a non-empty "
845 		     "one\n");
846 
847 	mlog(0, "Allocating %u clusters for a new window.\n",
848 	     ocfs2_local_alloc_window_bits(osb));
849 
850 	/* Instruct the allocation code to try the most recently used
851 	 * cluster group. We'll re-record the group used this pass
852 	 * below. */
853 	ac->ac_last_group = osb->la_last_gd;
854 
855 	/* we used the generic suballoc reserve function, but we set
856 	 * everything up nicely, so there's no reason why we can't use
857 	 * the more specific cluster api to claim bits. */
858 	status = ocfs2_claim_clusters(osb, handle, ac,
859 				      ocfs2_local_alloc_window_bits(osb),
860 				      &cluster_off, &cluster_count);
861 	if (status < 0) {
862 		if (status != -ENOSPC)
863 			mlog_errno(status);
864 		goto bail;
865 	}
866 
867 	osb->la_last_gd = ac->ac_last_group;
868 
869 	la->la_bm_off = cpu_to_le32(cluster_off);
870 	alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
871 	/* just in case... In the future when we find space ourselves,
872 	 * we don't have to get all contiguous -- but we'll have to
873 	 * set all previously used bits in bitmap and update
874 	 * la_bits_set before setting the bits in the main bitmap. */
875 	alloc->id1.bitmap1.i_used = 0;
876 	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
877 	       le16_to_cpu(la->la_size));
878 
879 	mlog(0, "New window allocated:\n");
880 	mlog(0, "window la_bm_off = %u\n",
881 	     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
882 	mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
883 
884 bail:
885 	mlog_exit(status);
886 	return status;
887 }
888 
889 /* Note that we do *NOT* lock the local alloc inode here as
890  * it's been locked already for us. */
891 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
892 					  struct inode *local_alloc_inode)
893 {
894 	int status = 0;
895 	struct buffer_head *main_bm_bh = NULL;
896 	struct inode *main_bm_inode = NULL;
897 	handle_t *handle = NULL;
898 	struct ocfs2_dinode *alloc;
899 	struct ocfs2_dinode *alloc_copy = NULL;
900 	struct ocfs2_alloc_context *ac = NULL;
901 
902 	mlog_entry_void();
903 
904 	/* This will lock the main bitmap for us. */
905 	status = ocfs2_local_alloc_reserve_for_window(osb,
906 						      &ac,
907 						      &main_bm_inode,
908 						      &main_bm_bh);
909 	if (status < 0) {
910 		if (status != -ENOSPC)
911 			mlog_errno(status);
912 		goto bail;
913 	}
914 
915 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
916 	if (IS_ERR(handle)) {
917 		status = PTR_ERR(handle);
918 		handle = NULL;
919 		mlog_errno(status);
920 		goto bail;
921 	}
922 
923 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
924 
925 	/* We want to clear the local alloc before doing anything
926 	 * else, so that if we error later during this operation,
927 	 * local alloc shutdown won't try to double free main bitmap
928 	 * bits. Make a copy so the sync function knows which bits to
929 	 * free. */
930 	alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL);
931 	if (!alloc_copy) {
932 		status = -ENOMEM;
933 		mlog_errno(status);
934 		goto bail;
935 	}
936 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
937 
938 	status = ocfs2_journal_access(handle, local_alloc_inode,
939 				      osb->local_alloc_bh,
940 				      OCFS2_JOURNAL_ACCESS_WRITE);
941 	if (status < 0) {
942 		mlog_errno(status);
943 		goto bail;
944 	}
945 
946 	ocfs2_clear_local_alloc(alloc);
947 
948 	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
949 	if (status < 0) {
950 		mlog_errno(status);
951 		goto bail;
952 	}
953 
954 	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
955 					  main_bm_inode, main_bm_bh);
956 	if (status < 0) {
957 		mlog_errno(status);
958 		goto bail;
959 	}
960 
961 	status = ocfs2_local_alloc_new_window(osb, handle, ac);
962 	if (status < 0) {
963 		if (status != -ENOSPC)
964 			mlog_errno(status);
965 		goto bail;
966 	}
967 
968 	atomic_inc(&osb->alloc_stats.moves);
969 
970 	status = 0;
971 bail:
972 	if (handle)
973 		ocfs2_commit_trans(osb, handle);
974 
975 	if (main_bm_bh)
976 		brelse(main_bm_bh);
977 
978 	if (main_bm_inode)
979 		iput(main_bm_inode);
980 
981 	if (alloc_copy)
982 		kfree(alloc_copy);
983 
984 	if (ac)
985 		ocfs2_free_alloc_context(ac);
986 
987 	mlog_exit(status);
988 	return status;
989 }
990 
991