xref: /openbmc/linux/fs/ocfs2/localalloc.c (revision b627b4ed)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * localalloc.c
5  *
6  * Node local data allocation
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 021110-1307, USA.
24  */
25 
26 #include <linux/fs.h>
27 #include <linux/types.h>
28 #include <linux/slab.h>
29 #include <linux/highmem.h>
30 #include <linux/bitops.h>
31 
32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
33 #include <cluster/masklog.h>
34 
35 #include "ocfs2.h"
36 
37 #include "alloc.h"
38 #include "blockcheck.h"
39 #include "dlmglue.h"
40 #include "inode.h"
41 #include "journal.h"
42 #include "localalloc.h"
43 #include "suballoc.h"
44 #include "super.h"
45 #include "sysfile.h"
46 
47 #include "buffer_head_io.h"
48 
49 #define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))
50 
51 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 
53 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 					     struct ocfs2_dinode *alloc,
55 					     u32 numbits);
56 
57 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 
59 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
60 				    handle_t *handle,
61 				    struct ocfs2_dinode *alloc,
62 				    struct inode *main_bm_inode,
63 				    struct buffer_head *main_bm_bh);
64 
65 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
66 						struct ocfs2_alloc_context **ac,
67 						struct inode **bitmap_inode,
68 						struct buffer_head **bitmap_bh);
69 
70 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
71 					handle_t *handle,
72 					struct ocfs2_alloc_context *ac);
73 
74 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 					  struct inode *local_alloc_inode);
76 
77 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78 {
79 	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
80 		osb->local_alloc_state == OCFS2_LA_ENABLED);
81 }
82 
83 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
84 				      unsigned int num_clusters)
85 {
86 	spin_lock(&osb->osb_lock);
87 	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
88 	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
89 		if (num_clusters >= osb->local_alloc_default_bits) {
90 			cancel_delayed_work(&osb->la_enable_wq);
91 			osb->local_alloc_state = OCFS2_LA_ENABLED;
92 		}
93 	spin_unlock(&osb->osb_lock);
94 }
95 
96 void ocfs2_la_enable_worker(struct work_struct *work)
97 {
98 	struct ocfs2_super *osb =
99 		container_of(work, struct ocfs2_super,
100 			     la_enable_wq.work);
101 	spin_lock(&osb->osb_lock);
102 	osb->local_alloc_state = OCFS2_LA_ENABLED;
103 	spin_unlock(&osb->osb_lock);
104 }
105 
106 /*
107  * Tell us whether a given allocation should use the local alloc
108  * file. Otherwise, it has to go to the main bitmap.
109  *
110  * This function does semi-dirty reads of local alloc size and state!
111  * This is ok however, as the values are re-checked once under mutex.
112  */
113 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
114 {
115 	int ret = 0;
116 	int la_bits;
117 
118 	spin_lock(&osb->osb_lock);
119 	la_bits = osb->local_alloc_bits;
120 
121 	if (!ocfs2_la_state_enabled(osb))
122 		goto bail;
123 
124 	/* la_bits should be at least twice the size (in clusters) of
125 	 * a new block group. We want to be sure block group
126 	 * allocations go through the local alloc, so allow an
127 	 * allocation to take up to half the bitmap. */
128 	if (bits > (la_bits / 2))
129 		goto bail;
130 
131 	ret = 1;
132 bail:
133 	mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
134 	     osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
135 	spin_unlock(&osb->osb_lock);
136 	return ret;
137 }
138 
139 int ocfs2_load_local_alloc(struct ocfs2_super *osb)
140 {
141 	int status = 0;
142 	struct ocfs2_dinode *alloc = NULL;
143 	struct buffer_head *alloc_bh = NULL;
144 	u32 num_used;
145 	struct inode *inode = NULL;
146 	struct ocfs2_local_alloc *la;
147 
148 	mlog_entry_void();
149 
150 	if (osb->local_alloc_bits == 0)
151 		goto bail;
152 
153 	if (osb->local_alloc_bits >= osb->bitmap_cpg) {
154 		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
155 		     "than max possible %u. Using defaults.\n",
156 		     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 		osb->local_alloc_bits =
158 			ocfs2_megabytes_to_clusters(osb->sb,
159 						    OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
160 	}
161 
162 	/* read the alloc off disk */
163 	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
164 					    osb->slot_num);
165 	if (!inode) {
166 		status = -EINVAL;
167 		mlog_errno(status);
168 		goto bail;
169 	}
170 
171 	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
172 					     OCFS2_BH_IGNORE_CACHE);
173 	if (status < 0) {
174 		mlog_errno(status);
175 		goto bail;
176 	}
177 
178 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
179 	la = OCFS2_LOCAL_ALLOC(alloc);
180 
181 	if (!(le32_to_cpu(alloc->i_flags) &
182 	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
183 		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
184 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
185 		status = -EINVAL;
186 		goto bail;
187 	}
188 
189 	if ((la->la_size == 0) ||
190 	    (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
191 		mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
192 		     le16_to_cpu(la->la_size));
193 		status = -EINVAL;
194 		goto bail;
195 	}
196 
197 	/* do a little verification. */
198 	num_used = ocfs2_local_alloc_count_bits(alloc);
199 
200 	/* hopefully the local alloc has always been recovered before
201 	 * we load it. */
202 	if (num_used
203 	    || alloc->id1.bitmap1.i_used
204 	    || alloc->id1.bitmap1.i_total
205 	    || la->la_bm_off)
206 		mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
207 		     "found = %u, set = %u, taken = %u, off = %u\n",
208 		     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
209 		     le32_to_cpu(alloc->id1.bitmap1.i_total),
210 		     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
211 
212 	osb->local_alloc_bh = alloc_bh;
213 	osb->local_alloc_state = OCFS2_LA_ENABLED;
214 
215 bail:
216 	if (status < 0)
217 		brelse(alloc_bh);
218 	if (inode)
219 		iput(inode);
220 
221 	mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
222 
223 	mlog_exit(status);
224 	return status;
225 }
226 
227 /*
228  * return any unused bits to the bitmap and write out a clean
229  * local_alloc.
230  *
231  * local_alloc_bh is optional. If not passed, we will simply use the
232  * one off osb. If you do pass it however, be warned that it *will* be
233  * returned brelse'd and NULL'd out.*/
234 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
235 {
236 	int status;
237 	handle_t *handle;
238 	struct inode *local_alloc_inode = NULL;
239 	struct buffer_head *bh = NULL;
240 	struct buffer_head *main_bm_bh = NULL;
241 	struct inode *main_bm_inode = NULL;
242 	struct ocfs2_dinode *alloc_copy = NULL;
243 	struct ocfs2_dinode *alloc = NULL;
244 
245 	mlog_entry_void();
246 
247 	cancel_delayed_work(&osb->la_enable_wq);
248 	flush_workqueue(ocfs2_wq);
249 
250 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
251 		goto out;
252 
253 	local_alloc_inode =
254 		ocfs2_get_system_file_inode(osb,
255 					    LOCAL_ALLOC_SYSTEM_INODE,
256 					    osb->slot_num);
257 	if (!local_alloc_inode) {
258 		status = -ENOENT;
259 		mlog_errno(status);
260 		goto out;
261 	}
262 
263 	osb->local_alloc_state = OCFS2_LA_DISABLED;
264 
265 	main_bm_inode = ocfs2_get_system_file_inode(osb,
266 						    GLOBAL_BITMAP_SYSTEM_INODE,
267 						    OCFS2_INVALID_SLOT);
268 	if (!main_bm_inode) {
269 		status = -EINVAL;
270 		mlog_errno(status);
271 		goto out;
272 	}
273 
274 	mutex_lock(&main_bm_inode->i_mutex);
275 
276 	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
277 	if (status < 0) {
278 		mlog_errno(status);
279 		goto out_mutex;
280 	}
281 
282 	/* WINDOW_MOVE_CREDITS is a bit heavy... */
283 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
284 	if (IS_ERR(handle)) {
285 		mlog_errno(PTR_ERR(handle));
286 		handle = NULL;
287 		goto out_unlock;
288 	}
289 
290 	bh = osb->local_alloc_bh;
291 	alloc = (struct ocfs2_dinode *) bh->b_data;
292 
293 	alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
294 	if (!alloc_copy) {
295 		status = -ENOMEM;
296 		goto out_commit;
297 	}
298 	memcpy(alloc_copy, alloc, bh->b_size);
299 
300 	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
301 					 OCFS2_JOURNAL_ACCESS_WRITE);
302 	if (status < 0) {
303 		mlog_errno(status);
304 		goto out_commit;
305 	}
306 
307 	ocfs2_clear_local_alloc(alloc);
308 
309 	status = ocfs2_journal_dirty(handle, bh);
310 	if (status < 0) {
311 		mlog_errno(status);
312 		goto out_commit;
313 	}
314 
315 	brelse(bh);
316 	osb->local_alloc_bh = NULL;
317 	osb->local_alloc_state = OCFS2_LA_UNUSED;
318 
319 	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
320 					  main_bm_inode, main_bm_bh);
321 	if (status < 0)
322 		mlog_errno(status);
323 
324 out_commit:
325 	ocfs2_commit_trans(osb, handle);
326 
327 out_unlock:
328 	brelse(main_bm_bh);
329 
330 	ocfs2_inode_unlock(main_bm_inode, 1);
331 
332 out_mutex:
333 	mutex_unlock(&main_bm_inode->i_mutex);
334 	iput(main_bm_inode);
335 
336 out:
337 	if (local_alloc_inode)
338 		iput(local_alloc_inode);
339 
340 	if (alloc_copy)
341 		kfree(alloc_copy);
342 
343 	mlog_exit_void();
344 }
345 
346 /*
347  * We want to free the bitmap bits outside of any recovery context as
348  * we'll need a cluster lock to do so, but we must clear the local
349  * alloc before giving up the recovered nodes journal. To solve this,
350  * we kmalloc a copy of the local alloc before it's change for the
351  * caller to process with ocfs2_complete_local_alloc_recovery
352  */
353 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
354 				     int slot_num,
355 				     struct ocfs2_dinode **alloc_copy)
356 {
357 	int status = 0;
358 	struct buffer_head *alloc_bh = NULL;
359 	struct inode *inode = NULL;
360 	struct ocfs2_dinode *alloc;
361 
362 	mlog_entry("(slot_num = %d)\n", slot_num);
363 
364 	*alloc_copy = NULL;
365 
366 	inode = ocfs2_get_system_file_inode(osb,
367 					    LOCAL_ALLOC_SYSTEM_INODE,
368 					    slot_num);
369 	if (!inode) {
370 		status = -EINVAL;
371 		mlog_errno(status);
372 		goto bail;
373 	}
374 
375 	mutex_lock(&inode->i_mutex);
376 
377 	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
378 					     OCFS2_BH_IGNORE_CACHE);
379 	if (status < 0) {
380 		mlog_errno(status);
381 		goto bail;
382 	}
383 
384 	*alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
385 	if (!(*alloc_copy)) {
386 		status = -ENOMEM;
387 		goto bail;
388 	}
389 	memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
390 
391 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
392 	ocfs2_clear_local_alloc(alloc);
393 
394 	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
395 	status = ocfs2_write_block(osb, alloc_bh, inode);
396 	if (status < 0)
397 		mlog_errno(status);
398 
399 bail:
400 	if ((status < 0) && (*alloc_copy)) {
401 		kfree(*alloc_copy);
402 		*alloc_copy = NULL;
403 	}
404 
405 	brelse(alloc_bh);
406 
407 	if (inode) {
408 		mutex_unlock(&inode->i_mutex);
409 		iput(inode);
410 	}
411 
412 	mlog_exit(status);
413 	return status;
414 }
415 
416 /*
417  * Step 2: By now, we've completed the journal recovery, we've stamped
418  * a clean local alloc on disk and dropped the node out of the
419  * recovery map. Dlm locks will no longer stall, so lets clear out the
420  * main bitmap.
421  */
422 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
423 					struct ocfs2_dinode *alloc)
424 {
425 	int status;
426 	handle_t *handle;
427 	struct buffer_head *main_bm_bh = NULL;
428 	struct inode *main_bm_inode;
429 
430 	mlog_entry_void();
431 
432 	main_bm_inode = ocfs2_get_system_file_inode(osb,
433 						    GLOBAL_BITMAP_SYSTEM_INODE,
434 						    OCFS2_INVALID_SLOT);
435 	if (!main_bm_inode) {
436 		status = -EINVAL;
437 		mlog_errno(status);
438 		goto out;
439 	}
440 
441 	mutex_lock(&main_bm_inode->i_mutex);
442 
443 	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
444 	if (status < 0) {
445 		mlog_errno(status);
446 		goto out_mutex;
447 	}
448 
449 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
450 	if (IS_ERR(handle)) {
451 		status = PTR_ERR(handle);
452 		handle = NULL;
453 		mlog_errno(status);
454 		goto out_unlock;
455 	}
456 
457 	/* we want the bitmap change to be recorded on disk asap */
458 	handle->h_sync = 1;
459 
460 	status = ocfs2_sync_local_to_main(osb, handle, alloc,
461 					  main_bm_inode, main_bm_bh);
462 	if (status < 0)
463 		mlog_errno(status);
464 
465 	ocfs2_commit_trans(osb, handle);
466 
467 out_unlock:
468 	ocfs2_inode_unlock(main_bm_inode, 1);
469 
470 out_mutex:
471 	mutex_unlock(&main_bm_inode->i_mutex);
472 
473 	brelse(main_bm_bh);
474 
475 	iput(main_bm_inode);
476 
477 out:
478 	if (!status)
479 		ocfs2_init_inode_steal_slot(osb);
480 	mlog_exit(status);
481 	return status;
482 }
483 
484 /* Check to see if the local alloc window is within ac->ac_max_block */
485 static int ocfs2_local_alloc_in_range(struct inode *inode,
486 				      struct ocfs2_alloc_context *ac,
487 				      u32 bits_wanted)
488 {
489 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 	struct ocfs2_dinode *alloc;
491 	struct ocfs2_local_alloc *la;
492 	int start;
493 	u64 block_off;
494 
495 	if (!ac->ac_max_block)
496 		return 1;
497 
498 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 	la = OCFS2_LOCAL_ALLOC(alloc);
500 
501 	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 	if (start == -1) {
503 		mlog_errno(-ENOSPC);
504 		return 0;
505 	}
506 
507 	/*
508 	 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 	 * the blkno just past our actual allocation.  This is perfect
510 	 * to compare with ac_max_block.
511 	 */
512 	block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 					     le32_to_cpu(la->la_bm_off) +
514 					     start + bits_wanted);
515 	mlog(0, "Checking %llu against %llu\n",
516 	     (unsigned long long)block_off,
517 	     (unsigned long long)ac->ac_max_block);
518 	if (block_off > ac->ac_max_block)
519 		return 0;
520 
521 	return 1;
522 }
523 
524 /*
525  * make sure we've got at least bits_wanted contiguous bits in the
526  * local alloc. You lose them when you drop i_mutex.
527  *
528  * We will add ourselves to the transaction passed in, but may start
529  * our own in order to shift windows.
530  */
531 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
532 				   u32 bits_wanted,
533 				   struct ocfs2_alloc_context *ac)
534 {
535 	int status;
536 	struct ocfs2_dinode *alloc;
537 	struct inode *local_alloc_inode;
538 	unsigned int free_bits;
539 
540 	mlog_entry_void();
541 
542 	BUG_ON(!ac);
543 
544 	local_alloc_inode =
545 		ocfs2_get_system_file_inode(osb,
546 					    LOCAL_ALLOC_SYSTEM_INODE,
547 					    osb->slot_num);
548 	if (!local_alloc_inode) {
549 		status = -ENOENT;
550 		mlog_errno(status);
551 		goto bail;
552 	}
553 
554 	mutex_lock(&local_alloc_inode->i_mutex);
555 
556 	/*
557 	 * We must double check state and allocator bits because
558 	 * another process may have changed them while holding i_mutex.
559 	 */
560 	spin_lock(&osb->osb_lock);
561 	if (!ocfs2_la_state_enabled(osb) ||
562 	    (bits_wanted > osb->local_alloc_bits)) {
563 		spin_unlock(&osb->osb_lock);
564 		status = -ENOSPC;
565 		goto bail;
566 	}
567 	spin_unlock(&osb->osb_lock);
568 
569 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
570 
571 #ifdef CONFIG_OCFS2_DEBUG_FS
572 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
573 	    ocfs2_local_alloc_count_bits(alloc)) {
574 		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
575 			    "%u free bits, but a count shows %u",
576 			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
577 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
578 			    ocfs2_local_alloc_count_bits(alloc));
579 		status = -EIO;
580 		goto bail;
581 	}
582 #endif
583 
584 	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
585 		le32_to_cpu(alloc->id1.bitmap1.i_used);
586 	if (bits_wanted > free_bits) {
587 		/* uhoh, window change time. */
588 		status =
589 			ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
590 		if (status < 0) {
591 			if (status != -ENOSPC)
592 				mlog_errno(status);
593 			goto bail;
594 		}
595 
596 		/*
597 		 * Under certain conditions, the window slide code
598 		 * might have reduced the number of bits available or
599 		 * disabled the the local alloc entirely. Re-check
600 		 * here and return -ENOSPC if necessary.
601 		 */
602 		status = -ENOSPC;
603 		if (!ocfs2_la_state_enabled(osb))
604 			goto bail;
605 
606 		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
607 			le32_to_cpu(alloc->id1.bitmap1.i_used);
608 		if (bits_wanted > free_bits)
609 			goto bail;
610 	}
611 
612 	if (ac->ac_max_block)
613 		mlog(0, "Calling in_range for max block %llu\n",
614 		     (unsigned long long)ac->ac_max_block);
615 
616 	if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 					bits_wanted)) {
618 		/*
619 		 * The window is outside ac->ac_max_block.
620 		 * This errno tells the caller to keep localalloc enabled
621 		 * but to get the allocation from the main bitmap.
622 		 */
623 		status = -EFBIG;
624 		goto bail;
625 	}
626 
627 	ac->ac_inode = local_alloc_inode;
628 	/* We should never use localalloc from another slot */
629 	ac->ac_alloc_slot = osb->slot_num;
630 	ac->ac_which = OCFS2_AC_USE_LOCAL;
631 	get_bh(osb->local_alloc_bh);
632 	ac->ac_bh = osb->local_alloc_bh;
633 	status = 0;
634 bail:
635 	if (status < 0 && local_alloc_inode) {
636 		mutex_unlock(&local_alloc_inode->i_mutex);
637 		iput(local_alloc_inode);
638 	}
639 
640 	mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
641 	     status);
642 
643 	mlog_exit(status);
644 	return status;
645 }
646 
647 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
648 				 handle_t *handle,
649 				 struct ocfs2_alloc_context *ac,
650 				 u32 bits_wanted,
651 				 u32 *bit_off,
652 				 u32 *num_bits)
653 {
654 	int status, start;
655 	struct inode *local_alloc_inode;
656 	void *bitmap;
657 	struct ocfs2_dinode *alloc;
658 	struct ocfs2_local_alloc *la;
659 
660 	mlog_entry_void();
661 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
662 
663 	local_alloc_inode = ac->ac_inode;
664 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 	la = OCFS2_LOCAL_ALLOC(alloc);
666 
667 	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
668 	if (start == -1) {
669 		/* TODO: Shouldn't we just BUG here? */
670 		status = -ENOSPC;
671 		mlog_errno(status);
672 		goto bail;
673 	}
674 
675 	bitmap = la->la_bitmap;
676 	*bit_off = le32_to_cpu(la->la_bm_off) + start;
677 	/* local alloc is always contiguous by nature -- we never
678 	 * delete bits from it! */
679 	*num_bits = bits_wanted;
680 
681 	status = ocfs2_journal_access_di(handle, local_alloc_inode,
682 					 osb->local_alloc_bh,
683 					 OCFS2_JOURNAL_ACCESS_WRITE);
684 	if (status < 0) {
685 		mlog_errno(status);
686 		goto bail;
687 	}
688 
689 	while(bits_wanted--)
690 		ocfs2_set_bit(start++, bitmap);
691 
692 	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
693 
694 	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
695 	if (status < 0) {
696 		mlog_errno(status);
697 		goto bail;
698 	}
699 
700 	status = 0;
701 bail:
702 	mlog_exit(status);
703 	return status;
704 }
705 
706 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
707 {
708 	int i;
709 	u8 *buffer;
710 	u32 count = 0;
711 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
712 
713 	mlog_entry_void();
714 
715 	buffer = la->la_bitmap;
716 	for (i = 0; i < le16_to_cpu(la->la_size); i++)
717 		count += hweight8(buffer[i]);
718 
719 	mlog_exit(count);
720 	return count;
721 }
722 
723 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
724 					     struct ocfs2_dinode *alloc,
725 					     u32 numbits)
726 {
727 	int numfound, bitoff, left, startoff, lastzero;
728 	void *bitmap = NULL;
729 
730 	mlog_entry("(numbits wanted = %u)\n", numbits);
731 
732 	if (!alloc->id1.bitmap1.i_total) {
733 		mlog(0, "No bits in my window!\n");
734 		bitoff = -1;
735 		goto bail;
736 	}
737 
738 	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
739 
740 	numfound = bitoff = startoff = 0;
741 	lastzero = -1;
742 	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
743 	while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
744 		if (bitoff == left) {
745 			/* mlog(0, "bitoff (%d) == left", bitoff); */
746 			break;
747 		}
748 		/* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
749 		   "numfound = %d\n", bitoff, startoff, numfound);*/
750 
751 		/* Ok, we found a zero bit... is it contig. or do we
752 		 * start over?*/
753 		if (bitoff == startoff) {
754 			/* we found a zero */
755 			numfound++;
756 			startoff++;
757 		} else {
758 			/* got a zero after some ones */
759 			numfound = 1;
760 			startoff = bitoff+1;
761 		}
762 		/* we got everything we needed */
763 		if (numfound == numbits) {
764 			/* mlog(0, "Found it all!\n"); */
765 			break;
766 		}
767 	}
768 
769 	mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
770 	     numfound);
771 
772 	if (numfound == numbits)
773 		bitoff = startoff - numfound;
774 	else
775 		bitoff = -1;
776 
777 bail:
778 	mlog_exit(bitoff);
779 	return bitoff;
780 }
781 
782 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
783 {
784 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
785 	int i;
786 	mlog_entry_void();
787 
788 	alloc->id1.bitmap1.i_total = 0;
789 	alloc->id1.bitmap1.i_used = 0;
790 	la->la_bm_off = 0;
791 	for(i = 0; i < le16_to_cpu(la->la_size); i++)
792 		la->la_bitmap[i] = 0;
793 
794 	mlog_exit_void();
795 }
796 
#if 0
/*
 * Debugging aid for window shifts (compiled out): BUG() if any of the
 * 'count' bits starting at 'start' is set. Enable this block and add
 * calls where needed.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int remaining = count;

	/* Walk the range from its last bit down to its first. */
	while (remaining) {
		remaining--;
		if (!ocfs2_test_bit(start + remaining, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = %u\n",
		       start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       start + remaining);
		BUG();
	}
}
#endif
815 
816 /*
817  * sync the local alloc to main bitmap.
818  *
819  * assumes you've already locked the main bitmap -- the bitmap inode
820  * passed is used for caching.
821  */
822 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
823 				    handle_t *handle,
824 				    struct ocfs2_dinode *alloc,
825 				    struct inode *main_bm_inode,
826 				    struct buffer_head *main_bm_bh)
827 {
828 	int status = 0;
829 	int bit_off, left, count, start;
830 	u64 la_start_blk;
831 	u64 blkno;
832 	void *bitmap;
833 	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
834 
835 	mlog_entry("total = %u, used = %u\n",
836 		   le32_to_cpu(alloc->id1.bitmap1.i_total),
837 		   le32_to_cpu(alloc->id1.bitmap1.i_used));
838 
839 	if (!alloc->id1.bitmap1.i_total) {
840 		mlog(0, "nothing to sync!\n");
841 		goto bail;
842 	}
843 
844 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
845 	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
846 		mlog(0, "all bits were taken!\n");
847 		goto bail;
848 	}
849 
850 	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
851 						le32_to_cpu(la->la_bm_off));
852 	bitmap = la->la_bitmap;
853 	start = count = bit_off = 0;
854 	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
855 
856 	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
857 	       != -1) {
858 		if ((bit_off < left) && (bit_off == start)) {
859 			count++;
860 			start++;
861 			continue;
862 		}
863 		if (count) {
864 			blkno = la_start_blk +
865 				ocfs2_clusters_to_blocks(osb->sb,
866 							 start - count);
867 
868 			mlog(0, "freeing %u bits starting at local alloc bit "
869 			     "%u (la_start_blk = %llu, blkno = %llu)\n",
870 			     count, start - count,
871 			     (unsigned long long)la_start_blk,
872 			     (unsigned long long)blkno);
873 
874 			status = ocfs2_free_clusters(handle, main_bm_inode,
875 						     main_bm_bh, blkno, count);
876 			if (status < 0) {
877 				mlog_errno(status);
878 				goto bail;
879 			}
880 		}
881 		if (bit_off >= left)
882 			break;
883 		count = 1;
884 		start = bit_off + 1;
885 	}
886 
887 bail:
888 	mlog_exit(status);
889 	return status;
890 }
891 
/* Events that influence the size of the next local alloc window. */
enum ocfs2_la_event {
	/* Normal window slide. */
	OCFS2_LA_EVENT_SLIDE,

	/*
	 * The global bitmap has enough bits theoretically free, but a
	 * contiguous allocation could not be found.
	 */
	OCFS2_LA_EVENT_FRAGMENTED,

	/*
	 * Global bitmap doesn't have enough bits free to satisfy our
	 * request.
	 */
	OCFS2_LA_EVENT_ENOSPC,
};

/* Delay used when queueing the la_enable_wq delayed work. */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
904 /*
905  * Given an event, calculate the size of our next local alloc window.
906  *
907  * This should always be called under i_mutex of the local alloc inode
908  * so that local alloc disabling doesn't race with processes trying to
909  * use the allocator.
910  *
911  * Returns the state which the local alloc was left in. This value can
912  * be ignored by some paths.
913  */
914 static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
915 				  enum ocfs2_la_event event)
916 {
917 	unsigned int bits;
918 	int state;
919 
920 	spin_lock(&osb->osb_lock);
921 	if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
922 		WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
923 		goto out_unlock;
924 	}
925 
926 	/*
927 	 * ENOSPC and fragmentation are treated similarly for now.
928 	 */
929 	if (event == OCFS2_LA_EVENT_ENOSPC ||
930 	    event == OCFS2_LA_EVENT_FRAGMENTED) {
931 		/*
932 		 * We ran out of contiguous space in the primary
933 		 * bitmap. Drastically reduce the number of bits used
934 		 * by local alloc until we have to disable it.
935 		 */
936 		bits = osb->local_alloc_bits >> 1;
937 		if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
938 			/*
939 			 * By setting state to THROTTLED, we'll keep
940 			 * the number of local alloc bits used down
941 			 * until an event occurs which would give us
942 			 * reason to assume the bitmap situation might
943 			 * have changed.
944 			 */
945 			osb->local_alloc_state = OCFS2_LA_THROTTLED;
946 			osb->local_alloc_bits = bits;
947 		} else {
948 			osb->local_alloc_state = OCFS2_LA_DISABLED;
949 		}
950 		queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
951 				   OCFS2_LA_ENABLE_INTERVAL);
952 		goto out_unlock;
953 	}
954 
955 	/*
956 	 * Don't increase the size of the local alloc window until we
957 	 * know we might be able to fulfill the request. Otherwise, we
958 	 * risk bouncing around the global bitmap during periods of
959 	 * low space.
960 	 */
961 	if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
962 		osb->local_alloc_bits = osb->local_alloc_default_bits;
963 
964 out_unlock:
965 	state = osb->local_alloc_state;
966 	spin_unlock(&osb->osb_lock);
967 
968 	return state;
969 }
970 
971 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
972 						struct ocfs2_alloc_context **ac,
973 						struct inode **bitmap_inode,
974 						struct buffer_head **bitmap_bh)
975 {
976 	int status;
977 
978 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
979 	if (!(*ac)) {
980 		status = -ENOMEM;
981 		mlog_errno(status);
982 		goto bail;
983 	}
984 
985 retry_enospc:
986 	(*ac)->ac_bits_wanted = osb->local_alloc_bits;
987 
988 	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
989 	if (status == -ENOSPC) {
990 		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
991 		    OCFS2_LA_DISABLED)
992 			goto bail;
993 
994 		ocfs2_free_ac_resource(*ac);
995 		memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
996 		goto retry_enospc;
997 	}
998 	if (status < 0) {
999 		mlog_errno(status);
1000 		goto bail;
1001 	}
1002 
1003 	*bitmap_inode = (*ac)->ac_inode;
1004 	igrab(*bitmap_inode);
1005 	*bitmap_bh = (*ac)->ac_bh;
1006 	get_bh(*bitmap_bh);
1007 	status = 0;
1008 bail:
1009 	if ((status < 0) && *ac) {
1010 		ocfs2_free_alloc_context(*ac);
1011 		*ac = NULL;
1012 	}
1013 
1014 	mlog_exit(status);
1015 	return status;
1016 }
1017 
1018 /*
1019  * pass it the bitmap lock in lock_bh if you have it.
1020  */
1021 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1022 					handle_t *handle,
1023 					struct ocfs2_alloc_context *ac)
1024 {
1025 	int status = 0;
1026 	u32 cluster_off, cluster_count;
1027 	struct ocfs2_dinode *alloc = NULL;
1028 	struct ocfs2_local_alloc *la;
1029 
1030 	mlog_entry_void();
1031 
1032 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1033 	la = OCFS2_LOCAL_ALLOC(alloc);
1034 
1035 	if (alloc->id1.bitmap1.i_total)
1036 		mlog(0, "asking me to alloc a new window over a non-empty "
1037 		     "one\n");
1038 
1039 	mlog(0, "Allocating %u clusters for a new window.\n",
1040 	     osb->local_alloc_bits);
1041 
1042 	/* Instruct the allocation code to try the most recently used
1043 	 * cluster group. We'll re-record the group used this pass
1044 	 * below. */
1045 	ac->ac_last_group = osb->la_last_gd;
1046 
1047 	/* we used the generic suballoc reserve function, but we set
1048 	 * everything up nicely, so there's no reason why we can't use
1049 	 * the more specific cluster api to claim bits. */
1050 	status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
1051 				      &cluster_off, &cluster_count);
1052 	if (status == -ENOSPC) {
1053 retry_enospc:
1054 		/*
1055 		 * Note: We could also try syncing the journal here to
1056 		 * allow use of any free bits which the current
1057 		 * transaction can't give us access to. --Mark
1058 		 */
1059 		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
1060 		    OCFS2_LA_DISABLED)
1061 			goto bail;
1062 
1063 		status = ocfs2_claim_clusters(osb, handle, ac,
1064 					      osb->local_alloc_bits,
1065 					      &cluster_off,
1066 					      &cluster_count);
1067 		if (status == -ENOSPC)
1068 			goto retry_enospc;
1069 		/*
1070 		 * We only shrunk the *minimum* number of in our
1071 		 * request - it's entirely possible that the allocator
1072 		 * might give us more than we asked for.
1073 		 */
1074 		if (status == 0) {
1075 			spin_lock(&osb->osb_lock);
1076 			osb->local_alloc_bits = cluster_count;
1077 			spin_unlock(&osb->osb_lock);
1078 		}
1079 	}
1080 	if (status < 0) {
1081 		if (status != -ENOSPC)
1082 			mlog_errno(status);
1083 		goto bail;
1084 	}
1085 
1086 	osb->la_last_gd = ac->ac_last_group;
1087 
1088 	la->la_bm_off = cpu_to_le32(cluster_off);
1089 	alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
1090 	/* just in case... In the future when we find space ourselves,
1091 	 * we don't have to get all contiguous -- but we'll have to
1092 	 * set all previously used bits in bitmap and update
1093 	 * la_bits_set before setting the bits in the main bitmap. */
1094 	alloc->id1.bitmap1.i_used = 0;
1095 	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1096 	       le16_to_cpu(la->la_size));
1097 
1098 	mlog(0, "New window allocated:\n");
1099 	mlog(0, "window la_bm_off = %u\n",
1100 	     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
1101 	mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
1102 
1103 bail:
1104 	mlog_exit(status);
1105 	return status;
1106 }
1107 
1108 /* Note that we do *NOT* lock the local alloc inode here as
1109  * it's been locked already for us. */
1110 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1111 					  struct inode *local_alloc_inode)
1112 {
1113 	int status = 0;
1114 	struct buffer_head *main_bm_bh = NULL;
1115 	struct inode *main_bm_inode = NULL;
1116 	handle_t *handle = NULL;
1117 	struct ocfs2_dinode *alloc;
1118 	struct ocfs2_dinode *alloc_copy = NULL;
1119 	struct ocfs2_alloc_context *ac = NULL;
1120 
1121 	mlog_entry_void();
1122 
1123 	ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1124 
1125 	/* This will lock the main bitmap for us. */
1126 	status = ocfs2_local_alloc_reserve_for_window(osb,
1127 						      &ac,
1128 						      &main_bm_inode,
1129 						      &main_bm_bh);
1130 	if (status < 0) {
1131 		if (status != -ENOSPC)
1132 			mlog_errno(status);
1133 		goto bail;
1134 	}
1135 
1136 	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
1137 	if (IS_ERR(handle)) {
1138 		status = PTR_ERR(handle);
1139 		handle = NULL;
1140 		mlog_errno(status);
1141 		goto bail;
1142 	}
1143 
1144 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1145 
1146 	/* We want to clear the local alloc before doing anything
1147 	 * else, so that if we error later during this operation,
1148 	 * local alloc shutdown won't try to double free main bitmap
1149 	 * bits. Make a copy so the sync function knows which bits to
1150 	 * free. */
1151 	alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
1152 	if (!alloc_copy) {
1153 		status = -ENOMEM;
1154 		mlog_errno(status);
1155 		goto bail;
1156 	}
1157 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1158 
1159 	status = ocfs2_journal_access_di(handle, local_alloc_inode,
1160 					 osb->local_alloc_bh,
1161 					 OCFS2_JOURNAL_ACCESS_WRITE);
1162 	if (status < 0) {
1163 		mlog_errno(status);
1164 		goto bail;
1165 	}
1166 
1167 	ocfs2_clear_local_alloc(alloc);
1168 
1169 	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1170 	if (status < 0) {
1171 		mlog_errno(status);
1172 		goto bail;
1173 	}
1174 
1175 	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1176 					  main_bm_inode, main_bm_bh);
1177 	if (status < 0) {
1178 		mlog_errno(status);
1179 		goto bail;
1180 	}
1181 
1182 	status = ocfs2_local_alloc_new_window(osb, handle, ac);
1183 	if (status < 0) {
1184 		if (status != -ENOSPC)
1185 			mlog_errno(status);
1186 		goto bail;
1187 	}
1188 
1189 	atomic_inc(&osb->alloc_stats.moves);
1190 
1191 	status = 0;
1192 bail:
1193 	if (handle)
1194 		ocfs2_commit_trans(osb, handle);
1195 
1196 	brelse(main_bm_bh);
1197 
1198 	if (main_bm_inode)
1199 		iput(main_bm_inode);
1200 
1201 	if (alloc_copy)
1202 		kfree(alloc_copy);
1203 
1204 	if (ac)
1205 		ocfs2_free_alloc_context(ac);
1206 
1207 	mlog_exit(status);
1208 	return status;
1209 }
1210 
1211