xref: /openbmc/linux/fs/gfs2/lops.c (revision d6a079e8)
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>

#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * back to its in-place disk block, remove it from the AIL.
	 */
	if (bd->bd_ail)
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: The AIL structure for the current log flush, onto whose first
 *      AIL list the buffer is moved once it has been unpinned
 */
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = bh->b_private;

	gfs2_assert_withdraw(sdp, buffer_uptodate(bh));

	if (!buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_ail) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
		gfs2_glock_schedule_for_reclaim(bd->bd_gl);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}


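/*
 * Helpers for walking a log descriptor block: the descriptor header sits
 * at the start of the block, followed by an array of __be64 block numbers
 * which runs to the end of the block.
 */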
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	return (struct gfs2_log_descriptor *)bh->b_data;
}

static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	return (__force __be64 *)(ld + 1);
}

static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
	return (__force __be64 *)(bh->b_data + bh->b_size);
}


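/**
 * gfs2_get_log_desc - Allocate and initialise a log descriptor block
 * @sdp: the filesystem
 * @ld_type: the descriptor type (GFS2_LOG_DESC_*)
 *
 * Returns: a buffer head with the descriptor header filled in
 */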
static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *bh = gfs2_log_get_buf(sdp);
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = 0;
	ld->ld_data1 = 0;
	ld->ld_data2 = 0;
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	return bh;
}

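/**
 * buf_lo_add - Add a metadata buffer to the current transaction
 * @sdp: the filesystem
 * @le: the log element embedded in the buffer's gfs2_bufdata
 *
 * Pins the buffer, stamps the journal id into its header, and links it
 * onto both the transaction's buffer list and the log's metadata list.
 */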
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_meta_header *mh;
	struct gfs2_trans *tr;

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (!list_empty(&bd->bd_list_tr))
		goto out;
	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	if (!list_empty(&le->le_list))
		goto out;
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);
	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
	mh->__pad0 = cpu_to_be64(0);
	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	tr->tr_num_buf_new++;
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}

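/**
 * buf_lo_before_commit - Write the pinned metadata buffers into the log
 * @sdp: the filesystem
 *
 * Walks sd_log_le_buf in chunks of at most buf_limit() buffers.  For each
 * chunk, a descriptor block listing the real block numbers is written,
 * followed by a "fake" buffer head for each pinned buffer so that the
 * data lands in the journal while the in-place buffer stays pinned.
 */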
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total;
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	limit = buf_limit(sdp);
	/* for 4k blocks, limit = 503 */

	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while (total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
		gfs2_log_lock(sdp);
		ld = bh_log_desc(bh);
		ptr = bh_log_ptr(bh);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);

		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		submit_bh(WRITE_SYNC_PLUG, bh);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			submit_bh(WRITE_SYNC_PLUG, bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}

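/**
 * buf_lo_after_commit - Unpin the metadata buffers once the log is on disk
 * @sdp: the filesystem
 * @ai: the AIL structure tracking this log flush
 */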
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}

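/**
 * buf_lo_before_scan - Reset the replay counters before scanning a journal
 * @jd: the journal being recovered
 * @head: the journal's log header
 * @pass: the recovery pass number (counters are reset on pass 0 only)
 */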
static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}

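/**
 * buf_lo_scan_elements - Replay the metadata blocks named by a descriptor
 * @jd: the journal being recovered
 * @start: the journal block containing the descriptor
 * @ld: the log descriptor
 * @ptr: the array of block numbers following the descriptor
 * @pass: the recovery pass number
 *
 * On pass 1, each journaled copy that has not been revoked is read back
 * from the log and written over its in-place block.
 */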
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

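/**
 * buf_lo_after_scan - Sync the replayed metadata and report the result
 * @jd: the journal that was recovered
 * @error: non-zero if the scan failed
 * @pass: the recovery pass number
 */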
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
	        jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

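/**
 * revoke_lo_add - Add a revoke to the current transaction
 * @sdp: the filesystem
 * @le: the log element for the block being revoked
 */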
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_revoke++;
	sdp->sd_log_num_revoke++;
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
}

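/**
 * revoke_lo_before_commit - Write the pending revokes into the log
 * @sdp: the filesystem
 *
 * The revoked block numbers are packed as __be64s after the descriptor
 * header; when one block fills up, it is submitted and a continuation
 * block with a GFS2_METATYPE_LB header is started.
 */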
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(u64)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			submit_bh(WRITE_SYNC_PLUG, bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
		kmem_cache_free(gfs2_bufdata_cachep, bd);

		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	submit_bh(WRITE_SYNC_PLUG, bh);
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_revokes = 0;
	sdp->sd_replay_tail = head->lh_tail;
}

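/**
 * revoke_lo_scan_elements - Collect the revokes recorded in the journal
 * @jd: the journal being recovered
 * @start: the journal block containing the descriptor
 * @ld: the log descriptor
 * @ptr: unused; the revoke data is read straight from the journal blocks
 * @pass: the recovery pass number
 *
 * On pass 0, every revoke tag is added to the revoke table so that pass 1
 * can skip blocks whose journaled copies are stale.
 */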
static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			} else if (error)
				sdp->sd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
	        jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}

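/**
 * rg_lo_add - Add a resource group to the current transaction
 * @sdp: the filesystem
 * @le: the log element embedded in the resource group
 *
 * Takes a hold on the rgrp's bitmap buffers; the hold is dropped in
 * rg_lo_after_commit().
 */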
static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr = current->journal_info;

	tr->tr_touched = 1;

	rgd = container_of(le, struct gfs2_rgrpd, rd_le);

	gfs2_log_lock(sdp);
	if (!list_empty(&le->le_list)) {
		gfs2_log_unlock(sdp);
		return;
	}
	gfs2_rgrp_bh_hold(rgd);
	sdp->sd_log_num_rg++;
	list_add(&le->le_list, &sdp->sd_log_le_rg);
	gfs2_log_unlock(sdp);
}

static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_rg;
	struct gfs2_rgrpd *rgd;

	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
		list_del_init(&rgd->rd_le.le_list);
		sdp->sd_log_num_rg--;

		gfs2_rgrp_repolish_clones(rgd);
		gfs2_rgrp_bh_put(rgd);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}

/**
 * databuf_lo_add - Add a databuf to the transaction.
 * @sdp: the filesystem
 * @le: the log element embedded in the buffer's gfs2_bufdata
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that it's
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per metadata)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr = current->journal_info;
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (tr) {
		if (!list_empty(&bd->bd_list_tr))
			goto out;
		tr->tr_touched = 1;
		if (gfs2_is_jdata(ip)) {
			tr->tr_num_buf++;
			list_add(&bd->bd_list_tr, &tr->tr_list_buf);
		}
	}
	if (!list_empty(&le->le_list))
		goto out;

	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	if (gfs2_is_jdata(ip)) {
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
		sdp->sd_log_num_databuf++;
		list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
	} else {
		list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
	}
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}

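/**
 * gfs2_check_magic - Mark a data buffer that needs escaping
 * @bh: the data buffer
 *
 * A journaled data block that happens to begin with GFS2_MAGIC could be
 * mistaken for metadata on replay, so it is flagged here and its first
 * word is zeroed ("escaped") when it is copied into the log.
 */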
static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr, KM_USER0);
}

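/**
 * gfs2_write_blocks - Write a batch of journaled data blocks into the log
 * @sdp: the filesystem
 * @bh: the log descriptor block for this batch (may be NULL if empty)
 * @list: the buffers waiting to be written
 * @done: where the buffers are moved once their writes are submitted
 * @n: the number of buffers in the batch
 *
 * Escaped buffers are copied into a fresh log buffer with their first
 * word zeroed; the rest are written through "fake" buffer heads that
 * alias the original data.
 */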
static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
			      struct list_head *list, struct list_head *done,
			      unsigned int n)
{
	struct buffer_head *bh1;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd;
	__be64 *ptr;

	if (!bh)
		return;

	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(n + 1);
	ld->ld_data1 = cpu_to_be32(n);

	ptr = bh_log_ptr(bh);

	get_bh(bh);
	submit_bh(WRITE_SYNC_PLUG, bh);
	gfs2_log_lock(sdp);
	while (!list_empty(list)) {
		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, done);
		get_bh(bd->bd_bh);
		while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
			gfs2_log_incr_head(sdp);
			ptr += 2;
		}
		gfs2_log_unlock(sdp);
		lock_buffer(bd->bd_bh);
		if (buffer_escaped(bd->bd_bh)) {
			void *kaddr;
			bh1 = gfs2_log_get_buf(sdp);
			kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
			memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
			       bh1->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			*(__be32 *)bh1->b_data = 0;
			clear_buffer_escaped(bd->bd_bh);
			unlock_buffer(bd->bd_bh);
			brelse(bd->bd_bh);
		} else {
			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
		}
		submit_bh(WRITE_SYNC_PLUG, bh1);
		gfs2_log_lock(sdp);
		ptr += 2;
	}
	gfs2_log_unlock(sdp);
	brelse(bh);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: the filesystem
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_bufdata *bd = NULL;
	struct buffer_head *bh = NULL;
	unsigned int n = 0;
	__be64 *ptr = NULL, *end = NULL;
	LIST_HEAD(processed);
	LIST_HEAD(in_progress);

	gfs2_log_lock(sdp);
	while (!list_empty(&sdp->sd_log_le_databuf)) {
		if (ptr == end) {
			gfs2_log_unlock(sdp);
			gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
			n = 0;
			bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
			ptr = bh_log_ptr(bh);
			end = bh_ptr_end(bh) - 1;
			gfs2_log_lock(sdp);
			continue;
		}
		bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, &in_progress);
		gfs2_check_magic(bd->bd_bh);
		*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
		/*
		 * The escape flag must come from the data buffer that
		 * gfs2_check_magic() just flagged (bd->bd_bh), not from
		 * the log descriptor block (bh).
		 */
		*ptr++ = cpu_to_be64(buffer_escaped(bd->bd_bh) ? 1 : 0);
		n++;
	}
	gfs2_log_unlock(sdp);
	gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
	gfs2_log_lock(sdp);
	list_splice(&processed, &sdp->sd_log_le_databuf);
	gfs2_log_unlock(sdp);
}

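/**
 * databuf_lo_scan_elements - Replay the journaled data blocks
 * @jd: the journal being recovered
 * @start: the journal block containing the descriptor
 * @ld: the log descriptor
 * @ptr: the (block number, escape flag) pairs following the descriptor
 * @pass: the recovery pass number
 *
 * Works like buf_lo_scan_elements(), except that each tag carries an
 * escape flag, and escaped blocks get their leading GFS2_MAGIC restored.
 */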
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);
		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

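/**
 * databuf_lo_after_commit - Unpin the journaled data buffers
 * @sdp: the filesystem
 * @ai: the AIL structure tracking this log flush
 */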
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
}


const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg",
};

const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

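/*
 * The NULL-terminated table of log operations; the order here sets the
 * order in which each lo_ phase visits the operation types.
 */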
const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_rg_lops,
	&gfs2_revoke_lops,
	NULL,
};
