xref: /openbmc/linux/fs/jfs/jfs_logmgr.h (revision 1760371b277718062211fc7eb6f3042c5051c1a5)
1  /* SPDX-License-Identifier: GPL-2.0-or-later */
2  /*
3   *   Copyright (C) International Business Machines Corp., 2000-2004
4   *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5   */
6  #ifndef	_H_JFS_LOGMGR
7  #define _H_JFS_LOGMGR
8  
9  #include <linux/uuid.h>
10  
11  #include "jfs_filsys.h"
12  #include "jfs_lock.h"
13  
14  /*
15   *	log manager configuration parameters
16   */
17  
/*
 * Log page geometry.  The byte size is derived from its log2 so the two
 * values cannot drift apart.
 */
#define	L2LOGPSIZE	12			/* log2 of the log page size */
#define	LOGPSIZE	(1 << L2LOGPSIZE)	/* log page size in bytes (4096) */

#define LOGPAGES	16	/* Log pages per mounted file system */
23  
24  /*
25   *	log logical volume
26   *
27   * a log is used to make the commit operation on journalled
28   * files within the same logical volume group atomic.
29   * a log is implemented with a logical volume.
30   * there is one log per logical volume group.
31   *
32   * block 0 of the log logical volume is not used (ipl etc).
33   * block 1 contains a log "superblock" and is used by logFormat(),
34   * lmLogInit(), lmLogShutdown(), and logRedo() to record status
35   * of the log but is not otherwise used during normal processing.
36   * blocks 2 - (N-1) are used to contain log records.
37   *
38   * when a volume group is varied-on-line, logRedo() must have
39   * been executed before the file systems (logical volumes) in
40   * the volume group can be mounted.
41   */
42  /*
43   *	log superblock (block 1 of logical volume)
44   */
/* Block numbers within the log logical volume. */
#define	LOGSUPER_B	1	/* block holding the log superblock */
#define	LOGSTART_B	2	/* first block used for log records */

/* Log superblock identification values. */
#define	LOGMAGIC	0x87654321	/* log lv identifier */
#define	LOGVERSION	1		/* log version number */

/* Max active file systems sharing log */
#define MAX_ACTIVE	128
52  
53  struct logsuper {
54  	__le32 magic;		/* 4: log lv identifier */
55  	__le32 version;		/* 4: version number */
56  	__le32 serial;		/* 4: log open/mount counter */
57  	__le32 size;		/* 4: size in number of LOGPSIZE blocks */
58  	__le32 bsize;		/* 4: logical block size in byte */
59  	__le32 l2bsize;		/* 4: log2 of bsize */
60  
61  	__le32 flag;		/* 4: option */
62  	__le32 state;		/* 4: state - see below */
63  
64  	__le32 end;		/* 4: addr of last log record set by logredo */
65  	uuid_t uuid;		/* 16: 128-bit journal uuid */
66  	char label[16];		/* 16: journal label */
67  	struct {
68  		uuid_t uuid;
69  	} active[MAX_ACTIVE];	/* 2048: active file systems list */
70  };
71  
72  /* log flag: commit option (see jfs_filsys.h) */
73  
/*
 * log state
 */
/* log mounted by lmLogInit() */
#define	LOGMOUNT	0
/* log shutdown by lmLogShutdown(); log redo completed by logredo() */
#define LOGREDONE	1
/* log wrapped */
#define LOGWRAP		2
/* log read error detected in logredo() */
#define LOGREADERR	3
81  
82  
83  /*
84   *	log logical page
85   *
86   * (this comment should be rewritten !)
87   * the header and trailer structures (h,t) will normally have
88   * the same page and eor value.
89   * An exception to this occurs when a complete page write is not
90   * accomplished on a power failure. Since the hardware may "split write"
91   * sectors in the page, any out of order sequence may occur during powerfail
92   * and needs to be recognized during log replay.  The xor value is
93   * an "exclusive or" of all log words in the page up to eor.  This
 * 32 bit xor is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer.  logredo can easily recognize pages
 * that were not completed by reconstructing this xor and checking
97   * the log page.
98   *
99   * Previous versions of the operating system did not allow split
100   * writes and detected partially written records in logredo by
101   * ordering the updates to the header, trailer, and the move of data
102   * into the logdata area.  The order: (1) data is moved (2) header
103   * is updated (3) trailer is updated.  In logredo, when the header
104   * differed from the trailer, the header and trailer were reconciled
105   * as follows: if h.page != t.page they were set to the smaller of
106   * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
107   * h.eor != t.eor they were set to the smaller of their two values.
108   */
109  struct logpage {
110  	struct {		/* header */
111  		__le32 page;	/* 4: log sequence page number */
112  		__le16 rsrvd;	/* 2: */
113  		__le16 eor;	/* 2: end-of-log offset of lasrt record write */
114  	} h;
115  
116  	__le32 data[LOGPSIZE / 4 - 4];	/* log record area */
117  
118  	struct {		/* trailer */
119  		__le32 page;	/* 4: normally the same as h.page */
120  		__le16 rsrvd;	/* 2: */
121  		__le16 eor;	/* 2: normally the same as h.eor */
122  	} t;
123  };
124  
125  #define LOGPHDRSIZE	8	/* log page header size */
126  #define LOGPTLRSIZE	8	/* log page trailer size */
127  
128  
129  /*
130   *	log record
131   *
132   * (this comment should be rewritten !)
133   * jfs uses only "after" log records (only a single writer is allowed
134   * in a page, pages are written to temporary paging space if
135   * they must be written to disk before commit, and i/o is
136   * scheduled for modified pages to their home location after
137   * the log records containing the after values and the commit
138   * record is written to the log on disk, undo discards the copy
139   * in main-memory.)
140   *
141   * a log record consists of a data area of variable length followed by
142   * a descriptor of fixed size LOGRDSIZE bytes.
143   * the data area is rounded up to an integral number of 4-bytes and
144   * must be no longer than LOGPSIZE.
145   * the descriptor is of size of multiple of 4-bytes and aligned on a
146   * 4-byte boundary.
147   * records are packed one after the other in the data area of log pages.
148   * (sometimes a DUMMY record is inserted so that at least one record ends
149   * on every page or the longest record is placed on at most two pages).
150   * the field eor in page header/trailer points to the byte following
151   * the last record on a page.
152   */
153  
/*
 * log record types
 */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/*
 * REDOPAGE/NOREDOPAGE log record data type
 */
#define LOG_INODE		0x0001
#define LOG_XTREE		0x0002
#define LOG_DTREE		0x0004
#define LOG_BTROOT		0x0010
#define LOG_EA			0x0020
#define LOG_ACL			0x0040
#define LOG_DATA		0x0080
#define LOG_NEW			0x0100
#define LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
/* Xtree is in directory inode */
#define LOG_DIR_XTREE		0x0800

/*
 * UPDATEMAP log record descriptor type
 */
#define LOG_ALLOCXADLIST	0x0080
#define LOG_ALLOCPXDLIST	0x0040
#define LOG_ALLOCXAD		0x0020
#define LOG_ALLOCPXD		0x0010
#define LOG_FREEXADLIST		0x0008
#define LOG_FREEPXDLIST		0x0004
#define LOG_FREEXAD		0x0002
#define LOG_FREEPXD		0x0001
186  
187  
188  struct lrd {
189  	/*
190  	 * type independent area
191  	 */
192  	__le32 logtid;		/* 4: log transaction identifier */
193  	__le32 backchain;	/* 4: ptr to prev record of same transaction */
194  	__le16 type;		/* 2: record type */
195  	__le16 length;		/* 2: length of data in record (in byte) */
196  	__le32 aggregate;	/* 4: file system lv/aggregate */
197  	/* (16) */
198  
199  	/*
200  	 * type dependent area (20)
201  	 */
202  	union {
203  
204  		/*
205  		 *	COMMIT: commit
206  		 *
207  		 * transaction commit: no type-dependent information;
208  		 */
209  
210  		/*
211  		 *	REDOPAGE: after-image
212  		 *
213  		 * apply after-image;
214  		 *
215  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
216  		 */
217  		struct {
218  			__le32 fileset;	/* 4: fileset number */
219  			__le32 inode;	/* 4: inode number */
220  			__le16 type;	/* 2: REDOPAGE record type */
221  			__le16 l2linesize;	/* 2: log2 of line size */
222  			pxd_t pxd;	/* 8: on-disk page pxd */
223  		} redopage;	/* (20) */
224  
225  		/*
226  		 *	NOREDOPAGE: the page is freed
227  		 *
228  		 * do not apply after-image records which precede this record
229  		 * in the log with the same page block number to this page.
230  		 *
231  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
232  		 */
233  		struct {
234  			__le32 fileset;	/* 4: fileset number */
235  			__le32 inode;	/* 4: inode number */
236  			__le16 type;	/* 2: NOREDOPAGE record type */
237  			__le16 rsrvd;	/* 2: reserved */
238  			pxd_t pxd;	/* 8: on-disk page pxd */
239  		} noredopage;	/* (20) */
240  
241  		/*
242  		 *	UPDATEMAP: update block allocation map
243  		 *
244  		 * either in-line PXD,
245  		 * or     out-of-line  XADLIST;
246  		 *
247  		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
248  		 */
249  		struct {
250  			__le32 fileset;	/* 4: fileset number */
251  			__le32 inode;	/* 4: inode number */
252  			__le16 type;	/* 2: UPDATEMAP record type */
253  			__le16 nxd;	/* 2: number of extents */
254  			pxd_t pxd;	/* 8: pxd */
255  		} updatemap;	/* (20) */
256  
257  		/*
258  		 *	NOREDOINOEXT: the inode extent is freed
259  		 *
260  		 * do not apply after-image records which precede this
261  		 * record in the log with the any of the 4 page block
262  		 * numbers in this inode extent.
263  		 *
264  		 * NOTE: The fileset and pxd fields MUST remain in
265  		 *       the same fields in the REDOPAGE record format.
266  		 *
267  		 */
268  		struct {
269  			__le32 fileset;	/* 4: fileset number */
270  			__le32 iagnum;	/* 4: IAG number     */
271  			__le32 inoext_idx;	/* 4: inode extent index */
272  			pxd_t pxd;	/* 8: on-disk page pxd */
273  		} noredoinoext;	/* (20) */
274  
275  		/*
276  		 *	SYNCPT: log sync point
277  		 *
278  		 * replay log up to syncpt address specified;
279  		 */
280  		struct {
281  			__le32 sync;	/* 4: syncpt address (0 = here) */
282  		} syncpt;
283  
284  		/*
285  		 *	MOUNT: file system mount
286  		 *
287  		 * file system mount: no type-dependent information;
288  		 */
289  
290  		/*
291  		 *	? FREEXTENT: free specified extent(s)
292  		 *
293  		 * free specified extent(s) from block allocation map
294  		 * N.B.: nextents should be length of data/sizeof(xad_t)
295  		 */
296  		struct {
297  			__le32 type;	/* 4: FREEXTENT record type */
298  			__le32 nextent;	/* 4: number of extents */
299  
300  			/* data: PXD or XAD list */
301  		} freextent;
302  
303  		/*
304  		 *	? NOREDOFILE: this file is freed
305  		 *
306  		 * do not apply records which precede this record in the log
307  		 * with the same inode number.
308  		 *
309  		 * NOREDOFILE must be the first to be written at commit
310  		 * (last to be read in logredo()) - it prevents
311  		 * replay of preceding updates of all preceding generations
312  		 * of the inumber esp. the on-disk inode itself.
313  		 */
314  		struct {
315  			__le32 fileset;	/* 4: fileset number */
316  			__le32 inode;	/* 4: inode number */
317  		} noredofile;
318  
319  		/*
320  		 *	? NEWPAGE:
321  		 *
322  		 * metadata type dependent
323  		 */
324  		struct {
325  			__le32 fileset;	/* 4: fileset number */
326  			__le32 inode;	/* 4: inode number */
327  			__le32 type;	/* 4: NEWPAGE record type */
328  			pxd_t pxd;	/* 8: on-disk page pxd */
329  		} newpage;
330  
331  		/*
332  		 *	? DUMMY: filler
333  		 *
334  		 * no type-dependent information
335  		 */
336  	} log;
337  };					/* (36) */
338  
339  #define	LOGRDSIZE	(sizeof(struct lrd))
340  
341  /*
342   *	line vector descriptor
343   */
344  struct lvd {
345  	__le16 offset;
346  	__le16 length;
347  };
348  
349  
350  /*
351   *	log logical volume
352   */
353  struct jfs_log {
354  
355  	struct list_head sb_list;/*  This is used to sync metadata
356  				 *    before writing syncpt.
357  				 */
358  	struct list_head journal_list; /* Global list */
359  	struct bdev_handle *bdev_handle; /* 4: log lv pointer */
360  	int serial;		/* 4: log mount serial number */
361  
362  	s64 base;		/* @8: log extent address (inline log ) */
363  	int size;		/* 4: log size in log page (in page) */
364  	int l2bsize;		/* 4: log2 of bsize */
365  
366  	unsigned long flag;	/* 4: flag */
367  
368  	struct lbuf *lbuf_free;	/* 4: free lbufs */
369  	wait_queue_head_t free_wait;	/* 4: */
370  
371  	/* log write */
372  	int logtid;		/* 4: log tid */
373  	int page;		/* 4: page number of eol page */
374  	int eor;		/* 4: eor of last record in eol page */
375  	struct lbuf *bp;	/* 4: current log page buffer */
376  
377  	struct mutex loglock;	/* 4: log write serialization lock */
378  
379  	/* syncpt */
380  	int nextsync;		/* 4: bytes to write before next syncpt */
381  	int active;		/* 4: */
382  	wait_queue_head_t syncwait;	/* 4: */
383  
384  	/* commit */
385  	uint cflag;		/* 4: */
386  	struct list_head cqueue; /* FIFO commit queue */
387  	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
388  	int gcrtc;		/* 4: GC_READY transaction count */
389  	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
390  	spinlock_t gclock;	/* 4: group commit lock */
391  	int logsize;		/* 4: log data area size in byte */
392  	int lsn;		/* 4: end-of-log */
393  	int clsn;		/* 4: clsn */
394  	int syncpt;		/* 4: addr of last syncpt record */
395  	int sync;		/* 4: addr from last logsync() */
396  	struct list_head synclist;	/* 8: logsynclist anchor */
397  	spinlock_t synclock;	/* 4: synclist lock */
398  	struct lbuf *wqueue;	/* 4: log pageout queue */
399  	int count;		/* 4: count */
400  	uuid_t uuid;		/* 16: 128-bit uuid of log device */
401  
402  	int no_integrity;	/* 3: flag to disable journaling to disk */
403  };
404  
/*
 * Log flag
 *
 * NOTE(review): the values are consecutive integers rather than
 * power-of-two masks - presumably bit numbers within jfs_log.flag;
 * confirm against the users of these constants.
 */
#define log_INLINELOG		1
#define log_SYNCBARRIER		2
#define log_QUIESCE		3
#define log_FLUSH		4
412  
/*
 * group commit flag
 */

/* flag for jfs_log */
#define logGC_PAGEOUT		0x00000001

/* flags for tblock/lbuf */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
430  
431  /*
432   *		log cache buffer header
433   */
434  struct lbuf {
435  	struct jfs_log *l_log;	/* 4: log associated with buffer */
436  
437  	/*
438  	 * data buffer base area
439  	 */
440  	uint l_flag;		/* 4: pageout control flags */
441  
442  	struct lbuf *l_wqnext;	/* 4: write queue link */
443  	struct lbuf *l_freelist;	/* 4: freelistlink */
444  
445  	int l_pn;		/* 4: log page number */
446  	int l_eor;		/* 4: log record eor */
447  	int l_ceor;		/* 4: committed log record eor */
448  
449  	s64 l_blkno;		/* 8: log page block number */
450  	caddr_t l_ldata;	/* 4: data page */
451  	struct page *l_page;	/* The page itself */
452  	uint l_offset;		/* Offset of l_ldata within the page */
453  
454  	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
455  };
456  
457  /* Reuse l_freelist for redrive list */
458  #define l_redrive_next l_freelist
459  
460  /*
461   *	logsynclist block
462   *
463   * common logsyncblk prefix for jbuf_t and tblock
464   */
465  struct logsyncblk {
466  	u16 xflag;		/* flags */
467  	u16 flag;		/* only meaninful in tblock */
468  	lid_t lid;		/* lock id */
469  	s32 lsn;		/* log sequence number */
470  	struct list_head synclist;	/* log sync list link */
471  };
472  
473  /*
474   *	logsynclist serialization (per log)
475   */
476  
/* Thin wrappers around the spinlock calls on the log's synclock field. */
#define LOGSYNC_LOCK_INIT(log)		\
	spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags)	\
	spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags)	\
	spin_unlock_irqrestore(&(log)->synclock, flags)
481  
/*
 * logdiff - compute the difference in bytes of lsn from sync point
 * @diff: signed integer lvalue that receives the result
 * @lsn:  log sequence number to measure
 * @log:  pointer to a log providing the syncpt and logsize fields
 *
 * Stores (lsn - log->syncpt) in @diff; a negative difference has
 * log->logsize added to it once.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and stays safe in an unbraced if/else.  Callers must terminate
 * the invocation with a semicolon.  @diff and @log are evaluated more than
 * once, so pass expressions without side effects.
 */
#define logdiff(diff, lsn, log)			\
do {						\
	(diff) = (lsn) - (log)->syncpt;		\
	if ((diff) < 0)				\
		(diff) += (log)->logsize;	\
} while (0)
489  
490  extern int lmLogOpen(struct super_block *sb);
491  extern int lmLogClose(struct super_block *sb);
492  extern int lmLogShutdown(struct jfs_log * log);
493  extern int lmLogInit(struct jfs_log * log);
494  extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
495  extern int lmGroupCommit(struct jfs_log *, struct tblock *);
496  extern int jfsIOWait(void *);
497  extern void jfs_flush_journal(struct jfs_log * log, int wait);
498  extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
499  
500  #endif				/* _H_JFS_LOGMGR */
501