xref: /openbmc/linux/fs/jfs/jfs_dtree.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   *   Copyright (C) International Business Machines Corp., 2000-2004
4   */
5  
6  /*
7   *	jfs_dtree.c: directory B+-tree manager
8   *
9   * B+-tree with variable length key directory:
10   *
11   * each directory page is structured as an array of 32-byte
12   * directory entry slots initialized as a freelist
13   * to avoid search/compaction of free space at insertion.
14   * when an entry is inserted, a number of slots are allocated
15   * from the freelist as required to store variable length data
16   * of the entry; when the entry is deleted, slots of the entry
17   * are returned to freelist.
18   *
19   * leaf entry stores full name as key and file serial number
20   * (aka inode number) as data.
21   * internal/router entry stores sufffix compressed name
22   * as key and simple extent descriptor as data.
23   *
24   * each directory page maintains a sorted entry index table
25   * which stores the start slot index of sorted entries
26   * to allow binary search on the table.
27   *
28   * directory starts as a root/leaf page in on-disk inode
29   * inline data area.
30   * when it becomes full, it starts a leaf of a external extent
31   * of length of 1 block. each time the first leaf becomes full,
32   * it is extended rather than split (its size is doubled),
33   * until its length becoms 4 KBytes, from then the extent is split
34   * with new 4 Kbyte extent when it becomes full
35   * to reduce external fragmentation of small directories.
36   *
37   * blah, blah, blah, for linear scan of directory in pieces by
38   * readdir().
39   *
40   *
41   *	case-insensitive directory file system
42   *
43   * names are stored in case-sensitive way in leaf entry.
44   * but stored, searched and compared in case-insensitive (uppercase) order
45   * (i.e., both search key and entry key are folded for search/compare):
46   * (note that case-sensitive order is BROKEN in storage, e.g.,
47   *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
48   *
49   *  entries which folds to the same key makes up a equivalent class
50   *  whose members are stored as contiguous cluster (may cross page boundary)
51   *  but whose order is arbitrary and acts as duplicate, e.g.,
52   *  abc, Abc, aBc, abC)
53   *
54   * once match is found at leaf, requires scan forward/backward
55   * either for, in case-insensitive search, duplicate
56   * or for, in case-sensitive search, for exact match
57   *
58   * router entry must be created/stored in case-insensitive way
59   * in internal entry:
60   * (right most key of left page and left most key of right page
61   * are folded, and its suffix compression is propagated as router
62   * key in parent)
63   * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
64   * should be made the router key for the split)
65   *
66   * case-insensitive search:
67   *
68   *	fold search key;
69   *
70   *	case-insensitive search of B-tree:
71   *	for internal entry, router key is already folded;
72   *	for leaf entry, fold the entry key before comparison.
73   *
74   *	if (leaf entry case-insensitive match found)
75   *		if (next entry satisfies case-insensitive match)
76   *			return EDUPLICATE;
77   *		if (prev entry satisfies case-insensitive match)
78   *			return EDUPLICATE;
79   *		return match;
80   *	else
81   *		return no match;
82   *
83   *	serialization:
84   * target directory inode lock is being held on entry/exit
85   * of all main directory service routines.
86   *
87   *	log based recovery:
88   */
89  
90  #include <linux/fs.h>
91  #include <linux/quotaops.h>
92  #include <linux/slab.h>
93  #include "jfs_incore.h"
94  #include "jfs_superblock.h"
95  #include "jfs_filsys.h"
96  #include "jfs_metapage.h"
97  #include "jfs_dmap.h"
98  #include "jfs_unicode.h"
99  #include "jfs_debug.h"
100  
101  /* dtree split parameter */
102  struct dtsplit {
103  	struct metapage *mp;
104  	s16 index;
105  	s16 nslot;
106  	struct component_name *key;
107  	ddata_t *data;
108  	struct pxdlist *pxdlist;
109  };
110  
111  #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
112  
113  /* get page buffer for specified block address */
114  #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
115  do {									\
116  	BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);	\
117  	if (!(RC)) {							\
118  		if (((P)->header.nextindex >				\
119  		     (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
120  		    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {	\
121  			BT_PUTPAGE(MP);					\
122  			jfs_error((IP)->i_sb,				\
123  				  "DT_GETPAGE: dtree page corrupt\n");	\
124  			MP = NULL;					\
125  			RC = -EIO;					\
126  		}							\
127  	}								\
128  } while (0)
129  
130  /* for consistency */
131  #define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
132  
133  #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
134  	BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
135  
136  /*
137   * forward references
138   */
139  static int dtSplitUp(tid_t tid, struct inode *ip,
140  		     struct dtsplit * split, struct btstack * btstack);
141  
142  static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
143  		       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
144  
145  static int dtExtendPage(tid_t tid, struct inode *ip,
146  			struct dtsplit * split, struct btstack * btstack);
147  
148  static int dtSplitRoot(tid_t tid, struct inode *ip,
149  		       struct dtsplit * split, struct metapage ** rmpp);
150  
151  static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
152  		      dtpage_t * fp, struct btstack * btstack);
153  
154  static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
155  
156  static int dtReadFirst(struct inode *ip, struct btstack * btstack);
157  
158  static int dtReadNext(struct inode *ip,
159  		      loff_t * offset, struct btstack * btstack);
160  
161  static int dtCompare(struct component_name * key, dtpage_t * p, int si);
162  
163  static int ciCompare(struct component_name * key, dtpage_t * p, int si,
164  		     int flag);
165  
166  static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
167  		     int flag);
168  
169  static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
170  			      int ri, struct component_name * key, int flag);
171  
172  static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
173  			  ddata_t * data, struct dt_lock **);
174  
175  static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
176  			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
177  			int do_index);
178  
179  static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
180  
181  static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
182  
183  static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
184  
185  #define ciToUpper(c)	UniStrupr((c)->name)
186  
187  /*
188   *	read_index_page()
189   *
190   *	Reads a page of a directory's index table.
191   *	Having metadata mapped into the directory inode's address space
192   *	presents a multitude of problems.  We avoid this by mapping to
193   *	the absolute address space outside of the *_metapage routines
194   */
read_index_page(struct inode * inode,s64 blkno)195  static struct metapage *read_index_page(struct inode *inode, s64 blkno)
196  {
197  	int rc;
198  	s64 xaddr;
199  	int xflag;
200  	s32 xlen;
201  
202  	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
203  	if (rc || (xaddr == 0))
204  		return NULL;
205  
206  	return read_metapage(inode, xaddr, PSIZE, 1);
207  }
208  
209  /*
210   *	get_index_page()
211   *
212   *	Same as get_index_page(), but get's a new page without reading
213   */
get_index_page(struct inode * inode,s64 blkno)214  static struct metapage *get_index_page(struct inode *inode, s64 blkno)
215  {
216  	int rc;
217  	s64 xaddr;
218  	int xflag;
219  	s32 xlen;
220  
221  	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
222  	if (rc || (xaddr == 0))
223  		return NULL;
224  
225  	return get_metapage(inode, xaddr, PSIZE, 1);
226  }
227  
228  /*
229   *	find_index()
230   *
231   *	Returns dtree page containing directory table entry for specified
232   *	index and pointer to its entry.
233   *
234   *	mp must be released by caller.
235   */
find_index(struct inode * ip,u32 index,struct metapage ** mp,s64 * lblock)236  static struct dir_table_slot *find_index(struct inode *ip, u32 index,
237  					 struct metapage ** mp, s64 *lblock)
238  {
239  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
240  	s64 blkno;
241  	s64 offset;
242  	int page_offset;
243  	struct dir_table_slot *slot;
244  	static int maxWarnings = 10;
245  
246  	if (index < 2) {
247  		if (maxWarnings) {
248  			jfs_warn("find_entry called with index = %d", index);
249  			maxWarnings--;
250  		}
251  		return NULL;
252  	}
253  
254  	if (index >= jfs_ip->next_index) {
255  		jfs_warn("find_entry called with index >= next_index");
256  		return NULL;
257  	}
258  
259  	if (jfs_dirtable_inline(ip)) {
260  		/*
261  		 * Inline directory table
262  		 */
263  		*mp = NULL;
264  		slot = &jfs_ip->i_dirtable[index - 2];
265  	} else {
266  		offset = (index - 2) * sizeof(struct dir_table_slot);
267  		page_offset = offset & (PSIZE - 1);
268  		blkno = ((offset + 1) >> L2PSIZE) <<
269  		    JFS_SBI(ip->i_sb)->l2nbperpage;
270  
271  		if (*mp && (*lblock != blkno)) {
272  			release_metapage(*mp);
273  			*mp = NULL;
274  		}
275  		if (!(*mp)) {
276  			*lblock = blkno;
277  			*mp = read_index_page(ip, blkno);
278  		}
279  		if (!(*mp)) {
280  			jfs_err("free_index: error reading directory table");
281  			return NULL;
282  		}
283  
284  		slot =
285  		    (struct dir_table_slot *) ((char *) (*mp)->data +
286  					       page_offset);
287  	}
288  	return slot;
289  }
290  
lock_index(tid_t tid,struct inode * ip,struct metapage * mp,u32 index)291  static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
292  			      u32 index)
293  {
294  	struct tlock *tlck;
295  	struct linelock *llck;
296  	struct lv *lv;
297  
298  	tlck = txLock(tid, ip, mp, tlckDATA);
299  	llck = (struct linelock *) tlck->lock;
300  
301  	if (llck->index >= llck->maxcnt)
302  		llck = txLinelock(llck);
303  	lv = &llck->lv[llck->index];
304  
305  	/*
306  	 *	Linelock slot size is twice the size of directory table
307  	 *	slot size.  512 entries per page.
308  	 */
309  	lv->offset = ((index - 2) & 511) >> 1;
310  	lv->length = 1;
311  	llck->index++;
312  }
313  
314  /*
315   *	add_index()
316   *
317   *	Adds an entry to the directory index table.  This is used to provide
318   *	each directory entry with a persistent index in which to resume
319   *	directory traversals
320   */
add_index(tid_t tid,struct inode * ip,s64 bn,int slot)321  static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
322  {
323  	struct super_block *sb = ip->i_sb;
324  	struct jfs_sb_info *sbi = JFS_SBI(sb);
325  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
326  	u64 blkno;
327  	struct dir_table_slot *dirtab_slot;
328  	u32 index;
329  	struct linelock *llck;
330  	struct lv *lv;
331  	struct metapage *mp;
332  	s64 offset;
333  	uint page_offset;
334  	struct tlock *tlck;
335  	s64 xaddr;
336  
337  	ASSERT(DO_INDEX(ip));
338  
339  	if (jfs_ip->next_index < 2) {
340  		jfs_warn("add_index: next_index = %d.  Resetting!",
341  			   jfs_ip->next_index);
342  		jfs_ip->next_index = 2;
343  	}
344  
345  	index = jfs_ip->next_index++;
346  
347  	if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
348  		/*
349  		 * i_size reflects size of index table, or 8 bytes per entry.
350  		 */
351  		ip->i_size = (loff_t) (index - 1) << 3;
352  
353  		/*
354  		 * dir table fits inline within inode
355  		 */
356  		dirtab_slot = &jfs_ip->i_dirtable[index-2];
357  		dirtab_slot->flag = DIR_INDEX_VALID;
358  		dirtab_slot->slot = slot;
359  		DTSaddress(dirtab_slot, bn);
360  
361  		set_cflag(COMMIT_Dirtable, ip);
362  
363  		return index;
364  	}
365  	if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
366  		struct dir_table_slot temp_table[12];
367  
368  		/*
369  		 * It's time to move the inline table to an external
370  		 * page and begin to build the xtree
371  		 */
372  		if (dquot_alloc_block(ip, sbi->nbperpage))
373  			goto clean_up;
374  		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
375  			dquot_free_block(ip, sbi->nbperpage);
376  			goto clean_up;
377  		}
378  
379  		/*
380  		 * Save the table, we're going to overwrite it with the
381  		 * xtree root
382  		 */
383  		memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
384  
385  		/*
386  		 * Initialize empty x-tree
387  		 */
388  		xtInitRoot(tid, ip);
389  
390  		/*
391  		 * Add the first block to the xtree
392  		 */
393  		if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
394  			/* This really shouldn't fail */
395  			jfs_warn("add_index: xtInsert failed!");
396  			memcpy(&jfs_ip->i_dirtable, temp_table,
397  			       sizeof (temp_table));
398  			dbFree(ip, xaddr, sbi->nbperpage);
399  			dquot_free_block(ip, sbi->nbperpage);
400  			goto clean_up;
401  		}
402  		ip->i_size = PSIZE;
403  
404  		mp = get_index_page(ip, 0);
405  		if (!mp) {
406  			jfs_err("add_index: get_metapage failed!");
407  			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
408  			memcpy(&jfs_ip->i_dirtable, temp_table,
409  			       sizeof (temp_table));
410  			goto clean_up;
411  		}
412  		tlck = txLock(tid, ip, mp, tlckDATA);
413  		llck = (struct linelock *) & tlck->lock;
414  		ASSERT(llck->index == 0);
415  		lv = &llck->lv[0];
416  
417  		lv->offset = 0;
418  		lv->length = 6;	/* tlckDATA slot size is 16 bytes */
419  		llck->index++;
420  
421  		memcpy(mp->data, temp_table, sizeof(temp_table));
422  
423  		mark_metapage_dirty(mp);
424  		release_metapage(mp);
425  
426  		/*
427  		 * Logging is now directed by xtree tlocks
428  		 */
429  		clear_cflag(COMMIT_Dirtable, ip);
430  	}
431  
432  	offset = (index - 2) * sizeof(struct dir_table_slot);
433  	page_offset = offset & (PSIZE - 1);
434  	blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
435  	if (page_offset == 0) {
436  		/*
437  		 * This will be the beginning of a new page
438  		 */
439  		xaddr = 0;
440  		if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
441  			jfs_warn("add_index: xtInsert failed!");
442  			goto clean_up;
443  		}
444  		ip->i_size += PSIZE;
445  
446  		if ((mp = get_index_page(ip, blkno)))
447  			memset(mp->data, 0, PSIZE);	/* Just looks better */
448  		else
449  			xtTruncate(tid, ip, offset, COMMIT_PWMAP);
450  	} else
451  		mp = read_index_page(ip, blkno);
452  
453  	if (!mp) {
454  		jfs_err("add_index: get/read_metapage failed!");
455  		goto clean_up;
456  	}
457  
458  	lock_index(tid, ip, mp, index);
459  
460  	dirtab_slot =
461  	    (struct dir_table_slot *) ((char *) mp->data + page_offset);
462  	dirtab_slot->flag = DIR_INDEX_VALID;
463  	dirtab_slot->slot = slot;
464  	DTSaddress(dirtab_slot, bn);
465  
466  	mark_metapage_dirty(mp);
467  	release_metapage(mp);
468  
469  	return index;
470  
471        clean_up:
472  
473  	jfs_ip->next_index--;
474  
475  	return 0;
476  }
477  
478  /*
479   *	free_index()
480   *
481   *	Marks an entry to the directory index table as free.
482   */
free_index(tid_t tid,struct inode * ip,u32 index,u32 next)483  static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
484  {
485  	struct dir_table_slot *dirtab_slot;
486  	s64 lblock;
487  	struct metapage *mp = NULL;
488  
489  	dirtab_slot = find_index(ip, index, &mp, &lblock);
490  
491  	if (!dirtab_slot)
492  		return;
493  
494  	dirtab_slot->flag = DIR_INDEX_FREE;
495  	dirtab_slot->slot = dirtab_slot->addr1 = 0;
496  	dirtab_slot->addr2 = cpu_to_le32(next);
497  
498  	if (mp) {
499  		lock_index(tid, ip, mp, index);
500  		mark_metapage_dirty(mp);
501  		release_metapage(mp);
502  	} else
503  		set_cflag(COMMIT_Dirtable, ip);
504  }
505  
506  /*
507   *	modify_index()
508   *
509   *	Changes an entry in the directory index table
510   */
modify_index(tid_t tid,struct inode * ip,u32 index,s64 bn,int slot,struct metapage ** mp,s64 * lblock)511  static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
512  			 int slot, struct metapage ** mp, s64 *lblock)
513  {
514  	struct dir_table_slot *dirtab_slot;
515  
516  	dirtab_slot = find_index(ip, index, mp, lblock);
517  
518  	if (!dirtab_slot)
519  		return;
520  
521  	DTSaddress(dirtab_slot, bn);
522  	dirtab_slot->slot = slot;
523  
524  	if (*mp) {
525  		lock_index(tid, ip, *mp, index);
526  		mark_metapage_dirty(*mp);
527  	} else
528  		set_cflag(COMMIT_Dirtable, ip);
529  }
530  
531  /*
532   *	read_index()
533   *
534   *	reads a directory table slot
535   */
read_index(struct inode * ip,u32 index,struct dir_table_slot * dirtab_slot)536  static int read_index(struct inode *ip, u32 index,
537  		     struct dir_table_slot * dirtab_slot)
538  {
539  	s64 lblock;
540  	struct metapage *mp = NULL;
541  	struct dir_table_slot *slot;
542  
543  	slot = find_index(ip, index, &mp, &lblock);
544  	if (!slot) {
545  		return -EIO;
546  	}
547  
548  	memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
549  
550  	if (mp)
551  		release_metapage(mp);
552  
553  	return 0;
554  }
555  
556  /*
557   *	dtSearch()
558   *
559   * function:
560   *	Search for the entry with specified key
561   *
562   * parameter:
563   *
564   * return: 0 - search result on stack, leaf page pinned;
565   *	   errno - I/O error
566   */
dtSearch(struct inode * ip,struct component_name * key,ino_t * data,struct btstack * btstack,int flag)567  int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
568  	     struct btstack * btstack, int flag)
569  {
570  	int rc = 0;
571  	int cmp = 1;		/* init for empty page */
572  	s64 bn;
573  	struct metapage *mp;
574  	dtpage_t *p;
575  	s8 *stbl;
576  	int base, index, lim;
577  	struct btframe *btsp;
578  	pxd_t *pxd;
579  	int psize = 288;	/* initial in-line directory */
580  	ino_t inumber;
581  	struct component_name ciKey;
582  	struct super_block *sb = ip->i_sb;
583  
584  	ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
585  				   GFP_NOFS);
586  	if (!ciKey.name) {
587  		rc = -ENOMEM;
588  		goto dtSearch_Exit2;
589  	}
590  
591  
592  	/* uppercase search key for c-i directory */
593  	UniStrcpy(ciKey.name, key->name);
594  	ciKey.namlen = key->namlen;
595  
596  	/* only uppercase if case-insensitive support is on */
597  	if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
598  		ciToUpper(&ciKey);
599  	}
600  	BT_CLR(btstack);	/* reset stack */
601  
602  	/* init level count for max pages to split */
603  	btstack->nsplit = 1;
604  
605  	/*
606  	 *	search down tree from root:
607  	 *
608  	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
609  	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
610  	 *
611  	 * if entry with search key K is not found
612  	 * internal page search find the entry with largest key Ki
613  	 * less than K which point to the child page to search;
614  	 * leaf page search find the entry with smallest key Kj
615  	 * greater than K so that the returned index is the position of
616  	 * the entry to be shifted right for insertion of new entry.
617  	 * for empty tree, search key is greater than any key of the tree.
618  	 *
619  	 * by convention, root bn = 0.
620  	 */
621  	for (bn = 0;;) {
622  		/* get/pin the page to search */
623  		DT_GETPAGE(ip, bn, mp, psize, p, rc);
624  		if (rc)
625  			goto dtSearch_Exit1;
626  
627  		/* get sorted entry table of the page */
628  		stbl = DT_GETSTBL(p);
629  
630  		/*
631  		 * binary search with search key K on the current page.
632  		 */
633  		for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
634  			index = base + (lim >> 1);
635  
636  			if (stbl[index] < 0) {
637  				rc = -EIO;
638  				goto out;
639  			}
640  
641  			if (p->header.flag & BT_LEAF) {
642  				/* uppercase leaf name to compare */
643  				cmp =
644  				    ciCompare(&ciKey, p, stbl[index],
645  					      JFS_SBI(sb)->mntflag);
646  			} else {
647  				/* router key is in uppercase */
648  
649  				cmp = dtCompare(&ciKey, p, stbl[index]);
650  
651  
652  			}
653  			if (cmp == 0) {
654  				/*
655  				 *	search hit
656  				 */
657  				/* search hit - leaf page:
658  				 * return the entry found
659  				 */
660  				if (p->header.flag & BT_LEAF) {
661  					inumber = le32_to_cpu(
662  			((struct ldtentry *) & p->slot[stbl[index]])->inumber);
663  
664  					/*
665  					 * search for JFS_LOOKUP
666  					 */
667  					if (flag == JFS_LOOKUP) {
668  						*data = inumber;
669  						rc = 0;
670  						goto out;
671  					}
672  
673  					/*
674  					 * search for JFS_CREATE
675  					 */
676  					if (flag == JFS_CREATE) {
677  						*data = inumber;
678  						rc = -EEXIST;
679  						goto out;
680  					}
681  
682  					/*
683  					 * search for JFS_REMOVE or JFS_RENAME
684  					 */
685  					if ((flag == JFS_REMOVE ||
686  					     flag == JFS_RENAME) &&
687  					    *data != inumber) {
688  						rc = -ESTALE;
689  						goto out;
690  					}
691  
692  					/*
693  					 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
694  					 */
695  					/* save search result */
696  					*data = inumber;
697  					btsp = btstack->top;
698  					btsp->bn = bn;
699  					btsp->index = index;
700  					btsp->mp = mp;
701  
702  					rc = 0;
703  					goto dtSearch_Exit1;
704  				}
705  
706  				/* search hit - internal page:
707  				 * descend/search its child page
708  				 */
709  				goto getChild;
710  			}
711  
712  			if (cmp > 0) {
713  				base = index + 1;
714  				--lim;
715  			}
716  		}
717  
718  		/*
719  		 *	search miss
720  		 *
721  		 * base is the smallest index with key (Kj) greater than
722  		 * search key (K) and may be zero or (maxindex + 1) index.
723  		 */
724  		/*
725  		 * search miss - leaf page
726  		 *
727  		 * return location of entry (base) where new entry with
728  		 * search key K is to be inserted.
729  		 */
730  		if (p->header.flag & BT_LEAF) {
731  			/*
732  			 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
733  			 */
734  			if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
735  			    flag == JFS_RENAME) {
736  				rc = -ENOENT;
737  				goto out;
738  			}
739  
740  			/*
741  			 * search for JFS_CREATE|JFS_FINDDIR:
742  			 *
743  			 * save search result
744  			 */
745  			*data = 0;
746  			btsp = btstack->top;
747  			btsp->bn = bn;
748  			btsp->index = base;
749  			btsp->mp = mp;
750  
751  			rc = 0;
752  			goto dtSearch_Exit1;
753  		}
754  
755  		/*
756  		 * search miss - internal page
757  		 *
758  		 * if base is non-zero, decrement base by one to get the parent
759  		 * entry of the child page to search.
760  		 */
761  		index = base ? base - 1 : base;
762  
763  		/*
764  		 * go down to child page
765  		 */
766  	      getChild:
767  		/* update max. number of pages to split */
768  		if (BT_STACK_FULL(btstack)) {
769  			/* Something's corrupted, mark filesystem dirty so
770  			 * chkdsk will fix it.
771  			 */
772  			jfs_error(sb, "stack overrun!\n");
773  			BT_STACK_DUMP(btstack);
774  			rc = -EIO;
775  			goto out;
776  		}
777  		btstack->nsplit++;
778  
779  		/* push (bn, index) of the parent page/entry */
780  		BT_PUSH(btstack, bn, index);
781  
782  		/* get the child page block number */
783  		pxd = (pxd_t *) & p->slot[stbl[index]];
784  		bn = addressPXD(pxd);
785  		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
786  
787  		/* unpin the parent page */
788  		DT_PUTPAGE(mp);
789  	}
790  
791        out:
792  	DT_PUTPAGE(mp);
793  
794        dtSearch_Exit1:
795  
796  	kfree(ciKey.name);
797  
798        dtSearch_Exit2:
799  
800  	return rc;
801  }
802  
803  
804  /*
805   *	dtInsert()
806   *
807   * function: insert an entry to directory tree
808   *
809   * parameter:
810   *
811   * return: 0 - success;
812   *	   errno - failure;
813   */
dtInsert(tid_t tid,struct inode * ip,struct component_name * name,ino_t * fsn,struct btstack * btstack)814  int dtInsert(tid_t tid, struct inode *ip,
815  	 struct component_name * name, ino_t * fsn, struct btstack * btstack)
816  {
817  	int rc = 0;
818  	struct metapage *mp;	/* meta-page buffer */
819  	dtpage_t *p;		/* base B+-tree index page */
820  	s64 bn;
821  	int index;
822  	struct dtsplit split;	/* split information */
823  	ddata_t data;
824  	struct dt_lock *dtlck;
825  	int n;
826  	struct tlock *tlck;
827  	struct lv *lv;
828  
829  	/*
830  	 *	retrieve search result
831  	 *
832  	 * dtSearch() returns (leaf page pinned, index at which to insert).
833  	 * n.b. dtSearch() may return index of (maxindex + 1) of
834  	 * the full page.
835  	 */
836  	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
837  	if (p->header.freelist == 0)
838  		return -EINVAL;
839  
840  	/*
841  	 *	insert entry for new key
842  	 */
843  	if (DO_INDEX(ip)) {
844  		if (JFS_IP(ip)->next_index == DIREND) {
845  			DT_PUTPAGE(mp);
846  			return -EMLINK;
847  		}
848  		n = NDTLEAF(name->namlen);
849  		data.leaf.tid = tid;
850  		data.leaf.ip = ip;
851  	} else {
852  		n = NDTLEAF_LEGACY(name->namlen);
853  		data.leaf.ip = NULL;	/* signifies legacy directory format */
854  	}
855  	data.leaf.ino = *fsn;
856  
857  	/*
858  	 *	leaf page does not have enough room for new entry:
859  	 *
860  	 *	extend/split the leaf page;
861  	 *
862  	 * dtSplitUp() will insert the entry and unpin the leaf page.
863  	 */
864  	if (n > p->header.freecnt) {
865  		split.mp = mp;
866  		split.index = index;
867  		split.nslot = n;
868  		split.key = name;
869  		split.data = &data;
870  		rc = dtSplitUp(tid, ip, &split, btstack);
871  		return rc;
872  	}
873  
874  	/*
875  	 *	leaf page does have enough room for new entry:
876  	 *
877  	 *	insert the new data entry into the leaf page;
878  	 */
879  	BT_MARK_DIRTY(mp, ip);
880  	/*
881  	 * acquire a transaction lock on the leaf page
882  	 */
883  	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
884  	dtlck = (struct dt_lock *) & tlck->lock;
885  	ASSERT(dtlck->index == 0);
886  	lv = & dtlck->lv[0];
887  
888  	/* linelock header */
889  	lv->offset = 0;
890  	lv->length = 1;
891  	dtlck->index++;
892  
893  	dtInsertEntry(p, index, name, &data, &dtlck);
894  
895  	/* linelock stbl of non-root leaf page */
896  	if (!(p->header.flag & BT_ROOT)) {
897  		if (dtlck->index >= dtlck->maxcnt)
898  			dtlck = (struct dt_lock *) txLinelock(dtlck);
899  		lv = & dtlck->lv[dtlck->index];
900  		n = index >> L2DTSLOTSIZE;
901  		lv->offset = p->header.stblindex + n;
902  		lv->length =
903  		    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
904  		dtlck->index++;
905  	}
906  
907  	/* unpin the leaf page */
908  	DT_PUTPAGE(mp);
909  
910  	return 0;
911  }
912  
913  
914  /*
915   *	dtSplitUp()
916   *
917   * function: propagate insertion bottom up;
918   *
919   * parameter:
920   *
921   * return: 0 - success;
922   *	   errno - failure;
923   *	leaf page unpinned;
924   */
dtSplitUp(tid_t tid,struct inode * ip,struct dtsplit * split,struct btstack * btstack)925  static int dtSplitUp(tid_t tid,
926  	  struct inode *ip, struct dtsplit * split, struct btstack * btstack)
927  {
928  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
929  	int rc = 0;
930  	struct metapage *smp;
931  	dtpage_t *sp;		/* split page */
932  	struct metapage *rmp;
933  	dtpage_t *rp;		/* new right page split from sp */
934  	pxd_t rpxd;		/* new right page extent descriptor */
935  	struct metapage *lmp;
936  	dtpage_t *lp;		/* left child page */
937  	int skip;		/* index of entry of insertion */
938  	struct btframe *parent;	/* parent page entry on traverse stack */
939  	s64 xaddr, nxaddr;
940  	int xlen, xsize;
941  	struct pxdlist pxdlist;
942  	pxd_t *pxd;
943  	struct component_name key = { 0, NULL };
944  	ddata_t *data = split->data;
945  	int n;
946  	struct dt_lock *dtlck;
947  	struct tlock *tlck;
948  	struct lv *lv;
949  	int quota_allocation = 0;
950  
951  	/* get split page */
952  	smp = split->mp;
953  	sp = DT_PAGE(ip, smp);
954  
955  	key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS);
956  	if (!key.name) {
957  		DT_PUTPAGE(smp);
958  		rc = -ENOMEM;
959  		goto dtSplitUp_Exit;
960  	}
961  
962  	/*
963  	 *	split leaf page
964  	 *
965  	 * The split routines insert the new entry, and
966  	 * acquire txLock as appropriate.
967  	 */
968  	/*
969  	 *	split root leaf page:
970  	 */
971  	if (sp->header.flag & BT_ROOT) {
972  		/*
973  		 * allocate a single extent child page
974  		 */
975  		xlen = 1;
976  		n = sbi->bsize >> L2DTSLOTSIZE;
977  		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
978  		n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
979  		if (n <= split->nslot)
980  			xlen++;
981  		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
982  			DT_PUTPAGE(smp);
983  			goto freeKeyName;
984  		}
985  
986  		pxdlist.maxnpxd = 1;
987  		pxdlist.npxd = 0;
988  		pxd = &pxdlist.pxd[0];
989  		PXDaddress(pxd, xaddr);
990  		PXDlength(pxd, xlen);
991  		split->pxdlist = &pxdlist;
992  		rc = dtSplitRoot(tid, ip, split, &rmp);
993  
994  		if (rc)
995  			dbFree(ip, xaddr, xlen);
996  		else
997  			DT_PUTPAGE(rmp);
998  
999  		DT_PUTPAGE(smp);
1000  
1001  		if (!DO_INDEX(ip))
1002  			ip->i_size = xlen << sbi->l2bsize;
1003  
1004  		goto freeKeyName;
1005  	}
1006  
1007  	/*
1008  	 *	extend first leaf page
1009  	 *
1010  	 * extend the 1st extent if less than buffer page size
1011  	 * (dtExtendPage() reurns leaf page unpinned)
1012  	 */
1013  	pxd = &sp->header.self;
1014  	xlen = lengthPXD(pxd);
1015  	xsize = xlen << sbi->l2bsize;
1016  	if (xsize < PSIZE) {
1017  		xaddr = addressPXD(pxd);
1018  		n = xsize >> L2DTSLOTSIZE;
1019  		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
1020  		if ((n + sp->header.freecnt) <= split->nslot)
1021  			n = xlen + (xlen << 1);
1022  		else
1023  			n = xlen;
1024  
1025  		/* Allocate blocks to quota. */
1026  		rc = dquot_alloc_block(ip, n);
1027  		if (rc)
1028  			goto extendOut;
1029  		quota_allocation += n;
1030  
1031  		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1032  				    (s64) n, &nxaddr)))
1033  			goto extendOut;
1034  
1035  		pxdlist.maxnpxd = 1;
1036  		pxdlist.npxd = 0;
1037  		pxd = &pxdlist.pxd[0];
1038  		PXDaddress(pxd, nxaddr);
1039  		PXDlength(pxd, xlen + n);
1040  		split->pxdlist = &pxdlist;
1041  		if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1042  			nxaddr = addressPXD(pxd);
1043  			if (xaddr != nxaddr) {
1044  				/* free relocated extent */
1045  				xlen = lengthPXD(pxd);
1046  				dbFree(ip, nxaddr, (s64) xlen);
1047  			} else {
1048  				/* free extended delta */
1049  				xlen = lengthPXD(pxd) - n;
1050  				xaddr = addressPXD(pxd) + xlen;
1051  				dbFree(ip, xaddr, (s64) n);
1052  			}
1053  		} else if (!DO_INDEX(ip))
1054  			ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1055  
1056  
1057  	      extendOut:
1058  		DT_PUTPAGE(smp);
1059  		goto freeKeyName;
1060  	}
1061  
1062  	/*
1063  	 *	split leaf page <sp> into <sp> and a new right page <rp>.
1064  	 *
1065  	 * return <rp> pinned and its extent descriptor <rpxd>
1066  	 */
1067  	/*
1068  	 * allocate new directory page extent and
1069  	 * new index page(s) to cover page split(s)
1070  	 *
1071  	 * allocation hint: ?
1072  	 */
1073  	n = btstack->nsplit;
1074  	pxdlist.maxnpxd = pxdlist.npxd = 0;
1075  	xlen = sbi->nbperpage;
1076  	for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1077  		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1078  			PXDaddress(pxd, xaddr);
1079  			PXDlength(pxd, xlen);
1080  			pxdlist.maxnpxd++;
1081  			continue;
1082  		}
1083  
1084  		DT_PUTPAGE(smp);
1085  
1086  		/* undo allocation */
1087  		goto splitOut;
1088  	}
1089  
1090  	split->pxdlist = &pxdlist;
1091  	if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1092  		DT_PUTPAGE(smp);
1093  
1094  		/* undo allocation */
1095  		goto splitOut;
1096  	}
1097  
1098  	if (!DO_INDEX(ip))
1099  		ip->i_size += PSIZE;
1100  
1101  	/*
1102  	 * propagate up the router entry for the leaf page just split
1103  	 *
1104  	 * insert a router entry for the new page into the parent page,
1105  	 * propagate the insert/split up the tree by walking back the stack
1106  	 * of (bn of parent page, index of child page entry in parent page)
1107  	 * that were traversed during the search for the page that split.
1108  	 *
1109  	 * the propagation of insert/split up the tree stops if the root
1110  	 * splits or the page inserted into doesn't have to split to hold
1111  	 * the new entry.
1112  	 *
1113  	 * the parent entry for the split page remains the same, and
1114  	 * a new entry is inserted at its right with the first key and
1115  	 * block number of the new right page.
1116  	 *
1117  	 * There are a maximum of 4 pages pinned at any time:
1118  	 * two children, left parent and right parent (when the parent splits).
1119  	 * keep the child pages pinned while working on the parent.
1120  	 * make sure that all pins are released at exit.
1121  	 */
1122  	while ((parent = BT_POP(btstack)) != NULL) {
1123  		/* parent page specified by stack frame <parent> */
1124  
1125  		/* keep current child pages (<lp>, <rp>) pinned */
1126  		lmp = smp;
1127  		lp = sp;
1128  
1129  		/*
1130  		 * insert router entry in parent for new right child page <rp>
1131  		 */
1132  		/* get the parent page <sp> */
1133  		DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1134  		if (rc) {
1135  			DT_PUTPAGE(lmp);
1136  			DT_PUTPAGE(rmp);
1137  			goto splitOut;
1138  		}
1139  
1140  		/*
1141  		 * The new key entry goes ONE AFTER the index of parent entry,
1142  		 * because the split was to the right.
1143  		 */
1144  		skip = parent->index + 1;
1145  
1146  		/*
1147  		 * compute the key for the router entry
1148  		 *
1149  		 * key suffix compression:
1150  		 * for internal pages that have leaf pages as children,
1151  		 * retain only what's needed to distinguish between
1152  		 * the new entry and the entry on the page to its left.
1153  		 * If the keys compare equal, retain the entire key.
1154  		 *
1155  		 * note that compression is performed only at computing
1156  		 * router key at the lowest internal level.
1157  		 * further compression of the key between pairs of higher
1158  		 * level internal pages loses too much information and
1159  		 * the search may fail.
1160  		 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1161  		 * results in two adjacent parent entries (a)(xx).
1162  		 * if split occurs between these two entries, and
1163  		 * if compression is applied, the router key of parent entry
1164  		 * of right page (x) will divert search for x into right
1165  		 * subtree and miss x in the left subtree.)
1166  		 *
1167  		 * the entire key must be retained for the next-to-leftmost
1168  		 * internal key at any level of the tree, or search may fail
1169  		 * (e.g., ?)
1170  		 */
1171  		switch (rp->header.flag & BT_TYPE) {
1172  		case BT_LEAF:
1173  			/*
1174  			 * compute the length of prefix for suffix compression
1175  			 * between last entry of left page and first entry
1176  			 * of right page
1177  			 */
1178  			if ((sp->header.flag & BT_ROOT && skip > 1) ||
1179  			    sp->header.prev != 0 || skip > 1) {
1180  				/* compute uppercase router prefix key */
1181  				rc = ciGetLeafPrefixKey(lp,
1182  							lp->header.nextindex-1,
1183  							rp, 0, &key,
1184  							sbi->mntflag);
1185  				if (rc) {
1186  					DT_PUTPAGE(lmp);
1187  					DT_PUTPAGE(rmp);
1188  					DT_PUTPAGE(smp);
1189  					goto splitOut;
1190  				}
1191  			} else {
1192  				/* next to leftmost entry of
1193  				   lowest internal level */
1194  
1195  				/* compute uppercase router key */
1196  				dtGetKey(rp, 0, &key, sbi->mntflag);
1197  				key.name[key.namlen] = 0;
1198  
1199  				if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1200  					ciToUpper(&key);
1201  			}
1202  
1203  			n = NDTINTERNAL(key.namlen);
1204  			break;
1205  
1206  		case BT_INTERNAL:
1207  			dtGetKey(rp, 0, &key, sbi->mntflag);
1208  			n = NDTINTERNAL(key.namlen);
1209  			break;
1210  
1211  		default:
1212  			jfs_err("dtSplitUp(): UFO!");
1213  			break;
1214  		}
1215  
1216  		/* unpin left child page */
1217  		DT_PUTPAGE(lmp);
1218  
1219  		/*
1220  		 * compute the data for the router entry
1221  		 */
1222  		data->xd = rpxd;	/* child page xd */
1223  
1224  		/*
1225  		 * parent page is full - split the parent page
1226  		 */
1227  		if (n > sp->header.freecnt) {
1228  			/* init for parent page split */
1229  			split->mp = smp;
1230  			split->index = skip;	/* index at insert */
1231  			split->nslot = n;
1232  			split->key = &key;
1233  			/* split->data = data; */
1234  
1235  			/* unpin right child page */
1236  			DT_PUTPAGE(rmp);
1237  
1238  			/* The split routines insert the new entry,
1239  			 * acquire txLock as appropriate.
1240  			 * return <rp> pinned and its block number <rbn>.
1241  			 */
1242  			rc = (sp->header.flag & BT_ROOT) ?
1243  			    dtSplitRoot(tid, ip, split, &rmp) :
1244  			    dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1245  			if (rc) {
1246  				DT_PUTPAGE(smp);
1247  				goto splitOut;
1248  			}
1249  
1250  			/* smp and rmp are pinned */
1251  		}
1252  		/*
1253  		 * parent page is not full - insert router entry in parent page
1254  		 */
1255  		else {
1256  			BT_MARK_DIRTY(smp, ip);
1257  			/*
1258  			 * acquire a transaction lock on the parent page
1259  			 */
1260  			tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1261  			dtlck = (struct dt_lock *) & tlck->lock;
1262  			ASSERT(dtlck->index == 0);
1263  			lv = & dtlck->lv[0];
1264  
1265  			/* linelock header */
1266  			lv->offset = 0;
1267  			lv->length = 1;
1268  			dtlck->index++;
1269  
1270  			/* linelock stbl of non-root parent page */
1271  			if (!(sp->header.flag & BT_ROOT)) {
1272  				lv++;
1273  				n = skip >> L2DTSLOTSIZE;
1274  				lv->offset = sp->header.stblindex + n;
1275  				lv->length =
1276  				    ((sp->header.nextindex -
1277  				      1) >> L2DTSLOTSIZE) - n + 1;
1278  				dtlck->index++;
1279  			}
1280  
1281  			dtInsertEntry(sp, skip, &key, data, &dtlck);
1282  
1283  			/* exit propagate up */
1284  			break;
1285  		}
1286  	}
1287  
1288  	/* unpin current split and its right page */
1289  	DT_PUTPAGE(smp);
1290  	DT_PUTPAGE(rmp);
1291  
1292  	/*
1293  	 * free remaining extents allocated for split
1294  	 */
1295        splitOut:
1296  	n = pxdlist.npxd;
1297  	pxd = &pxdlist.pxd[n];
1298  	for (; n < pxdlist.maxnpxd; n++, pxd++)
1299  		dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1300  
1301        freeKeyName:
1302  	kfree(key.name);
1303  
1304  	/* Rollback quota allocation */
1305  	if (rc && quota_allocation)
1306  		dquot_free_block(ip, quota_allocation);
1307  
1308        dtSplitUp_Exit:
1309  
1310  	return rc;
1311  }
1312  
1313  
1314  /*
1315   *	dtSplitPage()
1316   *
1317   * function: Split a non-root page of a btree.
1318   *
1319   * parameter:
1320   *
1321   * return: 0 - success;
1322   *	   errno - failure;
1323   *	return split and new page pinned;
1324   */
dtSplitPage(tid_t tid,struct inode * ip,struct dtsplit * split,struct metapage ** rmpp,dtpage_t ** rpp,pxd_t * rpxdp)1325  static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1326  	    struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1327  {
1328  	int rc = 0;
1329  	struct metapage *smp;
1330  	dtpage_t *sp;
1331  	struct metapage *rmp;
1332  	dtpage_t *rp;		/* new right page allocated */
1333  	s64 rbn;		/* new right page block number */
1334  	struct metapage *mp;
1335  	dtpage_t *p;
1336  	s64 nextbn;
1337  	struct pxdlist *pxdlist;
1338  	pxd_t *pxd;
1339  	int skip, nextindex, half, left, nxt, off, si;
1340  	struct ldtentry *ldtentry;
1341  	struct idtentry *idtentry;
1342  	u8 *stbl;
1343  	struct dtslot *f;
1344  	int fsi, stblsize;
1345  	int n;
1346  	struct dt_lock *sdtlck, *rdtlck;
1347  	struct tlock *tlck;
1348  	struct dt_lock *dtlck;
1349  	struct lv *slv, *rlv, *lv;
1350  
1351  	/* get split page */
1352  	smp = split->mp;
1353  	sp = DT_PAGE(ip, smp);
1354  
1355  	/*
1356  	 * allocate the new right page for the split
1357  	 */
1358  	pxdlist = split->pxdlist;
1359  	pxd = &pxdlist->pxd[pxdlist->npxd];
1360  	pxdlist->npxd++;
1361  	rbn = addressPXD(pxd);
1362  	rmp = get_metapage(ip, rbn, PSIZE, 1);
1363  	if (rmp == NULL)
1364  		return -EIO;
1365  
1366  	/* Allocate blocks to quota. */
1367  	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1368  	if (rc) {
1369  		release_metapage(rmp);
1370  		return rc;
1371  	}
1372  
1373  	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1374  
1375  	BT_MARK_DIRTY(rmp, ip);
1376  	/*
1377  	 * acquire a transaction lock on the new right page
1378  	 */
1379  	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1380  	rdtlck = (struct dt_lock *) & tlck->lock;
1381  
1382  	rp = (dtpage_t *) rmp->data;
1383  	*rpp = rp;
1384  	rp->header.self = *pxd;
1385  
1386  	BT_MARK_DIRTY(smp, ip);
1387  	/*
1388  	 * acquire a transaction lock on the split page
1389  	 *
1390  	 * action:
1391  	 */
1392  	tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1393  	sdtlck = (struct dt_lock *) & tlck->lock;
1394  
1395  	/* linelock header of split page */
1396  	ASSERT(sdtlck->index == 0);
1397  	slv = & sdtlck->lv[0];
1398  	slv->offset = 0;
1399  	slv->length = 1;
1400  	sdtlck->index++;
1401  
1402  	/*
1403  	 * initialize/update sibling pointers between sp and rp
1404  	 */
1405  	nextbn = le64_to_cpu(sp->header.next);
1406  	rp->header.next = cpu_to_le64(nextbn);
1407  	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1408  	sp->header.next = cpu_to_le64(rbn);
1409  
1410  	/*
1411  	 * initialize new right page
1412  	 */
1413  	rp->header.flag = sp->header.flag;
1414  
1415  	/* compute sorted entry table at start of extent data area */
1416  	rp->header.nextindex = 0;
1417  	rp->header.stblindex = 1;
1418  
1419  	n = PSIZE >> L2DTSLOTSIZE;
1420  	rp->header.maxslot = n;
1421  	stblsize = (n + 31) >> L2DTSLOTSIZE;	/* in unit of slot */
1422  
1423  	/* init freelist */
1424  	fsi = rp->header.stblindex + stblsize;
1425  	rp->header.freelist = fsi;
1426  	rp->header.freecnt = rp->header.maxslot - fsi;
1427  
1428  	/*
1429  	 *	sequential append at tail: append without split
1430  	 *
1431  	 * If splitting the last page on a level because of appending
1432  	 * a entry to it (skip is maxentry), it's likely that the access is
1433  	 * sequential. Adding an empty page on the side of the level is less
1434  	 * work and can push the fill factor much higher than normal.
1435  	 * If we're wrong it's no big deal, we'll just do the split the right
1436  	 * way next time.
1437  	 * (It may look like it's equally easy to do a similar hack for
1438  	 * reverse sorted data, that is, split the tree left,
1439  	 * but it's not. Be my guest.)
1440  	 */
1441  	if (nextbn == 0 && split->index == sp->header.nextindex) {
1442  		/* linelock header + stbl (first slot) of new page */
1443  		rlv = & rdtlck->lv[rdtlck->index];
1444  		rlv->offset = 0;
1445  		rlv->length = 2;
1446  		rdtlck->index++;
1447  
1448  		/*
1449  		 * initialize freelist of new right page
1450  		 */
1451  		f = &rp->slot[fsi];
1452  		for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1453  			f->next = fsi;
1454  		f->next = -1;
1455  
1456  		/* insert entry at the first entry of the new right page */
1457  		dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1458  
1459  		goto out;
1460  	}
1461  
1462  	/*
1463  	 *	non-sequential insert (at possibly middle page)
1464  	 */
1465  
1466  	/*
1467  	 * update prev pointer of previous right sibling page;
1468  	 */
1469  	if (nextbn != 0) {
1470  		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1471  		if (rc) {
1472  			discard_metapage(rmp);
1473  			return rc;
1474  		}
1475  
1476  		BT_MARK_DIRTY(mp, ip);
1477  		/*
1478  		 * acquire a transaction lock on the next page
1479  		 */
1480  		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1481  		jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1482  			tlck, ip, mp);
1483  		dtlck = (struct dt_lock *) & tlck->lock;
1484  
1485  		/* linelock header of previous right sibling page */
1486  		lv = & dtlck->lv[dtlck->index];
1487  		lv->offset = 0;
1488  		lv->length = 1;
1489  		dtlck->index++;
1490  
1491  		p->header.prev = cpu_to_le64(rbn);
1492  
1493  		DT_PUTPAGE(mp);
1494  	}
1495  
1496  	/*
1497  	 * split the data between the split and right pages.
1498  	 */
1499  	skip = split->index;
1500  	half = (PSIZE >> L2DTSLOTSIZE) >> 1;	/* swag */
1501  	left = 0;
1502  
1503  	/*
1504  	 *	compute fill factor for split pages
1505  	 *
1506  	 * <nxt> traces the next entry to move to rp
1507  	 * <off> traces the next entry to stay in sp
1508  	 */
1509  	stbl = (u8 *) & sp->slot[sp->header.stblindex];
1510  	nextindex = sp->header.nextindex;
1511  	for (nxt = off = 0; nxt < nextindex; ++off) {
1512  		if (off == skip)
1513  			/* check for fill factor with new entry size */
1514  			n = split->nslot;
1515  		else {
1516  			si = stbl[nxt];
1517  			switch (sp->header.flag & BT_TYPE) {
1518  			case BT_LEAF:
1519  				ldtentry = (struct ldtentry *) & sp->slot[si];
1520  				if (DO_INDEX(ip))
1521  					n = NDTLEAF(ldtentry->namlen);
1522  				else
1523  					n = NDTLEAF_LEGACY(ldtentry->
1524  							   namlen);
1525  				break;
1526  
1527  			case BT_INTERNAL:
1528  				idtentry = (struct idtentry *) & sp->slot[si];
1529  				n = NDTINTERNAL(idtentry->namlen);
1530  				break;
1531  
1532  			default:
1533  				break;
1534  			}
1535  
1536  			++nxt;	/* advance to next entry to move in sp */
1537  		}
1538  
1539  		left += n;
1540  		if (left >= half)
1541  			break;
1542  	}
1543  
1544  	/* <nxt> poins to the 1st entry to move */
1545  
1546  	/*
1547  	 *	move entries to right page
1548  	 *
1549  	 * dtMoveEntry() initializes rp and reserves entry for insertion
1550  	 *
1551  	 * split page moved out entries are linelocked;
1552  	 * new/right page moved in entries are linelocked;
1553  	 */
1554  	/* linelock header + stbl of new right page */
1555  	rlv = & rdtlck->lv[rdtlck->index];
1556  	rlv->offset = 0;
1557  	rlv->length = 5;
1558  	rdtlck->index++;
1559  
1560  	dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1561  
1562  	sp->header.nextindex = nxt;
1563  
1564  	/*
1565  	 * finalize freelist of new right page
1566  	 */
1567  	fsi = rp->header.freelist;
1568  	f = &rp->slot[fsi];
1569  	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1570  		f->next = fsi;
1571  	f->next = -1;
1572  
1573  	/*
1574  	 * Update directory index table for entries now in right page
1575  	 */
1576  	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1577  		s64 lblock;
1578  
1579  		mp = NULL;
1580  		stbl = DT_GETSTBL(rp);
1581  		for (n = 0; n < rp->header.nextindex; n++) {
1582  			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1583  			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1584  				     rbn, n, &mp, &lblock);
1585  		}
1586  		if (mp)
1587  			release_metapage(mp);
1588  	}
1589  
1590  	/*
1591  	 * the skipped index was on the left page,
1592  	 */
1593  	if (skip <= off) {
1594  		/* insert the new entry in the split page */
1595  		dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1596  
1597  		/* linelock stbl of split page */
1598  		if (sdtlck->index >= sdtlck->maxcnt)
1599  			sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1600  		slv = & sdtlck->lv[sdtlck->index];
1601  		n = skip >> L2DTSLOTSIZE;
1602  		slv->offset = sp->header.stblindex + n;
1603  		slv->length =
1604  		    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1605  		sdtlck->index++;
1606  	}
1607  	/*
1608  	 * the skipped index was on the right page,
1609  	 */
1610  	else {
1611  		/* adjust the skip index to reflect the new position */
1612  		skip -= nxt;
1613  
1614  		/* insert the new entry in the right page */
1615  		dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1616  	}
1617  
1618        out:
1619  	*rmpp = rmp;
1620  	*rpxdp = *pxd;
1621  
1622  	return rc;
1623  }
1624  
1625  
1626  /*
1627   *	dtExtendPage()
1628   *
1629   * function: extend 1st/only directory leaf page
1630   *
1631   * parameter:
1632   *
1633   * return: 0 - success;
1634   *	   errno - failure;
1635   *	return extended page pinned;
1636   */
dtExtendPage(tid_t tid,struct inode * ip,struct dtsplit * split,struct btstack * btstack)1637  static int dtExtendPage(tid_t tid,
1638  	     struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1639  {
1640  	struct super_block *sb = ip->i_sb;
1641  	int rc;
1642  	struct metapage *smp, *pmp, *mp;
1643  	dtpage_t *sp, *pp;
1644  	struct pxdlist *pxdlist;
1645  	pxd_t *pxd, *tpxd;
1646  	int xlen, xsize;
1647  	int newstblindex, newstblsize;
1648  	int oldstblindex, oldstblsize;
1649  	int fsi, last;
1650  	struct dtslot *f;
1651  	struct btframe *parent;
1652  	int n;
1653  	struct dt_lock *dtlck;
1654  	s64 xaddr, txaddr;
1655  	struct tlock *tlck;
1656  	struct pxd_lock *pxdlock;
1657  	struct lv *lv;
1658  	uint type;
1659  	struct ldtentry *ldtentry;
1660  	u8 *stbl;
1661  
1662  	/* get page to extend */
1663  	smp = split->mp;
1664  	sp = DT_PAGE(ip, smp);
1665  
1666  	/* get parent/root page */
1667  	parent = BT_POP(btstack);
1668  	DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1669  	if (rc)
1670  		return (rc);
1671  
1672  	/*
1673  	 *	extend the extent
1674  	 */
1675  	pxdlist = split->pxdlist;
1676  	pxd = &pxdlist->pxd[pxdlist->npxd];
1677  	pxdlist->npxd++;
1678  
1679  	xaddr = addressPXD(pxd);
1680  	tpxd = &sp->header.self;
1681  	txaddr = addressPXD(tpxd);
1682  	/* in-place extension */
1683  	if (xaddr == txaddr) {
1684  		type = tlckEXTEND;
1685  	}
1686  	/* relocation */
1687  	else {
1688  		type = tlckNEW;
1689  
1690  		/* save moved extent descriptor for later free */
1691  		tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1692  		pxdlock = (struct pxd_lock *) & tlck->lock;
1693  		pxdlock->flag = mlckFREEPXD;
1694  		pxdlock->pxd = sp->header.self;
1695  		pxdlock->index = 1;
1696  
1697  		/*
1698  		 * Update directory index table to reflect new page address
1699  		 */
1700  		if (DO_INDEX(ip)) {
1701  			s64 lblock;
1702  
1703  			mp = NULL;
1704  			stbl = DT_GETSTBL(sp);
1705  			for (n = 0; n < sp->header.nextindex; n++) {
1706  				ldtentry =
1707  				    (struct ldtentry *) & sp->slot[stbl[n]];
1708  				modify_index(tid, ip,
1709  					     le32_to_cpu(ldtentry->index),
1710  					     xaddr, n, &mp, &lblock);
1711  			}
1712  			if (mp)
1713  				release_metapage(mp);
1714  		}
1715  	}
1716  
1717  	/*
1718  	 *	extend the page
1719  	 */
1720  	sp->header.self = *pxd;
1721  
1722  	jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1723  
1724  	BT_MARK_DIRTY(smp, ip);
1725  	/*
1726  	 * acquire a transaction lock on the extended/leaf page
1727  	 */
1728  	tlck = txLock(tid, ip, smp, tlckDTREE | type);
1729  	dtlck = (struct dt_lock *) & tlck->lock;
1730  	lv = & dtlck->lv[0];
1731  
1732  	/* update buffer extent descriptor of extended page */
1733  	xlen = lengthPXD(pxd);
1734  	xsize = xlen << JFS_SBI(sb)->l2bsize;
1735  
1736  	/*
1737  	 * copy old stbl to new stbl at start of extended area
1738  	 */
1739  	oldstblindex = sp->header.stblindex;
1740  	oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1741  	newstblindex = sp->header.maxslot;
1742  	n = xsize >> L2DTSLOTSIZE;
1743  	newstblsize = (n + 31) >> L2DTSLOTSIZE;
1744  	memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1745  	       sp->header.nextindex);
1746  
1747  	/*
1748  	 * in-line extension: linelock old area of extended page
1749  	 */
1750  	if (type == tlckEXTEND) {
1751  		/* linelock header */
1752  		lv->offset = 0;
1753  		lv->length = 1;
1754  		dtlck->index++;
1755  		lv++;
1756  
1757  		/* linelock new stbl of extended page */
1758  		lv->offset = newstblindex;
1759  		lv->length = newstblsize;
1760  	}
1761  	/*
1762  	 * relocation: linelock whole relocated area
1763  	 */
1764  	else {
1765  		lv->offset = 0;
1766  		lv->length = sp->header.maxslot + newstblsize;
1767  	}
1768  
1769  	dtlck->index++;
1770  
1771  	sp->header.maxslot = n;
1772  	sp->header.stblindex = newstblindex;
1773  	/* sp->header.nextindex remains the same */
1774  
1775  	/*
1776  	 * add old stbl region at head of freelist
1777  	 */
1778  	fsi = oldstblindex;
1779  	f = &sp->slot[fsi];
1780  	last = sp->header.freelist;
1781  	for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1782  		f->next = last;
1783  		last = fsi;
1784  	}
1785  	sp->header.freelist = last;
1786  	sp->header.freecnt += oldstblsize;
1787  
1788  	/*
1789  	 * append free region of newly extended area at tail of freelist
1790  	 */
1791  	/* init free region of newly extended area */
1792  	fsi = n = newstblindex + newstblsize;
1793  	f = &sp->slot[fsi];
1794  	for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1795  		f->next = fsi;
1796  	f->next = -1;
1797  
1798  	/* append new free region at tail of old freelist */
1799  	fsi = sp->header.freelist;
1800  	if (fsi == -1)
1801  		sp->header.freelist = n;
1802  	else {
1803  		do {
1804  			f = &sp->slot[fsi];
1805  			fsi = f->next;
1806  		} while (fsi != -1);
1807  
1808  		f->next = n;
1809  	}
1810  
1811  	sp->header.freecnt += sp->header.maxslot - n;
1812  
1813  	/*
1814  	 * insert the new entry
1815  	 */
1816  	dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1817  
1818  	BT_MARK_DIRTY(pmp, ip);
1819  	/*
1820  	 * linelock any freeslots residing in old extent
1821  	 */
1822  	if (type == tlckEXTEND) {
1823  		n = sp->header.maxslot >> 2;
1824  		if (sp->header.freelist < n)
1825  			dtLinelockFreelist(sp, n, &dtlck);
1826  	}
1827  
1828  	/*
1829  	 *	update parent entry on the parent/root page
1830  	 */
1831  	/*
1832  	 * acquire a transaction lock on the parent/root page
1833  	 */
1834  	tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1835  	dtlck = (struct dt_lock *) & tlck->lock;
1836  	lv = & dtlck->lv[dtlck->index];
1837  
1838  	/* linelock parent entry - 1st slot */
1839  	lv->offset = 1;
1840  	lv->length = 1;
1841  	dtlck->index++;
1842  
1843  	/* update the parent pxd for page extension */
1844  	tpxd = (pxd_t *) & pp->slot[1];
1845  	*tpxd = *pxd;
1846  
1847  	DT_PUTPAGE(pmp);
1848  	return 0;
1849  }
1850  
1851  
1852  /*
1853   *	dtSplitRoot()
1854   *
1855   * function:
1856   *	split the full root page into
1857   *	original/root/split page and new right page
1858   *	i.e., root remains fixed in tree anchor (inode) and
1859   *	the root is copied to a single new right child page
1860   *	since root page << non-root page, and
1861   *	the split root page contains a single entry for the
1862   *	new right child page.
1863   *
1864   * parameter:
1865   *
1866   * return: 0 - success;
1867   *	   errno - failure;
1868   *	return new page pinned;
1869   */
dtSplitRoot(tid_t tid,struct inode * ip,struct dtsplit * split,struct metapage ** rmpp)1870  static int dtSplitRoot(tid_t tid,
1871  	    struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1872  {
1873  	struct super_block *sb = ip->i_sb;
1874  	struct metapage *smp;
1875  	dtroot_t *sp;
1876  	struct metapage *rmp;
1877  	dtpage_t *rp;
1878  	s64 rbn;
1879  	int xlen;
1880  	int xsize;
1881  	struct dtslot *f;
1882  	s8 *stbl;
1883  	int fsi, stblsize, n;
1884  	struct idtentry *s;
1885  	pxd_t *ppxd;
1886  	struct pxdlist *pxdlist;
1887  	pxd_t *pxd;
1888  	struct dt_lock *dtlck;
1889  	struct tlock *tlck;
1890  	struct lv *lv;
1891  	int rc;
1892  
1893  	/* get split root page */
1894  	smp = split->mp;
1895  	sp = &JFS_IP(ip)->i_dtroot;
1896  
1897  	/*
1898  	 *	allocate/initialize a single (right) child page
1899  	 *
1900  	 * N.B. at first split, a one (or two) block to fit new entry
1901  	 * is allocated; at subsequent split, a full page is allocated;
1902  	 */
1903  	pxdlist = split->pxdlist;
1904  	pxd = &pxdlist->pxd[pxdlist->npxd];
1905  	pxdlist->npxd++;
1906  	rbn = addressPXD(pxd);
1907  	xlen = lengthPXD(pxd);
1908  	xsize = xlen << JFS_SBI(sb)->l2bsize;
1909  	rmp = get_metapage(ip, rbn, xsize, 1);
1910  	if (!rmp)
1911  		return -EIO;
1912  
1913  	rp = rmp->data;
1914  
1915  	/* Allocate blocks to quota. */
1916  	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1917  	if (rc) {
1918  		release_metapage(rmp);
1919  		return rc;
1920  	}
1921  
1922  	BT_MARK_DIRTY(rmp, ip);
1923  	/*
1924  	 * acquire a transaction lock on the new right page
1925  	 */
1926  	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1927  	dtlck = (struct dt_lock *) & tlck->lock;
1928  
1929  	rp->header.flag =
1930  	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1931  	rp->header.self = *pxd;
1932  
1933  	/* initialize sibling pointers */
1934  	rp->header.next = 0;
1935  	rp->header.prev = 0;
1936  
1937  	/*
1938  	 *	move in-line root page into new right page extent
1939  	 */
1940  	/* linelock header + copied entries + new stbl (1st slot) in new page */
1941  	ASSERT(dtlck->index == 0);
1942  	lv = & dtlck->lv[0];
1943  	lv->offset = 0;
1944  	lv->length = 10;	/* 1 + 8 + 1 */
1945  	dtlck->index++;
1946  
1947  	n = xsize >> L2DTSLOTSIZE;
1948  	rp->header.maxslot = n;
1949  	stblsize = (n + 31) >> L2DTSLOTSIZE;
1950  
1951  	/* copy old stbl to new stbl at start of extended area */
1952  	rp->header.stblindex = DTROOTMAXSLOT;
1953  	stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1954  	memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1955  	rp->header.nextindex = sp->header.nextindex;
1956  
1957  	/* copy old data area to start of new data area */
1958  	memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1959  
1960  	/*
1961  	 * append free region of newly extended area at tail of freelist
1962  	 */
1963  	/* init free region of newly extended area */
1964  	fsi = n = DTROOTMAXSLOT + stblsize;
1965  	f = &rp->slot[fsi];
1966  	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1967  		f->next = fsi;
1968  	f->next = -1;
1969  
1970  	/* append new free region at tail of old freelist */
1971  	fsi = sp->header.freelist;
1972  	if (fsi == -1)
1973  		rp->header.freelist = n;
1974  	else {
1975  		rp->header.freelist = fsi;
1976  
1977  		do {
1978  			f = &rp->slot[fsi];
1979  			fsi = f->next;
1980  		} while (fsi >= 0);
1981  
1982  		f->next = n;
1983  	}
1984  
1985  	rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1986  
1987  	/*
1988  	 * Update directory index table for entries now in right page
1989  	 */
1990  	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1991  		s64 lblock;
1992  		struct metapage *mp = NULL;
1993  		struct ldtentry *ldtentry;
1994  
1995  		stbl = DT_GETSTBL(rp);
1996  		for (n = 0; n < rp->header.nextindex; n++) {
1997  			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1998  			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1999  				     rbn, n, &mp, &lblock);
2000  		}
2001  		if (mp)
2002  			release_metapage(mp);
2003  	}
2004  	/*
2005  	 * insert the new entry into the new right/child page
2006  	 * (skip index in the new right page will not change)
2007  	 */
2008  	dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2009  
2010  	/*
2011  	 *	reset parent/root page
2012  	 *
2013  	 * set the 1st entry offset to 0, which force the left-most key
2014  	 * at any level of the tree to be less than any search key.
2015  	 *
2016  	 * The btree comparison code guarantees that the left-most key on any
2017  	 * level of the tree is never used, so it doesn't need to be filled in.
2018  	 */
2019  	BT_MARK_DIRTY(smp, ip);
2020  	/*
2021  	 * acquire a transaction lock on the root page (in-memory inode)
2022  	 */
2023  	tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2024  	dtlck = (struct dt_lock *) & tlck->lock;
2025  
2026  	/* linelock root */
2027  	ASSERT(dtlck->index == 0);
2028  	lv = & dtlck->lv[0];
2029  	lv->offset = 0;
2030  	lv->length = DTROOTMAXSLOT;
2031  	dtlck->index++;
2032  
2033  	/* update page header of root */
2034  	if (sp->header.flag & BT_LEAF) {
2035  		sp->header.flag &= ~BT_LEAF;
2036  		sp->header.flag |= BT_INTERNAL;
2037  	}
2038  
2039  	/* init the first entry */
2040  	s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2041  	ppxd = (pxd_t *) s;
2042  	*ppxd = *pxd;
2043  	s->next = -1;
2044  	s->namlen = 0;
2045  
2046  	stbl = sp->header.stbl;
2047  	stbl[0] = DTENTRYSTART;
2048  	sp->header.nextindex = 1;
2049  
2050  	/* init freelist */
2051  	fsi = DTENTRYSTART + 1;
2052  	f = &sp->slot[fsi];
2053  
2054  	/* init free region of remaining area */
2055  	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2056  		f->next = fsi;
2057  	f->next = -1;
2058  
2059  	sp->header.freelist = DTENTRYSTART + 1;
2060  	sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2061  
2062  	*rmpp = rmp;
2063  
2064  	return 0;
2065  }
2066  
2067  
2068  /*
2069   *	dtDelete()
2070   *
2071   * function: delete the entry(s) referenced by a key.
2072   *
2073   * parameter:
2074   *
2075   * return:
2076   */
dtDelete(tid_t tid,struct inode * ip,struct component_name * key,ino_t * ino,int flag)2077  int dtDelete(tid_t tid,
2078  	 struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2079  {
2080  	int rc = 0;
2081  	s64 bn;
2082  	struct metapage *mp, *imp;
2083  	dtpage_t *p;
2084  	int index;
2085  	struct btstack btstack;
2086  	struct dt_lock *dtlck;
2087  	struct tlock *tlck;
2088  	struct lv *lv;
2089  	int i;
2090  	struct ldtentry *ldtentry;
2091  	u8 *stbl;
2092  	u32 table_index, next_index;
2093  	struct metapage *nmp;
2094  	dtpage_t *np;
2095  
2096  	/*
2097  	 *	search for the entry to delete:
2098  	 *
2099  	 * dtSearch() returns (leaf page pinned, index at which to delete).
2100  	 */
2101  	if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2102  		return rc;
2103  
2104  	/* retrieve search result */
2105  	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2106  
2107  	/*
2108  	 * We need to find put the index of the next entry into the
2109  	 * directory index table in order to resume a readdir from this
2110  	 * entry.
2111  	 */
2112  	if (DO_INDEX(ip)) {
2113  		stbl = DT_GETSTBL(p);
2114  		ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2115  		table_index = le32_to_cpu(ldtentry->index);
2116  		if (index == (p->header.nextindex - 1)) {
2117  			/*
2118  			 * Last entry in this leaf page
2119  			 */
2120  			if ((p->header.flag & BT_ROOT)
2121  			    || (p->header.next == 0))
2122  				next_index = -1;
2123  			else {
2124  				/* Read next leaf page */
2125  				DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2126  					   nmp, PSIZE, np, rc);
2127  				if (rc)
2128  					next_index = -1;
2129  				else {
2130  					stbl = DT_GETSTBL(np);
2131  					ldtentry =
2132  					    (struct ldtentry *) & np->
2133  					    slot[stbl[0]];
2134  					next_index =
2135  					    le32_to_cpu(ldtentry->index);
2136  					DT_PUTPAGE(nmp);
2137  				}
2138  			}
2139  		} else {
2140  			ldtentry =
2141  			    (struct ldtentry *) & p->slot[stbl[index + 1]];
2142  			next_index = le32_to_cpu(ldtentry->index);
2143  		}
2144  		free_index(tid, ip, table_index, next_index);
2145  	}
2146  	/*
2147  	 * the leaf page becomes empty, delete the page
2148  	 */
2149  	if (p->header.nextindex == 1) {
2150  		/* delete empty page */
2151  		rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2152  	}
2153  	/*
2154  	 * the leaf page has other entries remaining:
2155  	 *
2156  	 * delete the entry from the leaf page.
2157  	 */
2158  	else {
2159  		BT_MARK_DIRTY(mp, ip);
2160  		/*
2161  		 * acquire a transaction lock on the leaf page
2162  		 */
2163  		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2164  		dtlck = (struct dt_lock *) & tlck->lock;
2165  
2166  		/*
2167  		 * Do not assume that dtlck->index will be zero.  During a
2168  		 * rename within a directory, this transaction may have
2169  		 * modified this page already when adding the new entry.
2170  		 */
2171  
2172  		/* linelock header */
2173  		if (dtlck->index >= dtlck->maxcnt)
2174  			dtlck = (struct dt_lock *) txLinelock(dtlck);
2175  		lv = & dtlck->lv[dtlck->index];
2176  		lv->offset = 0;
2177  		lv->length = 1;
2178  		dtlck->index++;
2179  
2180  		/* linelock stbl of non-root leaf page */
2181  		if (!(p->header.flag & BT_ROOT)) {
2182  			if (dtlck->index >= dtlck->maxcnt)
2183  				dtlck = (struct dt_lock *) txLinelock(dtlck);
2184  			lv = & dtlck->lv[dtlck->index];
2185  			i = index >> L2DTSLOTSIZE;
2186  			lv->offset = p->header.stblindex + i;
2187  			lv->length =
2188  			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2189  			    i + 1;
2190  			dtlck->index++;
2191  		}
2192  
2193  		/* free the leaf entry */
2194  		dtDeleteEntry(p, index, &dtlck);
2195  
2196  		/*
2197  		 * Update directory index table for entries moved in stbl
2198  		 */
2199  		if (DO_INDEX(ip) && index < p->header.nextindex) {
2200  			s64 lblock;
2201  
2202  			imp = NULL;
2203  			stbl = DT_GETSTBL(p);
2204  			for (i = index; i < p->header.nextindex; i++) {
2205  				ldtentry =
2206  				    (struct ldtentry *) & p->slot[stbl[i]];
2207  				modify_index(tid, ip,
2208  					     le32_to_cpu(ldtentry->index),
2209  					     bn, i, &imp, &lblock);
2210  			}
2211  			if (imp)
2212  				release_metapage(imp);
2213  		}
2214  
2215  		DT_PUTPAGE(mp);
2216  	}
2217  
2218  	return rc;
2219  }
2220  
2221  
2222  /*
2223   *	dtDeleteUp()
2224   *
2225   * function:
2226   *	free empty pages as propagating deletion up the tree
2227   *
2228   * parameter:
2229   *
2230   * return:
2231   */
dtDeleteUp(tid_t tid,struct inode * ip,struct metapage * fmp,dtpage_t * fp,struct btstack * btstack)2232  static int dtDeleteUp(tid_t tid, struct inode *ip,
2233  	   struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2234  {
2235  	int rc = 0;
2236  	struct metapage *mp;
2237  	dtpage_t *p;
2238  	int index, nextindex;
2239  	int xlen;
2240  	struct btframe *parent;
2241  	struct dt_lock *dtlck;
2242  	struct tlock *tlck;
2243  	struct lv *lv;
2244  	struct pxd_lock *pxdlock;
2245  	int i;
2246  
2247  	/*
2248  	 *	keep the root leaf page which has become empty
2249  	 */
2250  	if (BT_IS_ROOT(fmp)) {
2251  		/*
2252  		 * reset the root
2253  		 *
2254  		 * dtInitRoot() acquires txlock on the root
2255  		 */
2256  		dtInitRoot(tid, ip, PARENT(ip));
2257  
2258  		DT_PUTPAGE(fmp);
2259  
2260  		return 0;
2261  	}
2262  
2263  	/*
2264  	 *	free the non-root leaf page
2265  	 */
2266  	/*
2267  	 * acquire a transaction lock on the page
2268  	 *
2269  	 * write FREEXTENT|NOREDOPAGE log record
2270  	 * N.B. linelock is overlaid as freed extent descriptor, and
2271  	 * the buffer page is freed;
2272  	 */
2273  	tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2274  	pxdlock = (struct pxd_lock *) & tlck->lock;
2275  	pxdlock->flag = mlckFREEPXD;
2276  	pxdlock->pxd = fp->header.self;
2277  	pxdlock->index = 1;
2278  
2279  	/* update sibling pointers */
2280  	if ((rc = dtRelink(tid, ip, fp))) {
2281  		BT_PUTPAGE(fmp);
2282  		return rc;
2283  	}
2284  
2285  	xlen = lengthPXD(&fp->header.self);
2286  
2287  	/* Free quota allocation. */
2288  	dquot_free_block(ip, xlen);
2289  
2290  	/* free/invalidate its buffer page */
2291  	discard_metapage(fmp);
2292  
2293  	/*
2294  	 *	propagate page deletion up the directory tree
2295  	 *
2296  	 * If the delete from the parent page makes it empty,
2297  	 * continue all the way up the tree.
2298  	 * stop if the root page is reached (which is never deleted) or
2299  	 * if the entry deletion does not empty the page.
2300  	 */
2301  	while ((parent = BT_POP(btstack)) != NULL) {
2302  		/* pin the parent page <sp> */
2303  		DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2304  		if (rc)
2305  			return rc;
2306  
2307  		/*
2308  		 * free the extent of the child page deleted
2309  		 */
2310  		index = parent->index;
2311  
2312  		/*
2313  		 * delete the entry for the child page from parent
2314  		 */
2315  		nextindex = p->header.nextindex;
2316  
2317  		/*
2318  		 * the parent has the single entry being deleted:
2319  		 *
2320  		 * free the parent page which has become empty.
2321  		 */
2322  		if (nextindex == 1) {
2323  			/*
2324  			 * keep the root internal page which has become empty
2325  			 */
2326  			if (p->header.flag & BT_ROOT) {
2327  				/*
2328  				 * reset the root
2329  				 *
2330  				 * dtInitRoot() acquires txlock on the root
2331  				 */
2332  				dtInitRoot(tid, ip, PARENT(ip));
2333  
2334  				DT_PUTPAGE(mp);
2335  
2336  				return 0;
2337  			}
2338  			/*
2339  			 * free the parent page
2340  			 */
2341  			else {
2342  				/*
2343  				 * acquire a transaction lock on the page
2344  				 *
2345  				 * write FREEXTENT|NOREDOPAGE log record
2346  				 */
2347  				tlck =
2348  				    txMaplock(tid, ip,
2349  					      tlckDTREE | tlckFREE);
2350  				pxdlock = (struct pxd_lock *) & tlck->lock;
2351  				pxdlock->flag = mlckFREEPXD;
2352  				pxdlock->pxd = p->header.self;
2353  				pxdlock->index = 1;
2354  
2355  				/* update sibling pointers */
2356  				if ((rc = dtRelink(tid, ip, p))) {
2357  					DT_PUTPAGE(mp);
2358  					return rc;
2359  				}
2360  
2361  				xlen = lengthPXD(&p->header.self);
2362  
2363  				/* Free quota allocation */
2364  				dquot_free_block(ip, xlen);
2365  
2366  				/* free/invalidate its buffer page */
2367  				discard_metapage(mp);
2368  
2369  				/* propagate up */
2370  				continue;
2371  			}
2372  		}
2373  
2374  		/*
2375  		 * the parent has other entries remaining:
2376  		 *
2377  		 * delete the router entry from the parent page.
2378  		 */
2379  		BT_MARK_DIRTY(mp, ip);
2380  		/*
2381  		 * acquire a transaction lock on the page
2382  		 *
2383  		 * action: router entry deletion
2384  		 */
2385  		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2386  		dtlck = (struct dt_lock *) & tlck->lock;
2387  
2388  		/* linelock header */
2389  		if (dtlck->index >= dtlck->maxcnt)
2390  			dtlck = (struct dt_lock *) txLinelock(dtlck);
2391  		lv = & dtlck->lv[dtlck->index];
2392  		lv->offset = 0;
2393  		lv->length = 1;
2394  		dtlck->index++;
2395  
2396  		/* linelock stbl of non-root leaf page */
2397  		if (!(p->header.flag & BT_ROOT)) {
2398  			if (dtlck->index < dtlck->maxcnt)
2399  				lv++;
2400  			else {
2401  				dtlck = (struct dt_lock *) txLinelock(dtlck);
2402  				lv = & dtlck->lv[0];
2403  			}
2404  			i = index >> L2DTSLOTSIZE;
2405  			lv->offset = p->header.stblindex + i;
2406  			lv->length =
2407  			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2408  			    i + 1;
2409  			dtlck->index++;
2410  		}
2411  
2412  		/* free the router entry */
2413  		dtDeleteEntry(p, index, &dtlck);
2414  
2415  		/* reset key of new leftmost entry of level (for consistency) */
2416  		if (index == 0 &&
2417  		    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2418  			dtTruncateEntry(p, 0, &dtlck);
2419  
2420  		/* unpin the parent page */
2421  		DT_PUTPAGE(mp);
2422  
2423  		/* exit propagation up */
2424  		break;
2425  	}
2426  
2427  	if (!DO_INDEX(ip))
2428  		ip->i_size -= PSIZE;
2429  
2430  	return 0;
2431  }
2432  
2433  /*
2434   *	dtRelink()
2435   *
2436   * function:
2437   *	link around a freed page.
2438   *
2439   * parameter:
2440   *	fp:	page to be freed
2441   *
2442   * return:
2443   */
dtRelink(tid_t tid,struct inode * ip,dtpage_t * p)2444  static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2445  {
2446  	int rc;
2447  	struct metapage *mp;
2448  	s64 nextbn, prevbn;
2449  	struct tlock *tlck;
2450  	struct dt_lock *dtlck;
2451  	struct lv *lv;
2452  
2453  	nextbn = le64_to_cpu(p->header.next);
2454  	prevbn = le64_to_cpu(p->header.prev);
2455  
2456  	/* update prev pointer of the next page */
2457  	if (nextbn != 0) {
2458  		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2459  		if (rc)
2460  			return rc;
2461  
2462  		BT_MARK_DIRTY(mp, ip);
2463  		/*
2464  		 * acquire a transaction lock on the next page
2465  		 *
2466  		 * action: update prev pointer;
2467  		 */
2468  		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2469  		jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2470  			tlck, ip, mp);
2471  		dtlck = (struct dt_lock *) & tlck->lock;
2472  
2473  		/* linelock header */
2474  		if (dtlck->index >= dtlck->maxcnt)
2475  			dtlck = (struct dt_lock *) txLinelock(dtlck);
2476  		lv = & dtlck->lv[dtlck->index];
2477  		lv->offset = 0;
2478  		lv->length = 1;
2479  		dtlck->index++;
2480  
2481  		p->header.prev = cpu_to_le64(prevbn);
2482  		DT_PUTPAGE(mp);
2483  	}
2484  
2485  	/* update next pointer of the previous page */
2486  	if (prevbn != 0) {
2487  		DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2488  		if (rc)
2489  			return rc;
2490  
2491  		BT_MARK_DIRTY(mp, ip);
2492  		/*
2493  		 * acquire a transaction lock on the prev page
2494  		 *
2495  		 * action: update next pointer;
2496  		 */
2497  		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2498  		jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2499  			tlck, ip, mp);
2500  		dtlck = (struct dt_lock *) & tlck->lock;
2501  
2502  		/* linelock header */
2503  		if (dtlck->index >= dtlck->maxcnt)
2504  			dtlck = (struct dt_lock *) txLinelock(dtlck);
2505  		lv = & dtlck->lv[dtlck->index];
2506  		lv->offset = 0;
2507  		lv->length = 1;
2508  		dtlck->index++;
2509  
2510  		p->header.next = cpu_to_le64(nextbn);
2511  		DT_PUTPAGE(mp);
2512  	}
2513  
2514  	return 0;
2515  }
2516  
2517  
2518  /*
2519   *	dtInitRoot()
2520   *
2521   * initialize directory root (inline in inode)
2522   */
dtInitRoot(tid_t tid,struct inode * ip,u32 idotdot)2523  void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2524  {
2525  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2526  	dtroot_t *p;
2527  	int fsi;
2528  	struct dtslot *f;
2529  	struct tlock *tlck;
2530  	struct dt_lock *dtlck;
2531  	struct lv *lv;
2532  	u16 xflag_save;
2533  
2534  	/*
2535  	 * If this was previously an non-empty directory, we need to remove
2536  	 * the old directory table.
2537  	 */
2538  	if (DO_INDEX(ip)) {
2539  		if (!jfs_dirtable_inline(ip)) {
2540  			struct tblock *tblk = tid_to_tblock(tid);
2541  			/*
2542  			 * We're playing games with the tid's xflag.  If
2543  			 * we're removing a regular file, the file's xtree
2544  			 * is committed with COMMIT_PMAP, but we always
2545  			 * commit the directories xtree with COMMIT_PWMAP.
2546  			 */
2547  			xflag_save = tblk->xflag;
2548  			tblk->xflag = 0;
2549  			/*
2550  			 * xtTruncate isn't guaranteed to fully truncate
2551  			 * the xtree.  The caller needs to check i_size
2552  			 * after committing the transaction to see if
2553  			 * additional truncation is needed.  The
2554  			 * COMMIT_Stale flag tells caller that we
2555  			 * initiated the truncation.
2556  			 */
2557  			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2558  			set_cflag(COMMIT_Stale, ip);
2559  
2560  			tblk->xflag = xflag_save;
2561  		} else
2562  			ip->i_size = 1;
2563  
2564  		jfs_ip->next_index = 2;
2565  	} else
2566  		ip->i_size = IDATASIZE;
2567  
2568  	/*
2569  	 * acquire a transaction lock on the root
2570  	 *
2571  	 * action: directory initialization;
2572  	 */
2573  	tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2574  		      tlckDTREE | tlckENTRY | tlckBTROOT);
2575  	dtlck = (struct dt_lock *) & tlck->lock;
2576  
2577  	/* linelock root */
2578  	ASSERT(dtlck->index == 0);
2579  	lv = & dtlck->lv[0];
2580  	lv->offset = 0;
2581  	lv->length = DTROOTMAXSLOT;
2582  	dtlck->index++;
2583  
2584  	p = &jfs_ip->i_dtroot;
2585  
2586  	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2587  
2588  	p->header.nextindex = 0;
2589  
2590  	/* init freelist */
2591  	fsi = 1;
2592  	f = &p->slot[fsi];
2593  
2594  	/* init data area of root */
2595  	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2596  		f->next = fsi;
2597  	f->next = -1;
2598  
2599  	p->header.freelist = 1;
2600  	p->header.freecnt = 8;
2601  
2602  	/* init '..' entry */
2603  	p->header.idotdot = cpu_to_le32(idotdot);
2604  
2605  	return;
2606  }
2607  
2608  /*
2609   *	add_missing_indices()
2610   *
2611   * function: Fix dtree page in which one or more entries has an invalid index.
2612   *	     fsck.jfs should really fix this, but it currently does not.
2613   *	     Called from jfs_readdir when bad index is detected.
2614   */
add_missing_indices(struct inode * inode,s64 bn)2615  static void add_missing_indices(struct inode *inode, s64 bn)
2616  {
2617  	struct ldtentry *d;
2618  	struct dt_lock *dtlck;
2619  	int i;
2620  	uint index;
2621  	struct lv *lv;
2622  	struct metapage *mp;
2623  	dtpage_t *p;
2624  	int rc;
2625  	s8 *stbl;
2626  	tid_t tid;
2627  	struct tlock *tlck;
2628  
2629  	tid = txBegin(inode->i_sb, 0);
2630  
2631  	DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2632  
2633  	if (rc) {
2634  		printk(KERN_ERR "DT_GETPAGE failed!\n");
2635  		goto end;
2636  	}
2637  	BT_MARK_DIRTY(mp, inode);
2638  
2639  	ASSERT(p->header.flag & BT_LEAF);
2640  
2641  	tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2642  	if (BT_IS_ROOT(mp))
2643  		tlck->type |= tlckBTROOT;
2644  
2645  	dtlck = (struct dt_lock *) &tlck->lock;
2646  
2647  	stbl = DT_GETSTBL(p);
2648  	for (i = 0; i < p->header.nextindex; i++) {
2649  		d = (struct ldtentry *) &p->slot[stbl[i]];
2650  		index = le32_to_cpu(d->index);
2651  		if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2652  			d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2653  			if (dtlck->index >= dtlck->maxcnt)
2654  				dtlck = (struct dt_lock *) txLinelock(dtlck);
2655  			lv = &dtlck->lv[dtlck->index];
2656  			lv->offset = stbl[i];
2657  			lv->length = 1;
2658  			dtlck->index++;
2659  		}
2660  	}
2661  
2662  	DT_PUTPAGE(mp);
2663  	(void) txCommit(tid, 1, &inode, 0);
2664  end:
2665  	txEnd(tid);
2666  }
2667  
2668  /*
2669   * Buffer to hold directory entry info while traversing a dtree page
2670   * before being fed to the filldir function
2671   */
2672  struct jfs_dirent {
2673  	loff_t position;
2674  	int ino;
2675  	u16 name_len;
2676  	char name[];
2677  };
2678  
2679  /*
2680   * function to determine next variable-sized jfs_dirent in buffer
2681   */
next_jfs_dirent(struct jfs_dirent * dirent)2682  static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2683  {
2684  	return (struct jfs_dirent *)
2685  		((char *)dirent +
2686  		 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2687  		   sizeof (loff_t) - 1) &
2688  		  ~(sizeof (loff_t) - 1)));
2689  }
2690  
2691  /*
2692   *	jfs_readdir()
2693   *
2694   * function: read directory entries sequentially
2695   *	from the specified entry offset
2696   *
2697   * parameter:
2698   *
2699   * return: offset = (pn, index) of start entry
2700   *	of next jfs_readdir()/dtRead()
2701   */
jfs_readdir(struct file * file,struct dir_context * ctx)2702  int jfs_readdir(struct file *file, struct dir_context *ctx)
2703  {
2704  	struct inode *ip = file_inode(file);
2705  	struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
2706  	int rc = 0;
2707  	loff_t dtpos;	/* legacy OS/2 style position */
2708  	struct dtoffset {
2709  		s16 pn;
2710  		s16 index;
2711  		s32 unused;
2712  	} *dtoffset = (struct dtoffset *) &dtpos;
2713  	s64 bn;
2714  	struct metapage *mp;
2715  	dtpage_t *p;
2716  	int index;
2717  	s8 *stbl;
2718  	struct btstack btstack;
2719  	int i, next;
2720  	struct ldtentry *d;
2721  	struct dtslot *t;
2722  	int d_namleft, len, outlen;
2723  	unsigned long dirent_buf;
2724  	char *name_ptr;
2725  	u32 dir_index;
2726  	int do_index = 0;
2727  	uint loop_count = 0;
2728  	struct jfs_dirent *jfs_dirent;
2729  	int jfs_dirents;
2730  	int overflow, fix_page, page_fixed = 0;
2731  	static int unique_pos = 2;	/* If we can't fix broken index */
2732  
2733  	if (ctx->pos == DIREND)
2734  		return 0;
2735  
2736  	if (DO_INDEX(ip)) {
2737  		/*
2738  		 * persistent index is stored in directory entries.
2739  		 * Special cases:	 0 = .
2740  		 *			 1 = ..
2741  		 *			-1 = End of directory
2742  		 */
2743  		do_index = 1;
2744  
2745  		dir_index = (u32) ctx->pos;
2746  
2747  		/*
2748  		 * NFSv4 reserves cookies 1 and 2 for . and .. so the value
2749  		 * we return to the vfs is one greater than the one we use
2750  		 * internally.
2751  		 */
2752  		if (dir_index)
2753  			dir_index--;
2754  
2755  		if (dir_index > 1) {
2756  			struct dir_table_slot dirtab_slot;
2757  
2758  			if (dtEmpty(ip) ||
2759  			    (dir_index >= JFS_IP(ip)->next_index)) {
2760  				/* Stale position.  Directory has shrunk */
2761  				ctx->pos = DIREND;
2762  				return 0;
2763  			}
2764  		      repeat:
2765  			rc = read_index(ip, dir_index, &dirtab_slot);
2766  			if (rc) {
2767  				ctx->pos = DIREND;
2768  				return rc;
2769  			}
2770  			if (dirtab_slot.flag == DIR_INDEX_FREE) {
2771  				if (loop_count++ > JFS_IP(ip)->next_index) {
2772  					jfs_err("jfs_readdir detected infinite loop!");
2773  					ctx->pos = DIREND;
2774  					return 0;
2775  				}
2776  				dir_index = le32_to_cpu(dirtab_slot.addr2);
2777  				if (dir_index == -1) {
2778  					ctx->pos = DIREND;
2779  					return 0;
2780  				}
2781  				goto repeat;
2782  			}
2783  			bn = addressDTS(&dirtab_slot);
2784  			index = dirtab_slot.slot;
2785  			DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2786  			if (rc) {
2787  				ctx->pos = DIREND;
2788  				return 0;
2789  			}
2790  			if (p->header.flag & BT_INTERNAL) {
2791  				jfs_err("jfs_readdir: bad index table");
2792  				DT_PUTPAGE(mp);
2793  				ctx->pos = DIREND;
2794  				return 0;
2795  			}
2796  		} else {
2797  			if (dir_index == 0) {
2798  				/*
2799  				 * self "."
2800  				 */
2801  				ctx->pos = 1;
2802  				if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
2803  					return 0;
2804  			}
2805  			/*
2806  			 * parent ".."
2807  			 */
2808  			ctx->pos = 2;
2809  			if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
2810  				return 0;
2811  
2812  			/*
2813  			 * Find first entry of left-most leaf
2814  			 */
2815  			if (dtEmpty(ip)) {
2816  				ctx->pos = DIREND;
2817  				return 0;
2818  			}
2819  
2820  			if ((rc = dtReadFirst(ip, &btstack)))
2821  				return rc;
2822  
2823  			DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2824  		}
2825  	} else {
2826  		/*
2827  		 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
2828  		 *
2829  		 * pn = 0; index = 1:	First entry "."
2830  		 * pn = 0; index = 2:	Second entry ".."
2831  		 * pn > 0:		Real entries, pn=1 -> leftmost page
2832  		 * pn = index = -1:	No more entries
2833  		 */
2834  		dtpos = ctx->pos;
2835  		if (dtpos < 2) {
2836  			/* build "." entry */
2837  			ctx->pos = 1;
2838  			if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
2839  				return 0;
2840  			dtoffset->index = 2;
2841  			ctx->pos = dtpos;
2842  		}
2843  
2844  		if (dtoffset->pn == 0) {
2845  			if (dtoffset->index == 2) {
2846  				/* build ".." entry */
2847  				if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
2848  					return 0;
2849  			} else {
2850  				jfs_err("jfs_readdir called with invalid offset!");
2851  			}
2852  			dtoffset->pn = 1;
2853  			dtoffset->index = 0;
2854  			ctx->pos = dtpos;
2855  		}
2856  
2857  		if (dtEmpty(ip)) {
2858  			ctx->pos = DIREND;
2859  			return 0;
2860  		}
2861  
2862  		if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
2863  			jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext",
2864  				rc);
2865  			ctx->pos = DIREND;
2866  			return 0;
2867  		}
2868  		/* get start leaf page and index */
2869  		DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2870  
2871  		/* offset beyond directory eof ? */
2872  		if (bn < 0) {
2873  			ctx->pos = DIREND;
2874  			return 0;
2875  		}
2876  	}
2877  
2878  	dirent_buf = __get_free_page(GFP_KERNEL);
2879  	if (dirent_buf == 0) {
2880  		DT_PUTPAGE(mp);
2881  		jfs_warn("jfs_readdir: __get_free_page failed!");
2882  		ctx->pos = DIREND;
2883  		return -ENOMEM;
2884  	}
2885  
2886  	while (1) {
2887  		jfs_dirent = (struct jfs_dirent *) dirent_buf;
2888  		jfs_dirents = 0;
2889  		overflow = fix_page = 0;
2890  
2891  		stbl = DT_GETSTBL(p);
2892  
2893  		for (i = index; i < p->header.nextindex; i++) {
2894  			if (stbl[i] < 0 || stbl[i] > 127) {
2895  				jfs_err("JFS: Invalid stbl[%d] = %d for inode %ld, block = %lld",
2896  					i, stbl[i], (long)ip->i_ino, (long long)bn);
2897  				free_page(dirent_buf);
2898  				DT_PUTPAGE(mp);
2899  				return -EIO;
2900  			}
2901  
2902  			d = (struct ldtentry *) & p->slot[stbl[i]];
2903  
2904  			if (((long) jfs_dirent + d->namlen + 1) >
2905  			    (dirent_buf + PAGE_SIZE)) {
2906  				/* DBCS codepages could overrun dirent_buf */
2907  				index = i;
2908  				overflow = 1;
2909  				break;
2910  			}
2911  
2912  			d_namleft = d->namlen;
2913  			name_ptr = jfs_dirent->name;
2914  			jfs_dirent->ino = le32_to_cpu(d->inumber);
2915  
2916  			if (do_index) {
2917  				len = min(d_namleft, DTLHDRDATALEN);
2918  				jfs_dirent->position = le32_to_cpu(d->index);
2919  				/*
2920  				 * d->index should always be valid, but it
2921  				 * isn't.  fsck.jfs doesn't create the
2922  				 * directory index for the lost+found
2923  				 * directory.  Rather than let it go,
2924  				 * we can try to fix it.
2925  				 */
2926  				if ((jfs_dirent->position < 2) ||
2927  				    (jfs_dirent->position >=
2928  				     JFS_IP(ip)->next_index)) {
2929  					if (!page_fixed && !isReadOnly(ip)) {
2930  						fix_page = 1;
2931  						/*
2932  						 * setting overflow and setting
2933  						 * index to i will cause the
2934  						 * same page to be processed
2935  						 * again starting here
2936  						 */
2937  						overflow = 1;
2938  						index = i;
2939  						break;
2940  					}
2941  					jfs_dirent->position = unique_pos++;
2942  				}
2943  				/*
2944  				 * We add 1 to the index because we may
2945  				 * use a value of 2 internally, and NFSv4
2946  				 * doesn't like that.
2947  				 */
2948  				jfs_dirent->position++;
2949  			} else {
2950  				jfs_dirent->position = dtpos;
2951  				len = min(d_namleft, DTLHDRDATALEN_LEGACY);
2952  			}
2953  
2954  			/* copy the name of head/only segment */
2955  			outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
2956  						   codepage);
2957  			jfs_dirent->name_len = outlen;
2958  
2959  			/* copy name in the additional segment(s) */
2960  			next = d->next;
2961  			while (next >= 0) {
2962  				t = (struct dtslot *) & p->slot[next];
2963  				name_ptr += outlen;
2964  				d_namleft -= len;
2965  				/* Sanity Check */
2966  				if (d_namleft == 0) {
2967  					jfs_error(ip->i_sb,
2968  						  "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
2969  						  (long)ip->i_ino,
2970  						  (long long)bn,
2971  						  i);
2972  					goto skip_one;
2973  				}
2974  				len = min(d_namleft, DTSLOTDATALEN);
2975  				outlen = jfs_strfromUCS_le(name_ptr, t->name,
2976  							   len, codepage);
2977  				jfs_dirent->name_len += outlen;
2978  
2979  				next = t->next;
2980  			}
2981  
2982  			jfs_dirents++;
2983  			jfs_dirent = next_jfs_dirent(jfs_dirent);
2984  skip_one:
2985  			if (!do_index)
2986  				dtoffset->index++;
2987  		}
2988  
2989  		if (!overflow) {
2990  			/* Point to next leaf page */
2991  			if (p->header.flag & BT_ROOT)
2992  				bn = 0;
2993  			else {
2994  				bn = le64_to_cpu(p->header.next);
2995  				index = 0;
2996  				/* update offset (pn:index) for new page */
2997  				if (!do_index) {
2998  					dtoffset->pn++;
2999  					dtoffset->index = 0;
3000  				}
3001  			}
3002  			page_fixed = 0;
3003  		}
3004  
3005  		/* unpin previous leaf page */
3006  		DT_PUTPAGE(mp);
3007  
3008  		jfs_dirent = (struct jfs_dirent *) dirent_buf;
3009  		while (jfs_dirents--) {
3010  			ctx->pos = jfs_dirent->position;
3011  			if (!dir_emit(ctx, jfs_dirent->name,
3012  				    jfs_dirent->name_len,
3013  				    jfs_dirent->ino, DT_UNKNOWN))
3014  				goto out;
3015  			jfs_dirent = next_jfs_dirent(jfs_dirent);
3016  		}
3017  
3018  		if (fix_page) {
3019  			add_missing_indices(ip, bn);
3020  			page_fixed = 1;
3021  		}
3022  
3023  		if (!overflow && (bn == 0)) {
3024  			ctx->pos = DIREND;
3025  			break;
3026  		}
3027  
3028  		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3029  		if (rc) {
3030  			free_page(dirent_buf);
3031  			return rc;
3032  		}
3033  	}
3034  
3035        out:
3036  	free_page(dirent_buf);
3037  
3038  	return rc;
3039  }
3040  
3041  
3042  /*
3043   *	dtReadFirst()
3044   *
3045   * function: get the leftmost page of the directory
3046   */
dtReadFirst(struct inode * ip,struct btstack * btstack)3047  static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3048  {
3049  	int rc = 0;
3050  	s64 bn;
3051  	int psize = 288;	/* initial in-line directory */
3052  	struct metapage *mp;
3053  	dtpage_t *p;
3054  	s8 *stbl;
3055  	struct btframe *btsp;
3056  	pxd_t *xd;
3057  
3058  	BT_CLR(btstack);	/* reset stack */
3059  
3060  	/*
3061  	 *	descend leftmost path of the tree
3062  	 *
3063  	 * by convention, root bn = 0.
3064  	 */
3065  	for (bn = 0;;) {
3066  		DT_GETPAGE(ip, bn, mp, psize, p, rc);
3067  		if (rc)
3068  			return rc;
3069  
3070  		/*
3071  		 * leftmost leaf page
3072  		 */
3073  		if (p->header.flag & BT_LEAF) {
3074  			/* return leftmost entry */
3075  			btsp = btstack->top;
3076  			btsp->bn = bn;
3077  			btsp->index = 0;
3078  			btsp->mp = mp;
3079  
3080  			return 0;
3081  		}
3082  
3083  		/*
3084  		 * descend down to leftmost child page
3085  		 */
3086  		if (BT_STACK_FULL(btstack)) {
3087  			DT_PUTPAGE(mp);
3088  			jfs_error(ip->i_sb, "btstack overrun\n");
3089  			BT_STACK_DUMP(btstack);
3090  			return -EIO;
3091  		}
3092  		/* push (bn, index) of the parent page/entry */
3093  		BT_PUSH(btstack, bn, 0);
3094  
3095  		/* get the leftmost entry */
3096  		stbl = DT_GETSTBL(p);
3097  
3098  		if (stbl[0] < 0 || stbl[0] > 127) {
3099  			DT_PUTPAGE(mp);
3100  			jfs_error(ip->i_sb, "stbl[0] out of bound\n");
3101  			return -EIO;
3102  		}
3103  
3104  		xd = (pxd_t *) & p->slot[stbl[0]];
3105  
3106  		/* get the child page block address */
3107  		bn = addressPXD(xd);
3108  		psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3109  
3110  		/* unpin the parent page */
3111  		DT_PUTPAGE(mp);
3112  	}
3113  }
3114  
3115  
3116  /*
3117   *	dtReadNext()
3118   *
3119   * function: get the page of the specified offset (pn:index)
3120   *
3121   * return: if (offset > eof), bn = -1;
3122   *
3123   * note: if index > nextindex of the target leaf page,
3124   * start with 1st entry of next leaf page;
3125   */
dtReadNext(struct inode * ip,loff_t * offset,struct btstack * btstack)3126  static int dtReadNext(struct inode *ip, loff_t * offset,
3127  		      struct btstack * btstack)
3128  {
3129  	int rc = 0;
3130  	struct dtoffset {
3131  		s16 pn;
3132  		s16 index;
3133  		s32 unused;
3134  	} *dtoffset = (struct dtoffset *) offset;
3135  	s64 bn;
3136  	struct metapage *mp;
3137  	dtpage_t *p;
3138  	int index;
3139  	int pn;
3140  	s8 *stbl;
3141  	struct btframe *btsp, *parent;
3142  	pxd_t *xd;
3143  
3144  	/*
3145  	 * get leftmost leaf page pinned
3146  	 */
3147  	if ((rc = dtReadFirst(ip, btstack)))
3148  		return rc;
3149  
3150  	/* get leaf page */
3151  	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3152  
3153  	/* get the start offset (pn:index) */
3154  	pn = dtoffset->pn - 1;	/* Now pn = 0 represents leftmost leaf */
3155  	index = dtoffset->index;
3156  
3157  	/* start at leftmost page ? */
3158  	if (pn == 0) {
3159  		/* offset beyond eof ? */
3160  		if (index < p->header.nextindex)
3161  			goto out;
3162  
3163  		if (p->header.flag & BT_ROOT) {
3164  			bn = -1;
3165  			goto out;
3166  		}
3167  
3168  		/* start with 1st entry of next leaf page */
3169  		dtoffset->pn++;
3170  		dtoffset->index = index = 0;
3171  		goto a;
3172  	}
3173  
3174  	/* start at non-leftmost page: scan parent pages for large pn */
3175  	if (p->header.flag & BT_ROOT) {
3176  		bn = -1;
3177  		goto out;
3178  	}
3179  
3180  	/* start after next leaf page ? */
3181  	if (pn > 1)
3182  		goto b;
3183  
3184  	/* get leaf page pn = 1 */
3185        a:
3186  	bn = le64_to_cpu(p->header.next);
3187  
3188  	/* unpin leaf page */
3189  	DT_PUTPAGE(mp);
3190  
3191  	/* offset beyond eof ? */
3192  	if (bn == 0) {
3193  		bn = -1;
3194  		goto out;
3195  	}
3196  
3197  	goto c;
3198  
3199  	/*
3200  	 * scan last internal page level to get target leaf page
3201  	 */
3202        b:
3203  	/* unpin leftmost leaf page */
3204  	DT_PUTPAGE(mp);
3205  
3206  	/* get left most parent page */
3207  	btsp = btstack->top;
3208  	parent = btsp - 1;
3209  	bn = parent->bn;
3210  	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3211  	if (rc)
3212  		return rc;
3213  
3214  	/* scan parent pages at last internal page level */
3215  	while (pn >= p->header.nextindex) {
3216  		pn -= p->header.nextindex;
3217  
3218  		/* get next parent page address */
3219  		bn = le64_to_cpu(p->header.next);
3220  
3221  		/* unpin current parent page */
3222  		DT_PUTPAGE(mp);
3223  
3224  		/* offset beyond eof ? */
3225  		if (bn == 0) {
3226  			bn = -1;
3227  			goto out;
3228  		}
3229  
3230  		/* get next parent page */
3231  		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3232  		if (rc)
3233  			return rc;
3234  
3235  		/* update parent page stack frame */
3236  		parent->bn = bn;
3237  	}
3238  
3239  	/* get leaf page address */
3240  	stbl = DT_GETSTBL(p);
3241  	xd = (pxd_t *) & p->slot[stbl[pn]];
3242  	bn = addressPXD(xd);
3243  
3244  	/* unpin parent page */
3245  	DT_PUTPAGE(mp);
3246  
3247  	/*
3248  	 * get target leaf page
3249  	 */
3250        c:
3251  	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3252  	if (rc)
3253  		return rc;
3254  
3255  	/*
3256  	 * leaf page has been completed:
3257  	 * start with 1st entry of next leaf page
3258  	 */
3259  	if (index >= p->header.nextindex) {
3260  		bn = le64_to_cpu(p->header.next);
3261  
3262  		/* unpin leaf page */
3263  		DT_PUTPAGE(mp);
3264  
3265  		/* offset beyond eof ? */
3266  		if (bn == 0) {
3267  			bn = -1;
3268  			goto out;
3269  		}
3270  
3271  		/* get next leaf page */
3272  		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3273  		if (rc)
3274  			return rc;
3275  
3276  		/* start with 1st entry of next leaf page */
3277  		dtoffset->pn++;
3278  		dtoffset->index = 0;
3279  	}
3280  
3281        out:
3282  	/* return target leaf page pinned */
3283  	btsp = btstack->top;
3284  	btsp->bn = bn;
3285  	btsp->index = dtoffset->index;
3286  	btsp->mp = mp;
3287  
3288  	return 0;
3289  }
3290  
3291  
3292  /*
3293   *	dtCompare()
3294   *
3295   * function: compare search key with an internal entry
3296   *
3297   * return:
3298   *	< 0 if k is < record
3299   *	= 0 if k is = record
3300   *	> 0 if k is > record
3301   */
dtCompare(struct component_name * key,dtpage_t * p,int si)3302  static int dtCompare(struct component_name * key,	/* search key */
3303  		     dtpage_t * p,	/* directory page */
3304  		     int si)
3305  {				/* entry slot index */
3306  	wchar_t *kname;
3307  	__le16 *name;
3308  	int klen, namlen, len, rc;
3309  	struct idtentry *ih;
3310  	struct dtslot *t;
3311  
3312  	/*
3313  	 * force the left-most key on internal pages, at any level of
3314  	 * the tree, to be less than any search key.
3315  	 * this obviates having to update the leftmost key on an internal
3316  	 * page when the user inserts a new key in the tree smaller than
3317  	 * anything that has been stored.
3318  	 *
3319  	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3320  	 * at any internal page at any level of the tree,
3321  	 * it descends to child of the entry anyway -
3322  	 * ? make the entry as min size dummy entry)
3323  	 *
3324  	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3325  	 * return (1);
3326  	 */
3327  
3328  	kname = key->name;
3329  	klen = key->namlen;
3330  
3331  	ih = (struct idtentry *) & p->slot[si];
3332  	si = ih->next;
3333  	name = ih->name;
3334  	namlen = ih->namlen;
3335  	len = min(namlen, DTIHDRDATALEN);
3336  
3337  	/* compare with head/only segment */
3338  	len = min(klen, len);
3339  	if ((rc = UniStrncmp_le(kname, name, len)))
3340  		return rc;
3341  
3342  	klen -= len;
3343  	namlen -= len;
3344  
3345  	/* compare with additional segment(s) */
3346  	kname += len;
3347  	while (klen > 0 && namlen > 0) {
3348  		/* compare with next name segment */
3349  		t = (struct dtslot *) & p->slot[si];
3350  		len = min(namlen, DTSLOTDATALEN);
3351  		len = min(klen, len);
3352  		name = t->name;
3353  		if ((rc = UniStrncmp_le(kname, name, len)))
3354  			return rc;
3355  
3356  		klen -= len;
3357  		namlen -= len;
3358  		kname += len;
3359  		si = t->next;
3360  	}
3361  
3362  	return (klen - namlen);
3363  }
3364  
3365  
3366  
3367  
3368  /*
3369   *	ciCompare()
3370   *
3371   * function: compare search key with an (leaf/internal) entry
3372   *
3373   * return:
3374   *	< 0 if k is < record
3375   *	= 0 if k is = record
3376   *	> 0 if k is > record
3377   */
ciCompare(struct component_name * key,dtpage_t * p,int si,int flag)3378  static int ciCompare(struct component_name * key,	/* search key */
3379  		     dtpage_t * p,	/* directory page */
3380  		     int si,	/* entry slot index */
3381  		     int flag)
3382  {
3383  	wchar_t *kname, x;
3384  	__le16 *name;
3385  	int klen, namlen, len, rc;
3386  	struct ldtentry *lh;
3387  	struct idtentry *ih;
3388  	struct dtslot *t;
3389  	int i;
3390  
3391  	/*
3392  	 * force the left-most key on internal pages, at any level of
3393  	 * the tree, to be less than any search key.
3394  	 * this obviates having to update the leftmost key on an internal
3395  	 * page when the user inserts a new key in the tree smaller than
3396  	 * anything that has been stored.
3397  	 *
3398  	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3399  	 * at any internal page at any level of the tree,
3400  	 * it descends to child of the entry anyway -
3401  	 * ? make the entry as min size dummy entry)
3402  	 *
3403  	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3404  	 * return (1);
3405  	 */
3406  
3407  	kname = key->name;
3408  	klen = key->namlen;
3409  
3410  	/*
3411  	 * leaf page entry
3412  	 */
3413  	if (p->header.flag & BT_LEAF) {
3414  		lh = (struct ldtentry *) & p->slot[si];
3415  		si = lh->next;
3416  		name = lh->name;
3417  		namlen = lh->namlen;
3418  		if (flag & JFS_DIR_INDEX)
3419  			len = min(namlen, DTLHDRDATALEN);
3420  		else
3421  			len = min(namlen, DTLHDRDATALEN_LEGACY);
3422  	}
3423  	/*
3424  	 * internal page entry
3425  	 */
3426  	else {
3427  		ih = (struct idtentry *) & p->slot[si];
3428  		si = ih->next;
3429  		name = ih->name;
3430  		namlen = ih->namlen;
3431  		len = min(namlen, DTIHDRDATALEN);
3432  	}
3433  
3434  	/* compare with head/only segment */
3435  	len = min(klen, len);
3436  	for (i = 0; i < len; i++, kname++, name++) {
3437  		/* only uppercase if case-insensitive support is on */
3438  		if ((flag & JFS_OS2) == JFS_OS2)
3439  			x = UniToupper(le16_to_cpu(*name));
3440  		else
3441  			x = le16_to_cpu(*name);
3442  		if ((rc = *kname - x))
3443  			return rc;
3444  	}
3445  
3446  	klen -= len;
3447  	namlen -= len;
3448  
3449  	/* compare with additional segment(s) */
3450  	while (klen > 0 && namlen > 0) {
3451  		/* compare with next name segment */
3452  		t = (struct dtslot *) & p->slot[si];
3453  		len = min(namlen, DTSLOTDATALEN);
3454  		len = min(klen, len);
3455  		name = t->name;
3456  		for (i = 0; i < len; i++, kname++, name++) {
3457  			/* only uppercase if case-insensitive support is on */
3458  			if ((flag & JFS_OS2) == JFS_OS2)
3459  				x = UniToupper(le16_to_cpu(*name));
3460  			else
3461  				x = le16_to_cpu(*name);
3462  
3463  			if ((rc = *kname - x))
3464  				return rc;
3465  		}
3466  
3467  		klen -= len;
3468  		namlen -= len;
3469  		si = t->next;
3470  	}
3471  
3472  	return (klen - namlen);
3473  }
3474  
3475  
3476  /*
3477   *	ciGetLeafPrefixKey()
3478   *
3479   * function: compute prefix of suffix compression
3480   *	     from two adjacent leaf entries
3481   *	     across page boundary
3482   *
3483   * return: non-zero on error
3484   *
3485   */
ciGetLeafPrefixKey(dtpage_t * lp,int li,dtpage_t * rp,int ri,struct component_name * key,int flag)3486  static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3487  			       int ri, struct component_name * key, int flag)
3488  {
3489  	int klen, namlen;
3490  	wchar_t *pl, *pr, *kname;
3491  	struct component_name lkey;
3492  	struct component_name rkey;
3493  
3494  	lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3495  					GFP_KERNEL);
3496  	if (lkey.name == NULL)
3497  		return -ENOMEM;
3498  
3499  	rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3500  					GFP_KERNEL);
3501  	if (rkey.name == NULL) {
3502  		kfree(lkey.name);
3503  		return -ENOMEM;
3504  	}
3505  
3506  	/* get left and right key */
3507  	dtGetKey(lp, li, &lkey, flag);
3508  	lkey.name[lkey.namlen] = 0;
3509  
3510  	if ((flag & JFS_OS2) == JFS_OS2)
3511  		ciToUpper(&lkey);
3512  
3513  	dtGetKey(rp, ri, &rkey, flag);
3514  	rkey.name[rkey.namlen] = 0;
3515  
3516  
3517  	if ((flag & JFS_OS2) == JFS_OS2)
3518  		ciToUpper(&rkey);
3519  
3520  	/* compute prefix */
3521  	klen = 0;
3522  	kname = key->name;
3523  	namlen = min(lkey.namlen, rkey.namlen);
3524  	for (pl = lkey.name, pr = rkey.name;
3525  	     namlen; pl++, pr++, namlen--, klen++, kname++) {
3526  		*kname = *pr;
3527  		if (*pl != *pr) {
3528  			key->namlen = klen + 1;
3529  			goto free_names;
3530  		}
3531  	}
3532  
3533  	/* l->namlen <= r->namlen since l <= r */
3534  	if (lkey.namlen < rkey.namlen) {
3535  		*kname = *pr;
3536  		key->namlen = klen + 1;
3537  	} else			/* l->namelen == r->namelen */
3538  		key->namlen = klen;
3539  
3540  free_names:
3541  	kfree(lkey.name);
3542  	kfree(rkey.name);
3543  	return 0;
3544  }
3545  
3546  
3547  
3548  /*
3549   *	dtGetKey()
3550   *
3551   * function: get key of the entry
3552   */
dtGetKey(dtpage_t * p,int i,struct component_name * key,int flag)3553  static void dtGetKey(dtpage_t * p, int i,	/* entry index */
3554  		     struct component_name * key, int flag)
3555  {
3556  	int si;
3557  	s8 *stbl;
3558  	struct ldtentry *lh;
3559  	struct idtentry *ih;
3560  	struct dtslot *t;
3561  	int namlen, len;
3562  	wchar_t *kname;
3563  	__le16 *name;
3564  
3565  	/* get entry */
3566  	stbl = DT_GETSTBL(p);
3567  	si = stbl[i];
3568  	if (p->header.flag & BT_LEAF) {
3569  		lh = (struct ldtentry *) & p->slot[si];
3570  		si = lh->next;
3571  		namlen = lh->namlen;
3572  		name = lh->name;
3573  		if (flag & JFS_DIR_INDEX)
3574  			len = min(namlen, DTLHDRDATALEN);
3575  		else
3576  			len = min(namlen, DTLHDRDATALEN_LEGACY);
3577  	} else {
3578  		ih = (struct idtentry *) & p->slot[si];
3579  		si = ih->next;
3580  		namlen = ih->namlen;
3581  		name = ih->name;
3582  		len = min(namlen, DTIHDRDATALEN);
3583  	}
3584  
3585  	key->namlen = namlen;
3586  	kname = key->name;
3587  
3588  	/*
3589  	 * move head/only segment
3590  	 */
3591  	UniStrncpy_from_le(kname, name, len);
3592  
3593  	/*
3594  	 * move additional segment(s)
3595  	 */
3596  	while (si >= 0) {
3597  		/* get next segment */
3598  		t = &p->slot[si];
3599  		kname += len;
3600  		namlen -= len;
3601  		len = min(namlen, DTSLOTDATALEN);
3602  		UniStrncpy_from_le(kname, t->name, len);
3603  
3604  		si = t->next;
3605  	}
3606  }
3607  
3608  
3609  /*
3610   *	dtInsertEntry()
3611   *
3612   * function: allocate free slot(s) and
3613   *	     write a leaf/internal entry
3614   *
3615   * return: entry slot index
3616   */
dtInsertEntry(dtpage_t * p,int index,struct component_name * key,ddata_t * data,struct dt_lock ** dtlock)3617  static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3618  			  ddata_t * data, struct dt_lock ** dtlock)
3619  {
3620  	struct dtslot *h, *t;
3621  	struct ldtentry *lh = NULL;
3622  	struct idtentry *ih = NULL;
3623  	int hsi, fsi, klen, len, nextindex;
3624  	wchar_t *kname;
3625  	__le16 *name;
3626  	s8 *stbl;
3627  	pxd_t *xd;
3628  	struct dt_lock *dtlck = *dtlock;
3629  	struct lv *lv;
3630  	int xsi, n;
3631  	s64 bn = 0;
3632  	struct metapage *mp = NULL;
3633  
3634  	klen = key->namlen;
3635  	kname = key->name;
3636  
3637  	/* allocate a free slot */
3638  	hsi = fsi = p->header.freelist;
3639  	h = &p->slot[fsi];
3640  	p->header.freelist = h->next;
3641  	--p->header.freecnt;
3642  
3643  	/* open new linelock */
3644  	if (dtlck->index >= dtlck->maxcnt)
3645  		dtlck = (struct dt_lock *) txLinelock(dtlck);
3646  
3647  	lv = & dtlck->lv[dtlck->index];
3648  	lv->offset = hsi;
3649  
3650  	/* write head/only segment */
3651  	if (p->header.flag & BT_LEAF) {
3652  		lh = (struct ldtentry *) h;
3653  		lh->next = h->next;
3654  		lh->inumber = cpu_to_le32(data->leaf.ino);
3655  		lh->namlen = klen;
3656  		name = lh->name;
3657  		if (data->leaf.ip) {
3658  			len = min(klen, DTLHDRDATALEN);
3659  			if (!(p->header.flag & BT_ROOT))
3660  				bn = addressPXD(&p->header.self);
3661  			lh->index = cpu_to_le32(add_index(data->leaf.tid,
3662  							  data->leaf.ip,
3663  							  bn, index));
3664  		} else
3665  			len = min(klen, DTLHDRDATALEN_LEGACY);
3666  	} else {
3667  		ih = (struct idtentry *) h;
3668  		ih->next = h->next;
3669  		xd = (pxd_t *) ih;
3670  		*xd = data->xd;
3671  		ih->namlen = klen;
3672  		name = ih->name;
3673  		len = min(klen, DTIHDRDATALEN);
3674  	}
3675  
3676  	UniStrncpy_to_le(name, kname, len);
3677  
3678  	n = 1;
3679  	xsi = hsi;
3680  
3681  	/* write additional segment(s) */
3682  	t = h;
3683  	klen -= len;
3684  	while (klen) {
3685  		/* get free slot */
3686  		fsi = p->header.freelist;
3687  		t = &p->slot[fsi];
3688  		p->header.freelist = t->next;
3689  		--p->header.freecnt;
3690  
3691  		/* is next slot contiguous ? */
3692  		if (fsi != xsi + 1) {
3693  			/* close current linelock */
3694  			lv->length = n;
3695  			dtlck->index++;
3696  
3697  			/* open new linelock */
3698  			if (dtlck->index < dtlck->maxcnt)
3699  				lv++;
3700  			else {
3701  				dtlck = (struct dt_lock *) txLinelock(dtlck);
3702  				lv = & dtlck->lv[0];
3703  			}
3704  
3705  			lv->offset = fsi;
3706  			n = 0;
3707  		}
3708  
3709  		kname += len;
3710  		len = min(klen, DTSLOTDATALEN);
3711  		UniStrncpy_to_le(t->name, kname, len);
3712  
3713  		n++;
3714  		xsi = fsi;
3715  		klen -= len;
3716  	}
3717  
3718  	/* close current linelock */
3719  	lv->length = n;
3720  	dtlck->index++;
3721  
3722  	*dtlock = dtlck;
3723  
3724  	/* terminate last/only segment */
3725  	if (h == t) {
3726  		/* single segment entry */
3727  		if (p->header.flag & BT_LEAF)
3728  			lh->next = -1;
3729  		else
3730  			ih->next = -1;
3731  	} else
3732  		/* multi-segment entry */
3733  		t->next = -1;
3734  
3735  	/* if insert into middle, shift right succeeding entries in stbl */
3736  	stbl = DT_GETSTBL(p);
3737  	nextindex = p->header.nextindex;
3738  	if (index < nextindex) {
3739  		memmove(stbl + index + 1, stbl + index, nextindex - index);
3740  
3741  		if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
3742  			s64 lblock;
3743  
3744  			/*
3745  			 * Need to update slot number for entries that moved
3746  			 * in the stbl
3747  			 */
3748  			mp = NULL;
3749  			for (n = index + 1; n <= nextindex; n++) {
3750  				lh = (struct ldtentry *) & (p->slot[stbl[n]]);
3751  				modify_index(data->leaf.tid, data->leaf.ip,
3752  					     le32_to_cpu(lh->index), bn, n,
3753  					     &mp, &lblock);
3754  			}
3755  			if (mp)
3756  				release_metapage(mp);
3757  		}
3758  	}
3759  
3760  	stbl[index] = hsi;
3761  
3762  	/* advance next available entry index of stbl */
3763  	++p->header.nextindex;
3764  }
3765  
3766  
3767  /*
3768   *	dtMoveEntry()
3769   *
3770   * function: move entries from split/left page to new/right page
3771   *
3772   *	nextindex of dst page and freelist/freecnt of both pages
3773   *	are updated.
3774   */
dtMoveEntry(dtpage_t * sp,int si,dtpage_t * dp,struct dt_lock ** sdtlock,struct dt_lock ** ddtlock,int do_index)3775  static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
3776  			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
3777  			int do_index)
3778  {
3779  	int ssi, next;		/* src slot index */
3780  	int di;			/* dst entry index */
3781  	int dsi;		/* dst slot index */
3782  	s8 *sstbl, *dstbl;	/* sorted entry table */
3783  	int snamlen, len;
3784  	struct ldtentry *slh, *dlh = NULL;
3785  	struct idtentry *sih, *dih = NULL;
3786  	struct dtslot *h, *s, *d;
3787  	struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
3788  	struct lv *slv, *dlv;
3789  	int xssi, ns, nd;
3790  	int sfsi;
3791  
3792  	sstbl = (s8 *) & sp->slot[sp->header.stblindex];
3793  	dstbl = (s8 *) & dp->slot[dp->header.stblindex];
3794  
3795  	dsi = dp->header.freelist;	/* first (whole page) free slot */
3796  	sfsi = sp->header.freelist;
3797  
3798  	/* linelock destination entry slot */
3799  	dlv = & ddtlck->lv[ddtlck->index];
3800  	dlv->offset = dsi;
3801  
3802  	/* linelock source entry slot */
3803  	slv = & sdtlck->lv[sdtlck->index];
3804  	slv->offset = sstbl[si];
3805  	xssi = slv->offset - 1;
3806  
3807  	/*
3808  	 * move entries
3809  	 */
3810  	ns = nd = 0;
3811  	for (di = 0; si < sp->header.nextindex; si++, di++) {
3812  		ssi = sstbl[si];
3813  		dstbl[di] = dsi;
3814  
3815  		/* is next slot contiguous ? */
3816  		if (ssi != xssi + 1) {
3817  			/* close current linelock */
3818  			slv->length = ns;
3819  			sdtlck->index++;
3820  
3821  			/* open new linelock */
3822  			if (sdtlck->index < sdtlck->maxcnt)
3823  				slv++;
3824  			else {
3825  				sdtlck = (struct dt_lock *) txLinelock(sdtlck);
3826  				slv = & sdtlck->lv[0];
3827  			}
3828  
3829  			slv->offset = ssi;
3830  			ns = 0;
3831  		}
3832  
3833  		/*
3834  		 * move head/only segment of an entry
3835  		 */
3836  		/* get dst slot */
3837  		h = d = &dp->slot[dsi];
3838  
3839  		/* get src slot and move */
3840  		s = &sp->slot[ssi];
3841  		if (sp->header.flag & BT_LEAF) {
3842  			/* get source entry */
3843  			slh = (struct ldtentry *) s;
3844  			dlh = (struct ldtentry *) h;
3845  			snamlen = slh->namlen;
3846  
3847  			if (do_index) {
3848  				len = min(snamlen, DTLHDRDATALEN);
3849  				dlh->index = slh->index; /* little-endian */
3850  			} else
3851  				len = min(snamlen, DTLHDRDATALEN_LEGACY);
3852  
3853  			memcpy(dlh, slh, 6 + len * 2);
3854  
3855  			next = slh->next;
3856  
3857  			/* update dst head/only segment next field */
3858  			dsi++;
3859  			dlh->next = dsi;
3860  		} else {
3861  			sih = (struct idtentry *) s;
3862  			snamlen = sih->namlen;
3863  
3864  			len = min(snamlen, DTIHDRDATALEN);
3865  			dih = (struct idtentry *) h;
3866  			memcpy(dih, sih, 10 + len * 2);
3867  			next = sih->next;
3868  
3869  			dsi++;
3870  			dih->next = dsi;
3871  		}
3872  
3873  		/* free src head/only segment */
3874  		s->next = sfsi;
3875  		s->cnt = 1;
3876  		sfsi = ssi;
3877  
3878  		ns++;
3879  		nd++;
3880  		xssi = ssi;
3881  
3882  		/*
3883  		 * move additional segment(s) of the entry
3884  		 */
3885  		snamlen -= len;
3886  		while ((ssi = next) >= 0) {
3887  			/* is next slot contiguous ? */
3888  			if (ssi != xssi + 1) {
3889  				/* close current linelock */
3890  				slv->length = ns;
3891  				sdtlck->index++;
3892  
3893  				/* open new linelock */
3894  				if (sdtlck->index < sdtlck->maxcnt)
3895  					slv++;
3896  				else {
3897  					sdtlck =
3898  					    (struct dt_lock *)
3899  					    txLinelock(sdtlck);
3900  					slv = & sdtlck->lv[0];
3901  				}
3902  
3903  				slv->offset = ssi;
3904  				ns = 0;
3905  			}
3906  
3907  			/* get next source segment */
3908  			s = &sp->slot[ssi];
3909  
3910  			/* get next destination free slot */
3911  			d++;
3912  
3913  			len = min(snamlen, DTSLOTDATALEN);
3914  			UniStrncpy_le(d->name, s->name, len);
3915  
3916  			ns++;
3917  			nd++;
3918  			xssi = ssi;
3919  
3920  			dsi++;
3921  			d->next = dsi;
3922  
3923  			/* free source segment */
3924  			next = s->next;
3925  			s->next = sfsi;
3926  			s->cnt = 1;
3927  			sfsi = ssi;
3928  
3929  			snamlen -= len;
3930  		}		/* end while */
3931  
3932  		/* terminate dst last/only segment */
3933  		if (h == d) {
3934  			/* single segment entry */
3935  			if (dp->header.flag & BT_LEAF)
3936  				dlh->next = -1;
3937  			else
3938  				dih->next = -1;
3939  		} else
3940  			/* multi-segment entry */
3941  			d->next = -1;
3942  	}			/* end for */
3943  
3944  	/* close current linelock */
3945  	slv->length = ns;
3946  	sdtlck->index++;
3947  	*sdtlock = sdtlck;
3948  
3949  	dlv->length = nd;
3950  	ddtlck->index++;
3951  	*ddtlock = ddtlck;
3952  
3953  	/* update source header */
3954  	sp->header.freelist = sfsi;
3955  	sp->header.freecnt += nd;
3956  
3957  	/* update destination header */
3958  	dp->header.nextindex = di;
3959  
3960  	dp->header.freelist = dsi;
3961  	dp->header.freecnt -= nd;
3962  }
3963  
3964  
3965  /*
3966   *	dtDeleteEntry()
3967   *
3968   * function: free a (leaf/internal) entry
3969   *
3970   * log freelist header, stbl, and each segment slot of entry
3971   * (even though last/only segment next field is modified,
3972   * physical image logging requires all segment slots of
3973   * the entry logged to avoid applying previous updates
3974   * to the same slots)
3975   */
dtDeleteEntry(dtpage_t * p,int fi,struct dt_lock ** dtlock)3976  static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
3977  {
3978  	int fsi;		/* free entry slot index */
3979  	s8 *stbl;
3980  	struct dtslot *t;
3981  	int si, freecnt;
3982  	struct dt_lock *dtlck = *dtlock;
3983  	struct lv *lv;
3984  	int xsi, n;
3985  
3986  	/* get free entry slot index */
3987  	stbl = DT_GETSTBL(p);
3988  	fsi = stbl[fi];
3989  
3990  	/* open new linelock */
3991  	if (dtlck->index >= dtlck->maxcnt)
3992  		dtlck = (struct dt_lock *) txLinelock(dtlck);
3993  	lv = & dtlck->lv[dtlck->index];
3994  
3995  	lv->offset = fsi;
3996  
3997  	/* get the head/only segment */
3998  	t = &p->slot[fsi];
3999  	if (p->header.flag & BT_LEAF)
4000  		si = ((struct ldtentry *) t)->next;
4001  	else
4002  		si = ((struct idtentry *) t)->next;
4003  	t->next = si;
4004  	t->cnt = 1;
4005  
4006  	n = freecnt = 1;
4007  	xsi = fsi;
4008  
4009  	/* find the last/only segment */
4010  	while (si >= 0) {
4011  		/* is next slot contiguous ? */
4012  		if (si != xsi + 1) {
4013  			/* close current linelock */
4014  			lv->length = n;
4015  			dtlck->index++;
4016  
4017  			/* open new linelock */
4018  			if (dtlck->index < dtlck->maxcnt)
4019  				lv++;
4020  			else {
4021  				dtlck = (struct dt_lock *) txLinelock(dtlck);
4022  				lv = & dtlck->lv[0];
4023  			}
4024  
4025  			lv->offset = si;
4026  			n = 0;
4027  		}
4028  
4029  		n++;
4030  		xsi = si;
4031  		freecnt++;
4032  
4033  		t = &p->slot[si];
4034  		t->cnt = 1;
4035  		si = t->next;
4036  	}
4037  
4038  	/* close current linelock */
4039  	lv->length = n;
4040  	dtlck->index++;
4041  
4042  	*dtlock = dtlck;
4043  
4044  	/* update freelist */
4045  	t->next = p->header.freelist;
4046  	p->header.freelist = fsi;
4047  	p->header.freecnt += freecnt;
4048  
4049  	/* if delete from middle,
4050  	 * shift left the succedding entries in the stbl
4051  	 */
4052  	si = p->header.nextindex;
4053  	if (fi < si - 1)
4054  		memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4055  
4056  	p->header.nextindex--;
4057  }
4058  
4059  
4060  /*
4061   *	dtTruncateEntry()
4062   *
4063   * function: truncate a (leaf/internal) entry
4064   *
4065   * log freelist header, stbl, and each segment slot of entry
4066   * (even though last/only segment next field is modified,
4067   * physical image logging requires all segment slots of
4068   * the entry logged to avoid applying previous updates
4069   * to the same slots)
4070   */
dtTruncateEntry(dtpage_t * p,int ti,struct dt_lock ** dtlock)4071  static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4072  {
4073  	int tsi;		/* truncate entry slot index */
4074  	s8 *stbl;
4075  	struct dtslot *t;
4076  	int si, freecnt;
4077  	struct dt_lock *dtlck = *dtlock;
4078  	struct lv *lv;
4079  	int fsi, xsi, n;
4080  
4081  	/* get free entry slot index */
4082  	stbl = DT_GETSTBL(p);
4083  	tsi = stbl[ti];
4084  
4085  	/* open new linelock */
4086  	if (dtlck->index >= dtlck->maxcnt)
4087  		dtlck = (struct dt_lock *) txLinelock(dtlck);
4088  	lv = & dtlck->lv[dtlck->index];
4089  
4090  	lv->offset = tsi;
4091  
4092  	/* get the head/only segment */
4093  	t = &p->slot[tsi];
4094  	ASSERT(p->header.flag & BT_INTERNAL);
4095  	((struct idtentry *) t)->namlen = 0;
4096  	si = ((struct idtentry *) t)->next;
4097  	((struct idtentry *) t)->next = -1;
4098  
4099  	n = 1;
4100  	freecnt = 0;
4101  	fsi = si;
4102  	xsi = tsi;
4103  
4104  	/* find the last/only segment */
4105  	while (si >= 0) {
4106  		/* is next slot contiguous ? */
4107  		if (si != xsi + 1) {
4108  			/* close current linelock */
4109  			lv->length = n;
4110  			dtlck->index++;
4111  
4112  			/* open new linelock */
4113  			if (dtlck->index < dtlck->maxcnt)
4114  				lv++;
4115  			else {
4116  				dtlck = (struct dt_lock *) txLinelock(dtlck);
4117  				lv = & dtlck->lv[0];
4118  			}
4119  
4120  			lv->offset = si;
4121  			n = 0;
4122  		}
4123  
4124  		n++;
4125  		xsi = si;
4126  		freecnt++;
4127  
4128  		t = &p->slot[si];
4129  		t->cnt = 1;
4130  		si = t->next;
4131  	}
4132  
4133  	/* close current linelock */
4134  	lv->length = n;
4135  	dtlck->index++;
4136  
4137  	*dtlock = dtlck;
4138  
4139  	/* update freelist */
4140  	if (freecnt == 0)
4141  		return;
4142  	t->next = p->header.freelist;
4143  	p->header.freelist = fsi;
4144  	p->header.freecnt += freecnt;
4145  }
4146  
4147  
4148  /*
4149   *	dtLinelockFreelist()
4150   */
dtLinelockFreelist(dtpage_t * p,int m,struct dt_lock ** dtlock)4151  static void dtLinelockFreelist(dtpage_t * p,	/* directory page */
4152  			       int m,	/* max slot index */
4153  			       struct dt_lock ** dtlock)
4154  {
4155  	int fsi;		/* free entry slot index */
4156  	struct dtslot *t;
4157  	int si;
4158  	struct dt_lock *dtlck = *dtlock;
4159  	struct lv *lv;
4160  	int xsi, n;
4161  
4162  	/* get free entry slot index */
4163  	fsi = p->header.freelist;
4164  
4165  	/* open new linelock */
4166  	if (dtlck->index >= dtlck->maxcnt)
4167  		dtlck = (struct dt_lock *) txLinelock(dtlck);
4168  	lv = & dtlck->lv[dtlck->index];
4169  
4170  	lv->offset = fsi;
4171  
4172  	n = 1;
4173  	xsi = fsi;
4174  
4175  	t = &p->slot[fsi];
4176  	si = t->next;
4177  
4178  	/* find the last/only segment */
4179  	while (si < m && si >= 0) {
4180  		/* is next slot contiguous ? */
4181  		if (si != xsi + 1) {
4182  			/* close current linelock */
4183  			lv->length = n;
4184  			dtlck->index++;
4185  
4186  			/* open new linelock */
4187  			if (dtlck->index < dtlck->maxcnt)
4188  				lv++;
4189  			else {
4190  				dtlck = (struct dt_lock *) txLinelock(dtlck);
4191  				lv = & dtlck->lv[0];
4192  			}
4193  
4194  			lv->offset = si;
4195  			n = 0;
4196  		}
4197  
4198  		n++;
4199  		xsi = si;
4200  
4201  		t = &p->slot[si];
4202  		si = t->next;
4203  	}
4204  
4205  	/* close current linelock */
4206  	lv->length = n;
4207  	dtlck->index++;
4208  
4209  	*dtlock = dtlck;
4210  }
4211  
4212  
4213  /*
4214   * NAME: dtModify
4215   *
4216   * FUNCTION: Modify the inode number part of a directory entry
4217   *
4218   * PARAMETERS:
4219   *	tid	- Transaction id
4220   *	ip	- Inode of parent directory
4221   *	key	- Name of entry to be modified
4222   *	orig_ino	- Original inode number expected in entry
4223   *	new_ino	- New inode number to put into entry
4224   *	flag	- JFS_RENAME
4225   *
4226   * RETURNS:
4227   *	-ESTALE	- If entry found does not match orig_ino passed in
4228   *	-ENOENT	- If no entry can be found to match key
4229   *	0	- If successfully modified entry
4230   */
dtModify(tid_t tid,struct inode * ip,struct component_name * key,ino_t * orig_ino,ino_t new_ino,int flag)4231  int dtModify(tid_t tid, struct inode *ip,
4232  	 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4233  {
4234  	int rc;
4235  	s64 bn;
4236  	struct metapage *mp;
4237  	dtpage_t *p;
4238  	int index;
4239  	struct btstack btstack;
4240  	struct tlock *tlck;
4241  	struct dt_lock *dtlck;
4242  	struct lv *lv;
4243  	s8 *stbl;
4244  	int entry_si;		/* entry slot index */
4245  	struct ldtentry *entry;
4246  
4247  	/*
4248  	 *	search for the entry to modify:
4249  	 *
4250  	 * dtSearch() returns (leaf page pinned, index at which to modify).
4251  	 */
4252  	if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4253  		return rc;
4254  
4255  	/* retrieve search result */
4256  	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4257  
4258  	BT_MARK_DIRTY(mp, ip);
4259  	/*
4260  	 * acquire a transaction lock on the leaf page of named entry
4261  	 */
4262  	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4263  	dtlck = (struct dt_lock *) & tlck->lock;
4264  
4265  	/* get slot index of the entry */
4266  	stbl = DT_GETSTBL(p);
4267  	entry_si = stbl[index];
4268  
4269  	/* linelock entry */
4270  	ASSERT(dtlck->index == 0);
4271  	lv = & dtlck->lv[0];
4272  	lv->offset = entry_si;
4273  	lv->length = 1;
4274  	dtlck->index++;
4275  
4276  	/* get the head/only segment */
4277  	entry = (struct ldtentry *) & p->slot[entry_si];
4278  
4279  	/* substitute the inode number of the entry */
4280  	entry->inumber = cpu_to_le32(new_ino);
4281  
4282  	/* unpin the leaf page */
4283  	DT_PUTPAGE(mp);
4284  
4285  	return 0;
4286  }
4287