/*
 *  Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
 *
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 2000, 2001  Free Software Foundation, Inc.
 *
 *  (C) Copyright 2003 - 2004
 *  Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com>
 *
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/* An implementation for the ReiserFS filesystem ported from GRUB.
 * Some parts of this code (mainly the structures and defines) are
 * from the original reiser fs code, as found in the linux kernel.
 */

/* Derive __BYTE_ORDER when the including environment only provides one of
 * __LITTLE_ENDIAN / __BIG_ENDIAN. */
#ifndef __BYTE_ORDER
#if defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
#define __BYTE_ORDER __LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN)
#define __BYTE_ORDER __BIG_ENDIAN
#else
#error "unable to define __BYTE_ORDER"
#endif
#endif /* not __BYTE_ORDER */

#define FSYS_BUFLEN  0x8000
#define FSYS_BUF     fsys_buf

/* This is the new super block of a journaling reiserfs system */
struct reiserfs_super_block
{
  __u32 s_block_count;            /* blocks count */
  __u32 s_free_blocks;            /* free blocks count */
  __u32 s_root_block;             /* root block number */
  __u32 s_journal_block;          /* journal block number */
  __u32 s_journal_dev;            /* journal device number */
  __u32 s_journal_size;           /* size of the journal on FS creation.
                                     used to make sure they don't
                                     overflow it */
  __u32 s_journal_trans_max;      /* max number of blocks in a
                                     transaction. */
  __u32 s_journal_magic;          /* random value made on fs creation */
  __u32 s_journal_max_batch;      /* max number of blocks to batch into
                                     a trans */
  __u32 s_journal_max_commit_age; /* in seconds, how old can an async
                                     commit be */
  __u32 s_journal_max_trans_age;  /* in seconds, how old can a
                                     transaction be */
  __u16 s_blocksize;              /* block size */
  __u16 s_oid_maxsize;            /* max size of object id array */
  __u16 s_oid_cursize;            /* current size of object id array */
  __u16 s_state;                  /* valid or error */
  char s_magic[16];               /* reiserfs magic string indicates that
                                     file system is reiserfs */
  __u16 s_tree_height;            /* height of disk tree */
  __u16 s_bmap_nr;                /* amount of bitmap blocks needed to
                                     address each block of file system */
  __u16 s_version;
  char s_unused[128];             /* zero filled by mkreiserfs */
};


/* Super block accessors: fields are stored little-endian on disk. */
#define sb_root_block(sbp)           (__le32_to_cpu((sbp)->s_root_block))
#define sb_journal_block(sbp)        (__le32_to_cpu((sbp)->s_journal_block))
#define set_sb_journal_block(sbp,v)  ((sbp)->s_journal_block = __cpu_to_le32(v))
#define sb_journal_size(sbp)         (__le32_to_cpu((sbp)->s_journal_size))
#define sb_blocksize(sbp)            (__le16_to_cpu((sbp)->s_blocksize))
#define set_sb_blocksize(sbp,v)      ((sbp)->s_blocksize = __cpu_to_le16(v))
#define sb_version(sbp)              (__le16_to_cpu((sbp)->s_version))
#define set_sb_version(sbp,v)        ((sbp)->s_version = __cpu_to_le16(v))


#define REISERFS_MAX_SUPPORTED_VERSION 2
#define REISERFS_SUPER_MAGIC_STRING    "ReIsErFs"
#define REISER2FS_SUPER_MAGIC_STRING   "ReIsEr2Fs"
#define REISER3FS_SUPER_MAGIC_STRING   "ReIsEr3Fs"

#define MAX_HEIGHT 7

/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018

/* first block written in a commit. */
struct reiserfs_journal_desc {
  __u32 j_trans_id;                      /* id of commit */
  __u32 j_len;                           /* length of commit. len +1 is the
                                            commit block */
  __u32 j_mount_id;                      /* mount id of this trans */
  __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first
                                            blocks */
  char j_magic[12];
};

/* last block written in a commit */
struct reiserfs_journal_commit {
  __u32 j_trans_id;                      /* must match j_trans_id from the
                                            desc block */
  __u32 j_len;                           /* ditto */
  __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last
                                            blocks */
  char j_digest[16];                     /* md5 sum of all the blocks
                                            involved, including desc and
                                            commit. not used, kill it */
};

/* this header block gets written whenever a transaction is considered
   fully flushed, and is more recent than the last fully flushed
   transaction.
   fully flushed means all the log blocks and all the real blocks are
   on disk, and this transaction does not need to be replayed.
*/
struct reiserfs_journal_header {
  /* id of last fully flushed transaction */
  __u32 j_last_flush_trans_id;
  /* offset in the log of where to start replay after a crash */
  __u32 j_first_unflushed_offset;
  /* mount id to detect very old transactions */
  __u32 j_mount_id;
};

/* magic string to find desc blocks in the journal */
#define JOURNAL_DESC_MAGIC "ReIsErLB"


/*
 * directories use this key as well as old files
 */
struct offset_v1
{
  /*
   * for regular files this is the offset to the first byte of the
   * body, contained in the object-item, as measured from the start of
   * the entire body of the object.
   *
   * for directory entries, k_offset consists of hash derived from
   * hashing the name and using few bits (23 or more) of the resulting
   * hash, and generation number that allows distinguishing names with
   * hash collisions. If number of collisions overflows generation
   * number, we return EEXIST.  High order bit is 0 always
   */
  __u32 k_offset;
  __u32 k_uniqueness;
};

struct offset_v2 {
  /*
   * for regular files this is the offset to the first byte of the
   * body, contained in the object-item, as measured from the start of
   * the entire body of the object.
   *
   * for directory entries, k_offset consists of hash derived from
   * hashing the name and using few bits (23 or more) of the resulting
   * hash, and generation number that allows distinguishing names with
   * hash collisions. If number of collisions overflows generation
   * number, we return EEXIST.  High order bit is 0 always
   */

#if defined(__LITTLE_ENDIAN_BITFIELD)
  /* little endian version */
  __u64 k_offset:60;
  __u64 k_type: 4;
#elif defined(__BIG_ENDIAN_BITFIELD)
  /* big endian version */
  __u64 k_type: 4;
  __u64 k_offset:60;
#else
#error "__LITTLE_ENDIAN_BITFIELD or __BIG_ENDIAN_BITFIELD must be defined"
#endif
} __attribute__ ((__packed__));

#define TYPE_MAXTYPE 3
#define TYPE_ANY 15

/*
 * Accessors for the packed v2 offset/type word.  On big-endian hosts the
 * 64-bit word is first converted from little-endian and then read through
 * the bit-fields; on little-endian hosts the bit-fields are read directly.
 */
#if (__BYTE_ORDER == __BIG_ENDIAN)
typedef union {
  struct offset_v2 offset_v2;
  __u64 linear;
} __attribute__ ((__packed__)) offset_v2_esafe_overlay;

/* Returns the key type, or TYPE_ANY when the stored value exceeds
 * TYPE_MAXTYPE. */
static inline __u16 offset_v2_k_type( const struct offset_v2 *v2 )
{
  offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2;
  tmp.linear = __le64_to_cpu( tmp.linear );
  return (tmp.offset_v2.k_type <= TYPE_MAXTYPE)?tmp.offset_v2.k_type:TYPE_ANY;
}

/* Returns the 60-bit offset part of the key. */
static inline loff_t offset_v2_k_offset( const struct offset_v2 *v2 )
{
  offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2;
  tmp.linear = __le64_to_cpu( tmp.linear );
  return tmp.offset_v2.k_offset;
}
#elif (__BYTE_ORDER == __LITTLE_ENDIAN)
# define offset_v2_k_type(v2)    ((v2)->k_type)
# define offset_v2_k_offset(v2)  ((v2)->k_offset)
#else
#error "__BYTE_ORDER must be __LITTLE_ENDIAN or __BIG_ENDIAN"
#endif

struct key
{
  /* packing locality: by default parent directory object id */
  __u32 k_dir_id;
  /* object identifier */
  __u32 k_objectid;
  /* the offset and node type (old and new form) */
  union
  {
    struct offset_v1 v1;
    struct offset_v2 v2;
  }
  u;
};

#define KEY_SIZE (sizeof (struct key))

/* Header of a disk block.  More precisely, header of a formatted leaf
   or internal node, and not the header of an unformatted node. */
struct block_head
{
  __u16 blk_level;                /* Level of a block in the tree. */
  __u16 blk_nr_item;              /* Number of keys/items in a block. */
  __u16 blk_free_space;           /* Block free space in bytes. */
  struct key blk_right_delim_key; /* Right delimiting key for this block
                                     (supported for leaf level nodes
                                     only) */
};
#define BLKH_SIZE (sizeof (struct block_head))
#define DISK_LEAF_NODE_LEVEL  1 /* Leaf node level. */

struct item_head
{
  /* Everything in the tree is found by searching for it based on
   * its key. */
  struct key ih_key;
  union {
    /* The free space in the last unformatted node of an
       indirect item if this is an indirect item.  This
       equals 0xFFFF iff this is a direct item or stat data
       item. Note that the key, not this field, is used to
       determine the item type, and thus which field this
       union contains. */
    __u16 ih_free_space;
    /* Iff this is a directory item, this field equals the
       number of directory entries in the directory item. */
    __u16 ih_entry_count;
  } __attribute__ ((__packed__)) u;
  __u16 ih_item_len;      /* total size of the item body */
  __u16 ih_item_location; /* an offset to the item body
                           * within the block */
  __u16 ih_version;       /* compared against ITEM_VERSION_1 (old items)
                             and ITEM_VERSION_2 (new items) below.
                             Highest bit is set by fsck temporarily,
                             cleaned after all done */
} __attribute__ ((__packed__));

/* size of item header */
#define IH_SIZE (sizeof (struct item_head))

#define ITEM_VERSION_1 0
#define ITEM_VERSION_2 1

#define ih_version(ih)    (__le16_to_cpu((ih)->ih_version))

/* Offset part of the item key, read from whichever key form the item
 * version selects. */
#define IH_KEY_OFFSET(ih) (ih_version(ih) == ITEM_VERSION_1 \
			   ? __le32_to_cpu((ih)->ih_key.u.v1.k_offset) \
			   : offset_v2_k_offset(&((ih)->ih_key.u.v2)))

/* True when the item key has the given type; `type` is pasted onto V1_ or
 * V2_, e.g. TYPE_STAT_DATA. */
#define IH_KEY_ISTYPE(ih, type) (ih_version(ih) == ITEM_VERSION_1 \
				 ? __le32_to_cpu((ih)->ih_key.u.v1.k_uniqueness) == V1_##type \
				 : offset_v2_k_type(&((ih)->ih_key.u.v2)) == V2_##type)

/***************************************************************************/
/*                      DISK CHILD                                         */
/***************************************************************************/
/* Disk child pointer: The pointer from an internal node of the tree
   to a node that is on disk. */
struct disk_child {
  __u32 dc_block_number; /* Disk child's block number. */
  __u16 dc_size;         /* Disk child's used space. */
  __u16 dc_reserved;
};

#define DC_SIZE (sizeof(struct disk_child))
#define dc_block_number(dc_p)	(__le32_to_cpu((dc_p)->dc_block_number))


/*
 * old stat data is 32 bytes long. We are going to distinguish new one by
 * different size
 */
struct stat_data_v1
{
  __u16 sd_mode;  /* file type, permissions */
  __u16 sd_nlink; /* number of hard links */
  __u16 sd_uid;   /* owner */
  __u16 sd_gid;   /* group */
  __u32 sd_size;  /* file size */
  __u32 sd_atime; /* time of last access */
  __u32 sd_mtime; /* time file was last modified */
  __u32 sd_ctime; /* time inode (stat data) was last changed (except
                     changes to sd_atime and sd_mtime) */
  union {
    __u32 sd_rdev;
    __u32 sd_blocks; /* number of blocks file uses */
  } __attribute__ ((__packed__)) u;
  __u32 sd_first_direct_byte; /* first byte of file which is stored
                                 in a direct item: except that if it
                                 equals 1 it is a symlink and if it
                                 equals ~(__u32)0 there is no
                                 direct item.  The existence of this
                                 field really grates on me. Let's
                                 replace it with a macro based on
                                 sd_size and our tail suppression
                                 policy.  Someday.  -Hans */
} __attribute__ ((__packed__));

/* v1 stat data accessors.  All multi-byte fields, sd_mode included, are
 * converted from little-endian like their v2 counterparts. */
#define stat_data_v1(ih)        (ih_version(ih) == ITEM_VERSION_1)
#define sd_v1_mode(sdp)         (__le16_to_cpu((sdp)->sd_mode))
#define sd_v1_nlink(sdp)        (__le16_to_cpu((sdp)->sd_nlink))
#define sd_v1_uid(sdp)          (__le16_to_cpu((sdp)->sd_uid))
#define sd_v1_gid(sdp)          (__le16_to_cpu((sdp)->sd_gid))
#define sd_v1_size(sdp)         (__le32_to_cpu((sdp)->sd_size))
#define sd_v1_mtime(sdp)        (__le32_to_cpu((sdp)->sd_mtime))

/* Stat Data on disk (reiserfs version of UFS disk inode minus the
   address blocks) */
struct stat_data {
  __u16 sd_mode;  /* file type, permissions */
  __u16 sd_attrs; /* persistent inode flags */
  __u32 sd_nlink; /* number of hard links */
  __u64 sd_size;  /* file size */
  __u32 sd_uid;   /* owner */
  __u32 sd_gid;   /* group */
  __u32 sd_atime; /* time of last access */
  __u32 sd_mtime; /* time file was last modified */
  __u32 sd_ctime; /* time inode (stat data) was last changed (except
                     changes to sd_atime and sd_mtime) */
  __u32 sd_blocks;
  union {
    __u32 sd_rdev;
    __u32 sd_generation;
    /*__u32 sd_first_direct_byte; */
    /* first byte of file which is stored in a
       direct item: except that if it equals 1
       it is a symlink and if it equals
       ~(__u32)0 there is no direct item.  The
       existence of this field really grates
       on me. Let's replace it with a macro
       based on sd_size and our tail
       suppression policy? */
  } __attribute__ ((__packed__)) u;
} __attribute__ ((__packed__));

#define stat_data_v2(ih)        (ih_version(ih) == ITEM_VERSION_2)
#define sd_v2_mode(sdp)         (__le16_to_cpu((sdp)->sd_mode))
#define sd_v2_nlink(sdp)        (__le32_to_cpu((sdp)->sd_nlink))
#define sd_v2_size(sdp)         (__le64_to_cpu((sdp)->sd_size))
#define sd_v2_uid(sdp)          (__le32_to_cpu((sdp)->sd_uid))
#define sd_v2_gid(sdp)          (__le32_to_cpu((sdp)->sd_gid))
#define sd_v2_mtime(sdp)        (__le32_to_cpu((sdp)->sd_mtime))

#define sd_mode(sdp)	     (__le16_to_cpu((sdp)->sd_mode))
#define sd_size(sdp)	     (__le32_to_cpu((sdp)->sd_size))
/* NOTE(review): neither stat data structure in this file has an sd_size_hi
 * member, so this accessor cannot be expanded on them; kept so existing
 * references to the name still resolve. */
#define sd_size_hi(sdp)	     (__le32_to_cpu((sdp)->sd_size_hi))

struct reiserfs_de_head
{
  __u32 deh_offset;   /* third component of the directory entry key */
  __u32 deh_dir_id;   /* objectid of the parent directory of the
                         object, that is referenced by directory entry */
  __u32 deh_objectid; /* objectid of the object, that is referenced by
                         directory entry */
  __u16 deh_location; /* offset of name in the whole item */
  __u16 deh_state;    /* whether 1) entry contains stat data (for
                         future), and 2) whether entry is hidden
                         (unlinked) */
};

#define DEH_SIZE (sizeof (struct reiserfs_de_head))
#define deh_offset(p_deh)         (__le32_to_cpu((p_deh)->deh_offset))
#define deh_dir_id(p_deh)         (__le32_to_cpu((p_deh)->deh_dir_id))
#define deh_objectid(p_deh)       (__le32_to_cpu((p_deh)->deh_objectid))
#define deh_location(p_deh)       (__le16_to_cpu((p_deh)->deh_location))
#define deh_state(p_deh)          (__le16_to_cpu((p_deh)->deh_state))


#define DEH_Statdata (1 << 0)			/* not used now */
#define DEH_Visible  (1 << 2)

#define SD_OFFSET  0
#define SD_UNIQUENESS 0
#define DOT_OFFSET 1
#define DOT_DOT_OFFSET 2
#define DIRENTRY_UNIQUENESS 500

#define V1_TYPE_STAT_DATA 0x0
#define V1_TYPE_DIRECT 0xffffffff
#define V1_TYPE_INDIRECT 0xfffffffe
#define V1_TYPE_DIRECTORY_MAX 0xfffffffd
#define V2_TYPE_STAT_DATA 0
#define V2_TYPE_INDIRECT 1
#define V2_TYPE_DIRECT 2
#define V2_TYPE_DIRENTRY 3

#define REISERFS_ROOT_OBJECTID 2
#define REISERFS_ROOT_PARENT_OBJECTID 1
#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
/* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
#define REISERFS_OLD_BLOCKSIZE 4096

#define S_ISREG(mode) (((mode) & 0170000) == 0100000)
#define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
#define S_ISLNK(mode) (((mode) & 0170000) == 0120000)

#define PATH_MAX       1024	/* include/linux/limits.h */
#define MAX_LINK_COUNT    5	/* number of symbolic links to follow */

/* The size of the node cache.  The expansions are parenthesized so the
 * macros keep their value inside larger expressions (e.g. next to % or /). */
#define FSYSREISER_CACHE_SIZE (24*1024)
#define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
#define FSYSREISER_MAX_BLOCKSIZE (FSYSREISER_CACHE_SIZE / 3)

/* Info about currently opened file */
struct fsys_reiser_fileinfo
{
  __u32 k_dir_id;
  __u32 k_objectid;
};

/* In memory info about the currently mounted filesystem */
struct fsys_reiser_info
{
  /* The last read item head */
  struct item_head *current_ih;
  /* The last read item */
  char *current_item;
  /* The information for the currently opened file */
  struct fsys_reiser_fileinfo fileinfo;
  /* The start of the journal */
  __u32 journal_block;
  /* The size of the journal */
  __u32 journal_block_count;
  /* The first valid descriptor block in journal
     (relative to journal_block) */
  __u32 journal_first_desc;

  /* The ReiserFS version. */
  __u16 version;
  /* The current depth of the reiser tree. */
  __u16 tree_depth;
  /* SECTOR_SIZE << blocksize_shift == blocksize. */
  __u8  blocksize_shift;
  /* 1 << fullblocksize_shift == blocksize. */
  __u8  fullblocksize_shift;
  /* The reiserfs block size  (must be a power of 2) */
  __u16 blocksize;
  /* The number of cached tree nodes */
  __u16 cached_slots;
  /* The number of valid transactions in journal */
  __u16 journal_transactions;

  unsigned int blocks[MAX_HEIGHT];
  unsigned int next_key_nr[MAX_HEIGHT];
};

/* The cached s+tree blocks in FSYS_BUF, see below
 * for a more detailed description.
 *
 * All address computations are done on char * so that no pointer is ever
 * squeezed through an int (which would truncate the address wherever
 * pointers are wider than int).  Byte offsets are unchanged.
 */
#define ROOT     ((char *) FSYS_BUF)
#define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
#define LEAF     CACHE (DISK_LEAF_NODE_LEVEL)

#define BLOCKHEAD(cache) ((struct block_head *) (cache))
#define ITEMHEAD         ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
#define KEY(cache)       ((struct key *) ((char *) (cache) + BLKH_SIZE))
/* DC() expects a variable named nr_item to be in scope at the point of
 * use; it skips the block head and nr_item keys. */
#define DC(cache)        ((struct disk_child *) \
			  ((char *) (cache) + BLKH_SIZE + KEY_SIZE * nr_item))
/* The fsys_reiser_info block.
 */
#define INFO \
    ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
/*
 * The journal cache.  For each transaction it contains the number of
 * blocks followed by the real block numbers of this transaction.
 *
 * If the block numbers of some transaction won't fit in this space,
 * this list is stopped with a 0xffffffff marker and the remaining
 * uncommitted transactions aren't cached.
 */
#define JOURNAL_START    ((__u32 *) (INFO + 1))
#define JOURNAL_END      ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))


/*
 * Return the index of the least significant set bit of `word` (which is
 * log2(word) when word is a power of two).
 *
 * The portable path returns 0 when word == 0.  On the __I386__ path the
 * result of bsfl for a zero operand is not defined by the instruction, so
 * callers should not pass 0.
 */
static __inline__ unsigned long
log2 (unsigned long word)
{
#ifdef __I386__
  __asm__ ("bsfl %1,%0"
	   : "=r" (word)
	   : "r" (word));
  return word;
#else
  unsigned int bit;

  /* The probe mask must be as wide as `word`: shifting a plain int 1 by
     32 or more positions is undefined and misreports high bits. */
  for (bit = 0; bit < 8 * sizeof (word); bit++)
    {
      if ((1UL << bit) & word)
	return bit;
    }

  return 0;
#endif
}

/*
 * Return non-zero iff `word` has exactly one bit set.  Zero is not a
 * power of two and yields 0.
 */
static __inline__ int
is_power_of_two (unsigned long word)
{
  return word != 0 && (word & (word - 1)) == 0;
}

extern const char *bb_mode_string(int mode);
extern int reiserfs_devread (int sector, int byte_offset, int byte_len, char *buf);