ppc64: Don't set Kp bit on SLB
[openbios/afaerber.git] / fs / grubfs / fsys_reiserfs.c
blobb94a335291d9e6e7dd5fecddbf9314fa8ea9afd7
1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3 * GRUB -- GRand Unified Bootloader
4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
19 * MA 02110-1301, USA.
22 #ifdef FSYS_REISERFS
23 #include "shared.h"
24 #include "filesys.h"
26 #undef REISERDEBUG
28 /* Some parts of this code (mainly the structures and defines) are
29 * from the original reiser fs code, as found in the linux kernel.
32 /* include/asm-i386/types.h */
33 typedef __signed__ char __s8;
34 typedef unsigned char __u8;
35 typedef __signed__ short __s16;
36 typedef unsigned short __u16;
37 typedef __signed__ int __s32;
38 typedef unsigned int __u32;
39 typedef unsigned long long __u64;
41 /* linux/posix_type.h */
42 typedef long linux_off_t;
44 #include "libc/byteorder.h"
46 /* include/linux/reiser_fs.h */
47 /* This is the new super block of a journaling reiserfs system */
48 struct reiserfs_super_block
50 __u32 s_block_count; /* blocks count */
51 __u32 s_free_blocks; /* free blocks count */
52 __u32 s_root_block; /* root block number */
53 __u32 s_journal_block; /* journal block number */
54 __u32 s_journal_dev; /* journal device number */
55 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
56 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */
57 __u32 s_journal_magic; /* random value made on fs creation */
58 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */
59 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */
60 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */
61 __u16 s_blocksize; /* block size */
62 __u16 s_oid_maxsize; /* max size of object id array */
63 __u16 s_oid_cursize; /* current size of object id array */
64 __u16 s_state; /* valid or error */
65 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
66 __u16 s_tree_height; /* height of disk tree */
67 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
68 __u16 s_version;
69 char s_unused[128]; /* zero filled by mkreiserfs */
72 #define REISERFS_MAX_SUPPORTED_VERSION 2
73 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
74 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
76 #define MAX_HEIGHT 7
78 /* must be correct to keep the desc and commit structs at 4k */
79 #define JOURNAL_TRANS_HALF 1018
81 /* first block written in a commit. */
82 struct reiserfs_journal_desc {
83 __u32 j_trans_id; /* id of commit */
84 __u32 j_len; /* length of commit. len +1 is the commit block */
85 __u32 j_mount_id; /* mount id of this trans*/
86 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
87 char j_magic[12];
90 /* last block written in a commit */
91 struct reiserfs_journal_commit {
92 __u32 j_trans_id; /* must match j_trans_id from the desc block */
93 __u32 j_len; /* ditto */
94 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
95 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
98 /* this header block gets written whenever a transaction is considered
99 fully flushed, and is more recent than the last fully flushed
100 transaction.
101 fully flushed means all the log blocks and all the real blocks are
102 on disk, and this transaction does not need to be replayed.
104 struct reiserfs_journal_header {
105 /* id of last fully flushed transaction */
106 __u32 j_last_flush_trans_id;
107 /* offset in the log of where to start replay after a crash */
108 __u32 j_first_unflushed_offset;
109 /* mount id to detect very old transactions */
110 __u32 j_mount_id;
113 /* magic string to find desc blocks in the journal */
114 #define JOURNAL_DESC_MAGIC "ReIsErLB"
118 * directories use this key as well as old files
120 struct offset_v1
123 * for regular files this is the offset to the first byte of the
124 * body, contained in the object-item, as measured from the start of
125 * the entire body of the object.
127 * for directory entries, k_offset consists of hash derived from
128 * hashing the name and using few bits (23 or more) of the resulting
129 * hash, and generation number that allows distinguishing names with
130 * hash collisions. If number of collisions overflows generation
131 * number, we return EEXIST. High order bit is 0 always
133 __u32 k_offset;
134 __u32 k_uniqueness;
137 struct offset_v2
140 * for regular files this is the offset to the first byte of the
141 * body, contained in the object-item, as measured from the start of
142 * the entire body of the object.
144 * for directory entries, k_offset consists of hash derived from
145 * hashing the name and using few bits (23 or more) of the resulting
146 * hash, and generation number that allows distinguishing names with
147 * hash collisions. If number of collisions overflows generation
148 * number, we return EEXIST. High order bit is 0 always
150 __u64 k_offset:60;
151 __u64 k_type: 4;
155 struct key
157 /* packing locality: by default parent directory object id */
158 __u32 k_dir_id;
159 /* object identifier */
160 __u32 k_objectid;
161 /* the offset and node type (old and new form) */
162 union
164 struct offset_v1 v1;
165 struct offset_v2 v2;
170 #define KEY_SIZE (sizeof (struct key))
172 /* Header of a disk block. More precisely, header of a formatted leaf
173 or internal node, and not the header of an unformatted node. */
174 struct block_head
176 __u16 blk_level; /* Level of a block in the tree. */
177 __u16 blk_nr_item; /* Number of keys/items in a block. */
178 __u16 blk_free_space; /* Block free space in bytes. */
179 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
180 only) */
182 #define BLKH_SIZE (sizeof (struct block_head))
183 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
185 struct item_head
187 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/
189 union
191 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
192 is an indirect item. This equals 0xFFFF iff this is a direct item or
193 stat data item. Note that the key, not this field, is used to determine
194 the item type, and thus which field this union contains. */
195 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
196 entries in the directory item. */
199 __u16 ih_item_len; /* total size of the item body */
200 __u16 ih_item_location; /* an offset to the item body within the block */
201 __u16 ih_version; /* ITEM_VERSION_1 for all old items,
202 ITEM_VERSION_2 for new ones.
203 Highest bit is set by fsck
204 temporary, cleaned after all done */
206 /* size of item header */
207 #define IH_SIZE (sizeof (struct item_head))
209 #define ITEM_VERSION_1 0
210 #define ITEM_VERSION_2 1
211 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
212 ? (ih)->ih_key.u.v1.k_offset \
213 : (ih)->ih_key.u.v2.k_offset)
215 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
216 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
217 : (ih)->ih_key.u.v2.k_type == V2_##type)
219 /* FIXME these types look wrong. */
220 struct disk_child
222 unsigned long dc_block_number; /* Disk child's block number. */
223 unsigned short dc_size; /* Disk child's used space. */
226 #define DC_SIZE (sizeof (struct disk_child))
228 /* Stat Data on disk.
230 * Note that reiserfs has two different forms of stat data. Luckily
231 * the fields needed by grub are at the same position.
233 struct stat_data
235 __u16 sd_mode; /* file type, permissions */
236 __u16 sd_notused1[3]; /* fields not needed by reiserfs */
237 __u32 sd_size; /* file size */
238 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */
241 struct reiserfs_de_head
243 __u32 deh_offset; /* third component of the directory entry key */
244 __u32 deh_dir_id; /* objectid of the parent directory of the
245 object, that is referenced by directory entry */
246 __u32 deh_objectid;/* objectid of the object, that is referenced by
247 directory entry */
248 __u16 deh_location;/* offset of name in the whole item */
249 __u16 deh_state; /* whether 1) entry contains stat data (for
250 future), and 2) whether entry is hidden
251 (unlinked) */
254 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
256 #define DEH_Statdata (1 << 0) /* not used now */
257 #define DEH_Visible (1 << 2)
259 #define SD_OFFSET 0
260 #define SD_UNIQUENESS 0
261 #define DOT_OFFSET 1
262 #define DOT_DOT_OFFSET 2
263 #define DIRENTRY_UNIQUENESS 500
265 #define V1_TYPE_STAT_DATA 0x0
266 #define V1_TYPE_DIRECT 0xffffffff
267 #define V1_TYPE_INDIRECT 0xfffffffe
268 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
269 #define V2_TYPE_STAT_DATA 0
270 #define V2_TYPE_INDIRECT 1
271 #define V2_TYPE_DIRECT 2
272 #define V2_TYPE_DIRENTRY 3
274 #define REISERFS_ROOT_OBJECTID 2
275 #define REISERFS_ROOT_PARENT_OBJECTID 1
276 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
277 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
278 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
279 #define REISERFS_OLD_BLOCKSIZE 4096
281 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
282 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
283 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
285 #define PATH_MAX 1024 /* include/linux/limits.h */
286 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */
288 /* The size of the node cache */
289 #define FSYSREISER_CACHE_SIZE 24*1024
290 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
291 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
293 /* Info about currently opened file */
294 struct fsys_reiser_fileinfo
296 __u32 k_dir_id;
297 __u32 k_objectid;
300 /* In memory info about the currently mounted filesystem */
301 struct fsys_reiser_info
303 /* The last read item head */
304 struct item_head *current_ih;
305 /* The last read item */
306 char *current_item;
307 /* The information for the currently opened file */
308 struct fsys_reiser_fileinfo fileinfo;
309 /* The start of the journal */
310 __u32 journal_block;
311 /* The size of the journal */
312 __u32 journal_block_count;
313 /* The first valid descriptor block in journal
314 (relative to journal_block) */
315 __u32 journal_first_desc;
317 /* The ReiserFS version. */
318 __u16 version;
319 /* The current depth of the reiser tree. */
320 __u16 tree_depth;
321 /* SECTOR_SIZE << blocksize_shift == blocksize. */
322 __u8 blocksize_shift;
323 /* 1 << full_blocksize_shift == blocksize. */
324 __u8 fullblocksize_shift;
325 /* The reiserfs block size (must be a power of 2) */
326 __u16 blocksize;
327 /* The number of cached tree nodes */
328 __u16 cached_slots;
329 /* The number of valid transactions in journal */
330 __u16 journal_transactions;
332 unsigned int blocks[MAX_HEIGHT];
333 unsigned int next_key_nr[MAX_HEIGHT];
336 /* The cached s+tree blocks in FSYS_BUF, see below
337 * for a more detailed description.
339 #define ROOT ((char *)FSYS_BUF)
340 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
341 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL)
343 #define BLOCKHEAD(cache) ((struct block_head *) cache)
344 #define ITEMHEAD ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
345 #define KEY(cache) ((struct key *) ((char *) cache + BLKH_SIZE))
346 #define DC(cache) ((struct disk_child *) \
347 ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
348 /* The fsys_reiser_info block.
350 #define INFO \
351 ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
353 * The journal cache. For each transaction it contains the number of
354 * blocks followed by the real block numbers of this transaction.
356 * If the block numbers of some transaction won't fit in this space,
357 * this list is stopped with a 0xffffffff marker and the remaining
358 * uncommitted transactions aren't cached.
360 #define JOURNAL_START ((__u32 *) (INFO + 1))
361 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
363 static __inline__ int
364 is_power_of_two (unsigned long word)
366 return (word & -word) == word;
369 static int
370 journal_read (int block, int len, char *buffer)
372 return devread ((INFO->journal_block + block) << INFO->blocksize_shift,
373 0, len, buffer);
376 /* Read a block from ReiserFS file system, taking the journal into
377 * account. If the block nr is in the journal, the block from the
378 * journal taken.
380 static int
381 block_read (int blockNr, int start, int len, char *buffer)
383 int transactions = INFO->journal_transactions;
384 int desc_block = INFO->journal_first_desc;
385 int journal_mask = INFO->journal_block_count - 1;
386 int translatedNr = blockNr;
387 __u32 *journal_table = JOURNAL_START;
388 while (transactions-- > 0)
390 int i = 0;
391 int j_len;
392 if (*journal_table != 0xffffffff)
394 /* Search for the blockNr in cached journal */
395 j_len = *journal_table++;
396 while (i++ < j_len)
398 if (*journal_table++ == blockNr)
400 journal_table += j_len - i;
401 goto found;
405 else
407 /* This is the end of cached journal marker. The remaining
408 * transactions are still on disk.
410 struct reiserfs_journal_desc desc;
411 struct reiserfs_journal_commit commit;
413 if (! journal_read (desc_block, sizeof (desc), (char *) &desc))
414 return 0;
416 j_len = desc.j_len;
417 while (i < j_len && i < JOURNAL_TRANS_HALF)
418 if (desc.j_realblock[i++] == blockNr)
419 goto found;
421 if (j_len >= JOURNAL_TRANS_HALF)
423 int commit_block = (desc_block + 1 + j_len) & journal_mask;
424 if (! journal_read (commit_block,
425 sizeof (commit), (char *) &commit))
426 return 0;
427 while (i < j_len)
428 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
429 goto found;
432 goto not_found;
434 found:
435 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
436 #ifdef REISERDEBUG
437 printf ("block_read: block %d is mapped to journal block %d.\n",
438 blockNr, translatedNr - INFO->journal_block);
439 #endif
440 /* We must continue the search, as this block may be overwritten
441 * in later transactions.
443 not_found:
444 desc_block = (desc_block + 2 + j_len) & journal_mask;
446 return devread (translatedNr << INFO->blocksize_shift, start, len, buffer);
449 /* Init the journal data structure. We try to cache as much as
450 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
451 * we can still read the rest from the disk on demand.
453 * The first number of valid transactions and the descriptor block of the
454 * first valid transaction are held in INFO. The transactions are all
455 * adjacent, but we must take care of the journal wrap around.
457 static int
458 journal_init (void)
460 unsigned int block_count = INFO->journal_block_count;
461 unsigned int desc_block;
462 unsigned int commit_block;
463 unsigned int next_trans_id;
464 struct reiserfs_journal_header header;
465 struct reiserfs_journal_desc desc;
466 struct reiserfs_journal_commit commit;
467 __u32 *journal_table = JOURNAL_START;
469 journal_read (block_count, sizeof (header), (char *) &header);
470 desc_block = header.j_first_unflushed_offset;
471 if (desc_block >= block_count)
472 return 0;
474 INFO->journal_first_desc = desc_block;
475 next_trans_id = header.j_last_flush_trans_id + 1;
477 #ifdef REISERDEBUG
478 printf ("journal_init: last flushed %d\n",
479 header.j_last_flush_trans_id);
480 #endif
482 while (1)
484 journal_read (desc_block, sizeof (desc), (char *) &desc);
485 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0
486 || desc.j_trans_id != next_trans_id
487 || desc.j_mount_id != header.j_mount_id)
488 /* no more valid transactions */
489 break;
491 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
492 journal_read (commit_block, sizeof (commit), (char *) &commit);
493 if (desc.j_trans_id != commit.j_trans_id
494 || desc.j_len != commit.j_len)
495 /* no more valid transactions */
496 break;
498 #ifdef REISERDEBUG
499 printf ("Found valid transaction %d/%d at %d.\n",
500 desc.j_trans_id, desc.j_mount_id, desc_block);
501 #endif
503 next_trans_id++;
504 if (journal_table < JOURNAL_END)
506 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
508 /* The table is almost full; mark the end of the cached
509 * journal.*/
510 *journal_table = 0xffffffff;
511 journal_table = JOURNAL_END;
513 else
515 int i;
516 /* Cache the length and the realblock numbers in the table.
517 * The block number of descriptor can easily be computed.
518 * and need not to be stored here.
520 *journal_table++ = desc.j_len;
521 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
523 *journal_table++ = desc.j_realblock[i];
524 #ifdef REISERDEBUG
525 printf ("block %d is in journal %d.\n",
526 desc.j_realblock[i], desc_block);
527 #endif
529 for ( ; i < desc.j_len; i++)
531 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
532 #ifdef REISERDEBUG
533 printf ("block %d is in journal %d.\n",
534 commit.j_realblock[i-JOURNAL_TRANS_HALF],
535 desc_block);
536 #endif
540 desc_block = (commit_block + 1) & (block_count - 1);
542 #ifdef REISERDEBUG
543 printf ("Transaction %d/%d at %d isn't valid.\n",
544 desc.j_trans_id, desc.j_mount_id, desc_block);
545 #endif
547 INFO->journal_transactions
548 = next_trans_id - header.j_last_flush_trans_id - 1;
549 return errnum == 0;
552 /* check filesystem types and read superblock into memory buffer */
554 reiserfs_mount (void)
556 struct reiserfs_super_block super;
557 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
559 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
560 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
561 (char *) &super)
562 || (substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
563 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
564 || (/* check that this is not a copy inside the journal log */
565 super.s_journal_block * super.s_blocksize
566 <= REISERFS_DISK_OFFSET_IN_BYTES))
568 /* Try old super block position */
569 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
570 if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
571 || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
572 (char *) &super))
573 return 0;
575 if (substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
576 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
578 /* pre journaling super block ? */
579 if (substring (REISERFS_SUPER_MAGIC_STRING,
580 (char*) ((char *) &super + 20)) > 0)
581 return 0;
583 super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
584 super.s_journal_block = 0;
585 super.s_version = 0;
589 /* check the version number. */
590 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
591 return 0;
593 INFO->version = super.s_version;
594 INFO->blocksize = super.s_blocksize;
595 INFO->fullblocksize_shift = log2 (super.s_blocksize);
596 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
597 INFO->cached_slots =
598 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
600 #ifdef REISERDEBUG
601 printf ("reiserfs_mount: version=%d, blocksize=%d\n",
602 INFO->version, INFO->blocksize);
603 #endif /* REISERDEBUG */
605 /* Clear node cache. */
606 memset (INFO->blocks, 0, sizeof (INFO->blocks));
608 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
609 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
610 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
611 return 0;
613 /* Initialize journal code. If something fails we end with zero
614 * journal_transactions, so we don't access the journal at all.
616 INFO->journal_transactions = 0;
617 if (super.s_journal_block != 0 && super.s_journal_dev == 0)
619 INFO->journal_block = super.s_journal_block;
620 INFO->journal_block_count = super.s_journal_size;
621 if (is_power_of_two (INFO->journal_block_count))
622 journal_init ();
624 /* Read in super block again, maybe it is in the journal */
625 block_read (superblock >> INFO->blocksize_shift,
626 0, sizeof (struct reiserfs_super_block), (char *) &super);
629 if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
630 return 0;
632 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
634 #ifdef REISERDEBUG
635 printf ("root read_in: block=%d, depth=%d\n",
636 super.s_root_block, INFO->tree_depth);
637 #endif /* REISERDEBUG */
639 if (INFO->tree_depth >= MAX_HEIGHT)
640 return 0;
641 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
643 /* There is only one node in the whole filesystem,
644 * which is simultanously leaf and root */
645 memcpy (LEAF, ROOT, INFO->blocksize);
647 return 1;
650 /***************** TREE ACCESSING METHODS *****************************/
652 /* I assume you are familiar with the ReiserFS tree, if not go to
653 * http://www.namesys.com/content_table.html
655 * My tree node cache is organized as following
656 * 0 ROOT node
657 * 1 LEAF node (if the ROOT is also a LEAF it is copied here
658 * 2-n other nodes on current path from bottom to top.
659 * if there is not enough space in the cache, the top most are
660 * omitted.
662 * I have only two methods to find a key in the tree:
663 * search_stat(dir_id, objectid) searches for the stat entry (always
664 * the first entry) of an object.
665 * next_key() gets the next key in tree order.
667 * This means, that I can only sequential reads of files are
668 * efficient, but this really doesn't hurt for grub.
671 /* Read in the node at the current path and depth into the node cache.
672 * You must set INFO->blocks[depth] before.
674 static char *
675 read_tree_node (unsigned int blockNr, int depth)
677 char* cache = CACHE(depth);
678 int num_cached = INFO->cached_slots;
679 if (depth < num_cached)
681 /* This is the cached part of the path. Check if same block is
682 * needed.
684 if (blockNr == INFO->blocks[depth])
685 return cache;
687 else
688 cache = CACHE(num_cached);
690 #ifdef REISERDEBUG
691 printf (" next read_in: block=%d (depth=%d)\n",
692 blockNr, depth);
693 #endif /* REISERDEBUG */
694 if (! block_read (blockNr, 0, INFO->blocksize, cache))
695 return NULL;
696 /* Make sure it has the right node level */
697 if (BLOCKHEAD (cache)->blk_level != depth)
699 errnum = ERR_FSYS_CORRUPT;
700 return NULL;
703 INFO->blocks[depth] = blockNr;
704 return cache;
707 /* Get the next key, i.e. the key following the last retrieved key in
708 * tree order. INFO->current_ih and
709 * INFO->current_info are adapted accordingly. */
710 static int
711 next_key (void)
713 int depth;
714 struct item_head *ih = INFO->current_ih + 1;
715 char *cache;
717 #ifdef REISERDEBUG
718 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
719 INFO->current_ih->ih_key.k_dir_id,
720 INFO->current_ih->ih_key.k_objectid,
721 INFO->current_ih->ih_key.u.v1.k_offset,
722 INFO->current_ih->ih_key.u.v1.k_uniqueness,
723 INFO->current_ih->ih_version);
724 #endif /* REISERDEBUG */
726 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
728 depth = DISK_LEAF_NODE_LEVEL;
729 /* The last item, was the last in the leaf node.
730 * Read in the next block
734 if (depth == INFO->tree_depth)
736 /* There are no more keys at all.
737 * Return a dummy item with MAX_KEY */
738 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
739 goto found;
741 depth++;
742 #ifdef REISERDEBUG
743 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
744 #endif /* REISERDEBUG */
746 while (INFO->next_key_nr[depth] == 0);
748 if (depth == INFO->tree_depth)
749 cache = ROOT;
750 else if (depth <= INFO->cached_slots)
751 cache = CACHE (depth);
752 else
754 cache = read_tree_node (INFO->blocks[depth], depth);
755 if (! cache)
756 return 0;
761 int nr_item = BLOCKHEAD (cache)->blk_nr_item;
762 int key_nr = INFO->next_key_nr[depth]++;
763 #ifdef REISERDEBUG
764 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
765 #endif /* REISERDEBUG */
766 if (key_nr == nr_item)
767 /* This is the last item in this block, set the next_key_nr to 0 */
768 INFO->next_key_nr[depth] = 0;
770 cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth);
771 if (! cache)
772 return 0;
774 while (depth > DISK_LEAF_NODE_LEVEL);
776 ih = ITEMHEAD;
778 found:
779 INFO->current_ih = ih;
780 INFO->current_item = &LEAF[ih->ih_item_location];
781 #ifdef REISERDEBUG
782 printf (" new ih: key %d:%d:%d:%d version:%d\n",
783 INFO->current_ih->ih_key.k_dir_id,
784 INFO->current_ih->ih_key.k_objectid,
785 INFO->current_ih->ih_key.u.v1.k_offset,
786 INFO->current_ih->ih_key.u.v1.k_uniqueness,
787 INFO->current_ih->ih_version);
788 #endif /* REISERDEBUG */
789 return 1;
792 /* preconditions: reiserfs_mount already executed, therefore
793 * INFO block is valid
794 * returns: 0 if error (errnum is set),
795 * nonzero iff we were able to find the key successfully.
796 * postconditions: on a nonzero return, the current_ih and
797 * current_item fields describe the key that equals the
798 * searched key. INFO->next_key contains the next key after
799 * the searched key.
800 * side effects: messes around with the cache.
802 static int
803 search_stat (__u32 dir_id, __u32 objectid)
805 char *cache;
806 int depth;
807 int nr_item;
808 int i;
809 struct item_head *ih;
810 #ifdef REISERDEBUG
811 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid);
812 #endif /* REISERDEBUG */
814 depth = INFO->tree_depth;
815 cache = ROOT;
817 while (depth > DISK_LEAF_NODE_LEVEL)
819 struct key *key;
820 nr_item = BLOCKHEAD (cache)->blk_nr_item;
822 key = KEY (cache);
824 for (i = 0; i < nr_item; i++)
826 if (key->k_dir_id > dir_id
827 || (key->k_dir_id == dir_id
828 && (key->k_objectid > objectid
829 || (key->k_objectid == objectid
830 && (key->u.v1.k_offset
831 | key->u.v1.k_uniqueness) > 0))))
832 break;
833 key++;
836 #ifdef REISERDEBUG
837 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
838 #endif /* REISERDEBUG */
839 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
840 cache = read_tree_node (DC (cache)[i].dc_block_number, --depth);
841 if (! cache)
842 return 0;
845 /* cache == LEAF */
846 nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
847 ih = ITEMHEAD;
848 for (i = 0; i < nr_item; i++)
850 if (ih->ih_key.k_dir_id == dir_id
851 && ih->ih_key.k_objectid == objectid
852 && ih->ih_key.u.v1.k_offset == 0
853 && ih->ih_key.u.v1.k_uniqueness == 0)
855 #ifdef REISERDEBUG
856 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
857 #endif /* REISERDEBUG */
858 INFO->current_ih = ih;
859 INFO->current_item = &LEAF[ih->ih_item_location];
860 return 1;
862 ih++;
864 errnum = ERR_FSYS_CORRUPT;
865 return 0;
869 reiserfs_read (char *buf, int len)
871 unsigned int blocksize;
872 unsigned int offset;
873 unsigned int to_read;
874 char *prev_buf = buf;
876 #ifdef REISERDEBUG
877 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
878 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
879 #endif /* REISERDEBUG */
881 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
882 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
884 search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
885 goto get_next_key;
888 while (! errnum)
890 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
891 break;
893 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
894 blocksize = INFO->current_ih->ih_item_len;
896 #ifdef REISERDEBUG
897 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
898 filepos, len, offset, blocksize);
899 #endif /* REISERDEBUG */
901 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
902 && offset < blocksize)
904 #ifdef REISERDEBUG
905 printf ("direct_read: offset=%d, blocksize=%d\n",
906 offset, blocksize);
907 #endif /* REISERDEBUG */
908 to_read = blocksize - offset;
909 if (to_read > len)
910 to_read = len;
912 if (disk_read_hook != NULL)
914 disk_read_func = disk_read_hook;
916 block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL],
917 (INFO->current_item - LEAF + offset), to_read, buf);
919 disk_read_func = NULL;
921 else
922 memcpy (buf, INFO->current_item + offset, to_read);
923 goto update_buf_len;
925 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
927 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
928 #ifdef REISERDEBUG
929 printf ("indirect_read: offset=%d, blocksize=%d\n",
930 offset, blocksize);
931 #endif /* REISERDEBUG */
933 while (offset < blocksize)
935 __u32 blocknr = ((__u32 *) INFO->current_item)
936 [offset >> INFO->fullblocksize_shift];
937 int blk_offset = offset & (INFO->blocksize-1);
939 to_read = INFO->blocksize - blk_offset;
940 if (to_read > len)
941 to_read = len;
943 disk_read_func = disk_read_hook;
945 /* Journal is only for meta data. Data blocks can be read
946 * directly without using block_read
948 devread (blocknr << INFO->blocksize_shift,
949 blk_offset, to_read, buf);
951 disk_read_func = NULL;
952 update_buf_len:
953 len -= to_read;
954 buf += to_read;
955 offset += to_read;
956 filepos += to_read;
957 if (len == 0)
958 goto done;
961 get_next_key:
962 next_key ();
964 done:
965 return errnum ? 0 : buf - prev_buf;
969 /* preconditions: reiserfs_mount already executed, therefore
970 * INFO block is valid
971 * returns: 0 if error, nonzero iff we were able to find the file successfully
972 * postconditions: on a nonzero return, INFO->fileinfo contains the info
973 * of the file we were trying to look up, filepos is 0 and filemax is
974 * the size of the file.
977 reiserfs_dir (char *dirname)
979 struct reiserfs_de_head *de_head;
980 char *rest, ch;
981 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
982 #ifndef STAGE1_5
983 int do_possibilities = 0;
984 #endif /* ! STAGE1_5 */
985 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */
986 int link_count = 0;
987 int mode;
989 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
990 objectid = REISERFS_ROOT_OBJECTID;
992 while (1)
994 #ifdef REISERDEBUG
995 printf ("dirname=%s\n", dirname);
996 #endif /* REISERDEBUG */
998 /* Search for the stat info first. */
999 if (! search_stat (dir_id, objectid))
1000 return 0;
1002 #ifdef REISERDEBUG
1003 printf ("sd_mode=%x sd_size=%d\n",
1004 ((struct stat_data *) INFO->current_item)->sd_mode,
1005 ((struct stat_data *) INFO->current_item)->sd_size);
1006 #endif /* REISERDEBUG */
1008 mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1010 /* If we've got a symbolic link, then chase it. */
1011 if (S_ISLNK (mode))
1013 int len;
1014 if (++link_count > MAX_LINK_COUNT)
1016 errnum = ERR_SYMLINK_LOOP;
1017 return 0;
1020 /* Get the symlink size. */
1021 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1023 /* Find out how long our remaining name is. */
1024 len = 0;
1025 while (dirname[len] && !isspace (dirname[len]))
1026 len++;
1028 if (filemax + len > sizeof (linkbuf) - 1)
1030 errnum = ERR_FILELENGTH;
1031 return 0;
1034 /* Copy the remaining name to the end of the symlink data.
1035 Note that DIRNAME and LINKBUF may overlap! */
1036 grub_memmove (linkbuf + filemax, dirname, len+1);
1038 INFO->fileinfo.k_dir_id = dir_id;
1039 INFO->fileinfo.k_objectid = objectid;
1040 filepos = 0;
1041 if (! next_key ()
1042 || reiserfs_read (linkbuf, filemax) != filemax)
1044 if (! errnum)
1045 errnum = ERR_FSYS_CORRUPT;
1046 return 0;
1049 #ifdef REISERDEBUG
1050 printf ("symlink=%s\n", linkbuf);
1051 #endif /* REISERDEBUG */
1053 dirname = linkbuf;
1054 if (*dirname == '/')
1056 /* It's an absolute link, so look it up in root. */
1057 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1058 objectid = REISERFS_ROOT_OBJECTID;
1060 else
1062 /* Relative, so look it up in our parent directory. */
1063 dir_id = parent_dir_id;
1064 objectid = parent_objectid;
1067 /* Now lookup the new name. */
1068 continue;
1071 /* if we have a real file (and we're not just printing possibilities),
1072 then this is where we want to exit */
1074 if (! *dirname || isspace (*dirname))
1076 if (! S_ISREG (mode))
1078 errnum = ERR_BAD_FILETYPE;
1079 return 0;
1082 filepos = 0;
1083 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1085 /* If this is a new stat data and size is > 4GB set filemax to
1086 * maximum
1088 if (INFO->current_ih->ih_version == ITEM_VERSION_2
1089 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1090 filemax = 0xffffffff;
1092 INFO->fileinfo.k_dir_id = dir_id;
1093 INFO->fileinfo.k_objectid = objectid;
1094 return next_key ();
1097 /* continue with the file/directory name interpretation */
1098 while (*dirname == '/')
1099 dirname++;
1100 if (! S_ISDIR (mode))
1102 errnum = ERR_BAD_FILETYPE;
1103 return 0;
1105 for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++);
1106 *rest = 0;
1108 # ifndef STAGE1_5
1109 if (print_possibilities && ch != '/')
1110 do_possibilities = 1;
1111 # endif /* ! STAGE1_5 */
1113 while (1)
1115 char *name_end;
1116 int num_entries;
1118 if (! next_key ())
1119 return 0;
1120 #ifdef REISERDEBUG
1121 printf ("ih: key %d:%d:%d:%d version:%d\n",
1122 INFO->current_ih->ih_key.k_dir_id,
1123 INFO->current_ih->ih_key.k_objectid,
1124 INFO->current_ih->ih_key.u.v1.k_offset,
1125 INFO->current_ih->ih_key.u.v1.k_uniqueness,
1126 INFO->current_ih->ih_version);
1127 #endif /* REISERDEBUG */
1129 if (INFO->current_ih->ih_key.k_objectid != objectid)
1130 break;
1132 name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1133 de_head = (struct reiserfs_de_head *) INFO->current_item;
1134 num_entries = INFO->current_ih->u.ih_entry_count;
1135 while (num_entries > 0)
1137 char *filename = INFO->current_item + de_head->deh_location;
1138 char tmp = *name_end;
1139 if ((de_head->deh_state & DEH_Visible))
1141 int cmp;
1142 /* Directory names in ReiserFS are not null
1143 * terminated. We write a temporary 0 behind it.
1144 * NOTE: that this may overwrite the first block in
1145 * the tree cache. That doesn't hurt as long as we
1146 * don't call next_key () in between.
1148 *name_end = 0;
1149 cmp = substring (dirname, filename);
1150 *name_end = tmp;
1151 # ifndef STAGE1_5
1152 if (do_possibilities)
1154 if (cmp <= 0)
1156 if (print_possibilities > 0)
1157 print_possibilities = -print_possibilities;
1158 *name_end = 0;
1159 print_a_completion (filename);
1160 *name_end = tmp;
1163 else
1164 # endif /* ! STAGE1_5 */
1165 if (cmp == 0)
1166 goto found;
1168 /* The beginning of this name marks the end of the next name.
1170 name_end = filename;
1171 de_head++;
1172 num_entries--;
1176 # ifndef STAGE1_5
1177 if (print_possibilities < 0)
1178 return 1;
1179 # endif /* ! STAGE1_5 */
1181 errnum = ERR_FILE_NOT_FOUND;
1182 *rest = ch;
1183 return 0;
1185 found:
1187 *rest = ch;
1188 dirname = rest;
1190 parent_dir_id = dir_id;
1191 parent_objectid = objectid;
1192 dir_id = de_head->deh_dir_id;
1193 objectid = de_head->deh_objectid;
1198 reiserfs_embed (int *start_sector, int needed_sectors)
1200 struct reiserfs_super_block super;
1201 int num_sectors;
1203 if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1204 sizeof (struct reiserfs_super_block), (char *) &super))
1205 return 0;
1207 *start_sector = 1; /* reserve first sector for stage1 */
1208 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1209 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1210 && (/* check that this is not a super block copy inside
1211 * the journal log */
1212 super.s_journal_block * super.s_blocksize
1213 > REISERFS_DISK_OFFSET_IN_BYTES))
1214 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1215 else
1216 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1218 return (needed_sectors <= num_sectors);
1220 #endif /* FSYS_REISERFS */