/*
 *  linux/fs/ext2/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Goal-directed block allocation by Stephen Tweedie
 *	(sct@dcs.ed.ac.uk), 1993, 1998
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *	David S. Miller (davem@caip.rutgers.edu), 1995
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>
#include <linux/sched.h>
#include <linux/highuid.h>

static int ext2_update_inode(struct inode * inode, int do_sync);

/*
 * Called at each iput()
 */
void ext2_put_inode (struct inode * inode)
{
	ext2_discard_prealloc (inode);
}

/*
 * Called at the last iput() if i_nlink is zero.
 */
void ext2_delete_inode (struct inode * inode)
{
	lock_kernel();

	if (is_bad_inode(inode) ||
	    inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		goto no_delete;
	inode->u.ext2_i.i_dtime = CURRENT_TIME;
	mark_inode_dirty(inode);
	ext2_update_inode(inode, IS_SYNC(inode));
	inode->i_size = 0;
	if (inode->i_blocks)
		ext2_truncate (inode);
	ext2_free_inode (inode);

	unlock_kernel();
	return;
no_delete:
	unlock_kernel();
	clear_inode(inode);	/* We must guarantee clearing of inode... */
}

/*
 * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the
 * superblock in the same manner as are ext2_free_blocks and
 * ext2_new_block.  We just wait on the super rather than locking it
 * here, since ext2_new_block will do the necessary locking and we
 * can't block until then.
 */
void ext2_discard_prealloc (struct inode * inode)
{
#ifdef EXT2_PREALLOCATE
	unsigned short total;

	lock_kernel();
	if (inode->u.ext2_i.i_prealloc_count) {
		total = inode->u.ext2_i.i_prealloc_count;
		inode->u.ext2_i.i_prealloc_count = 0;
		ext2_free_blocks (inode, inode->u.ext2_i.i_prealloc_block, total);
	}
	unlock_kernel();
#endif
}

static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
{
#ifdef EXT2FS_DEBUG
	static unsigned long alloc_hits = 0, alloc_attempts = 0;
#endif
	unsigned long result;

	wait_on_super (inode->i_sb);

#ifdef EXT2_PREALLOCATE
	if (inode->u.ext2_i.i_prealloc_count &&
	    (goal == inode->u.ext2_i.i_prealloc_block ||
	     goal + 1 == inode->u.ext2_i.i_prealloc_block))
	{
		result = inode->u.ext2_i.i_prealloc_block++;
		inode->u.ext2_i.i_prealloc_count--;
		ext2_debug ("preallocation hit (%lu/%lu).\n",
			    ++alloc_hits, ++alloc_attempts);

	} else {
		ext2_discard_prealloc (inode);
		ext2_debug ("preallocation miss (%lu/%lu).\n",
			    alloc_hits, ++alloc_attempts);
		if (S_ISREG(inode->i_mode))
			result = ext2_new_block (inode, goal,
				 &inode->u.ext2_i.i_prealloc_count,
				 &inode->u.ext2_i.i_prealloc_block, err);
		else
			result = ext2_new_block (inode, goal, 0, 0, err);
	}
#else
	result = ext2_new_block (inode, goal, 0, 0, err);
#endif
	return result;
}

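/*
 * Illustrative walk-through of the preallocation window above (the
 * block numbers are invented for the example): if i_prealloc_block is
 * 1000 and i_prealloc_count is 3, a request with goal 999 or 1000 is a
 * "hit" - block 1000 is handed out and the window shrinks to blocks
 * 1001-1002.  Any other goal discards the window and falls back to
 * ext2_new_block, which may refill i_prealloc_{block,count} for
 * regular files.
 */
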
typedef struct {
	u32	*p;
	u32	key;
	struct buffer_head *bh;
} Indirect;

static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
{
	p->key = *(p->p = v);
	p->bh = bh;
}

static inline int verify_chain(Indirect *from, Indirect *to)
{
	while (from <= to && from->key == *from->p)
		from++;
	return (from > to);
}

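/*
 * Why verify_chain() exists: while ext2_get_branch below sleeps in
 * bread(), a concurrent truncate may clear pointers whose values we
 * have already copied into chain[].key.  Re-checking that each saved
 * key still matches *p detects that race, and ext2_get_branch reports
 * it as -EAGAIN instead of trusting a stale block number.
 */
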
/**
 *	ext2_block_to_path - parse the block number into array of offsets
 *	@inode: inode in question (we are only interested in its superblock)
 *	@i_block: block number to be parsed
 *	@offsets: array to store the offsets in
 *
 *	To store the locations of file's data ext2 uses a data structure common
 *	for UNIX filesystems - tree of pointers anchored in the inode, with
 *	data blocks at leaves and indirect blocks in intermediate nodes.
 *	This function translates the block number into path in that tree -
 *	return value is the path length and @offsets[n] is the offset of
 *	pointer to (n+1)th node in the nth one. If @i_block is out of range
 *	(negative or too large) a warning is printed and zero returned.
 *
 *	Note: function doesn't find node addresses, so no IO is needed. All
 *	we need to know is the capacity of indirect blocks (taken from the
 *	inode->i_sb).
 */

/*
 * Portability note: the last comparison (check that we fit into triple
 * indirect block) is spelled differently, because otherwise on an
 * architecture with 32-bit longs and 8Kb pages we might get into trouble
 * if our filesystem had 8Kb blocks. We might use long long, but that would
 * kill us on x86. Oh, well, at least the sign propagation does not matter -
 * i_block would have to be negative in the very beginning, so we would not
 * get there at all.
 */

static int ext2_block_to_path(struct inode *inode, long i_block, int offsets[4])
{
	int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
	const long direct_blocks = EXT2_NDIR_BLOCKS,
		indirect_blocks = ptrs,
		double_blocks = (1 << (ptrs_bits * 2));
	int n = 0;

	if (i_block < 0) {
		ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
	} else if (i_block < direct_blocks) {
		offsets[n++] = i_block;
	} else if ( (i_block -= direct_blocks) < indirect_blocks) {
		offsets[n++] = EXT2_IND_BLOCK;
		offsets[n++] = i_block;
	} else if ((i_block -= indirect_blocks) < double_blocks) {
		offsets[n++] = EXT2_DIND_BLOCK;
		offsets[n++] = i_block >> ptrs_bits;
		offsets[n++] = i_block & (ptrs - 1);
	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
		offsets[n++] = EXT2_TIND_BLOCK;
		offsets[n++] = i_block >> (ptrs_bits * 2);
		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
		offsets[n++] = i_block & (ptrs - 1);
	} else {
		ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
	}
	return n;
}

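/*
 * Worked example (illustrative; assumes a 1KiB-block filesystem, where
 * EXT2_ADDR_PER_BLOCK == 256, so ptrs_bits == 8, direct_blocks == 12,
 * indirect_blocks == 256 and double_blocks == 65536):
 *
 *	i_block		offsets[]			depth
 *	      5		{ 5 }				1 (direct)
 *	    100		{ EXT2_IND_BLOCK, 88 }		2
 *	  10000		{ EXT2_DIND_BLOCK, 38, 4 }	3
 *	  70000		{ EXT2_TIND_BLOCK, 0, 16, 100 }	4
 */
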
/**
 *	ext2_get_branch - read the chain of indirect blocks leading to data
 *	@inode: inode in question
 *	@depth: depth of the chain (1 - direct pointer, etc.)
 *	@offsets: offsets of pointers in inode/indirect blocks
 *	@chain: place to store the result
 *	@err: here we store the error value
 *
 *	Function fills the array of triples <key, p, bh> and returns %NULL
 *	if everything went OK or the pointer to the last filled triple
 *	(incomplete one) otherwise. Upon the return chain[i].key contains
 *	the number of (i+1)-th block in the chain (as it is stored in memory,
 *	i.e. little-endian 32-bit), chain[i].p contains the address of that
 *	number (it points into struct inode for i==0 and into the bh->b_data
 *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
 *	block for i>0 and NULL for i==0. In other words, it holds the block
 *	numbers of the chain, addresses they were taken from (and where we can
 *	verify that chain did not change) and buffer_heads hosting these
 *	numbers.
 *
 *	Function stops when it stumbles upon a zero pointer (absent block)
 *		(pointer to last triple returned, *@err == 0)
 *	or when it gets an IO error reading an indirect block
 *		(ditto, *@err == -EIO)
 *	or when it notices that chain had been changed while it was reading
 *		(ditto, *@err == -EAGAIN)
 *	or when it reads all @depth-1 indirect blocks successfully and finds
 *	the whole chain, all the way to the data (returns %NULL, *@err == 0).
 */
static inline Indirect *ext2_get_branch(struct inode *inode,
					int depth,
					int *offsets,
					Indirect chain[4],
					int *err)
{
	kdev_t dev = inode->i_dev;
	int size = inode->i_sb->s_blocksize;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain (chain, NULL, inode->u.ext2_i.i_data + *offsets);
	if (!p->key)
		goto no_block;
	/*
	 * switch below is merely an unrolled loop - body should be
	 * repeated depth-1 times. Maybe a loop would actually be better,
	 * but that way we get a straight execution path in normal cases.
	 * Easy to change, anyway - all cases in the switch are literally
	 * identical.
	 */
	switch (depth) {
		case 4:
			bh = bread(dev, le32_to_cpu(p->key), size);
			if (!bh)
				goto failure;
			/* Reader: pointers */
			if (!verify_chain(chain, p))
				goto changed;
			add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
			/* Reader: end */
			if (!p->key)
				goto no_block;
		case 3:
			bh = bread(dev, le32_to_cpu(p->key), size);
			if (!bh)
				goto failure;
			/* Reader: pointers */
			if (!verify_chain(chain, p))
				goto changed;
			add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
			/* Reader: end */
			if (!p->key)
				goto no_block;
		case 2:
			bh = bread(dev, le32_to_cpu(p->key), size);
			if (!bh)
				goto failure;
			/* Reader: pointers */
			if (!verify_chain(chain, p))
				goto changed;
			add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
			/* Reader: end */
			if (!p->key)
				goto no_block;
	}
	return NULL;

changed:
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}

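/*
 * Example of a complete chain (block numbers hypothetical) for the
 * depth-3 path {EXT2_DIND_BLOCK, 38, 4} from the example above:
 * chain[0].p points into i_data[EXT2_DIND_BLOCK] with chain[0].bh ==
 * NULL; chain[1].p points at slot 38 of the double-indirect block held
 * in chain[1].bh; chain[2].key is the little-endian number of the data
 * block, read from slot 4 of the indirect block in chain[2].bh.
 */
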
static struct buffer_head * inode_getblk (struct inode * inode, int nr,
	int new_block, int * err, int metadata, long *phys, int *new)
{
	u32 * p;
	int tmp, goal = 0;
	struct buffer_head * result;
	int blocksize = inode->i_sb->s_blocksize;

	p = inode->u.ext2_i.i_data + nr;
repeat:
	tmp = le32_to_cpu(*p);
	if (tmp) {
		if (metadata) {
			result = getblk (inode->i_dev, tmp, blocksize);
			if (tmp == le32_to_cpu(*p))
				return result;
			brelse (result);
			goto repeat;
		} else {
			*phys = tmp;
			return NULL;
		}
	}

	if (inode->u.ext2_i.i_next_alloc_block == new_block)
		goal = inode->u.ext2_i.i_next_alloc_goal;

	ext2_debug ("hint = %d,", goal);

	if (!goal) {
		for (tmp = nr - 1; tmp >= 0; tmp--) {
			if (inode->u.ext2_i.i_data[tmp]) {
				goal = le32_to_cpu(inode->u.ext2_i.i_data[tmp]);
				break;
			}
		}
		if (!goal)
			goal = (inode->u.ext2_i.i_block_group *
				EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
			       le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
	}

	ext2_debug ("goal = %d.\n", goal);

	tmp = ext2_alloc_block (inode, goal, err);
	if (!tmp)
		return NULL;

	if (metadata) {
		result = getblk (inode->i_dev, tmp, blocksize);
		if (!buffer_uptodate(result))
			wait_on_buffer(result);
		memset(result->b_data, 0, blocksize);
		mark_buffer_uptodate(result, 1);
		mark_buffer_dirty(result, 1);
		if (*p) {
			ext2_free_blocks (inode, tmp, 1);
			bforget (result);
			goto repeat;
		}
	} else {
		if (*p) {
			/*
			 * Nobody is allowed to change block allocation
			 * state from under us:
			 */
			ext2_error (inode->i_sb, "inode_getblk",
				    "data block filled under us");
			BUG();
			ext2_free_blocks (inode, tmp, 1);
			goto repeat;
		}
		*phys = tmp;
		result = NULL;
		*err = 0;
		*new = 1;
	}
	*p = cpu_to_le32(tmp);

	inode->u.ext2_i.i_next_alloc_block = new_block;
	inode->u.ext2_i.i_next_alloc_goal = tmp;
	inode->i_ctime = CURRENT_TIME;
	inode->i_blocks += blocksize/512;
	if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
		ext2_sync_inode (inode);
	else
		mark_inode_dirty(inode);
	return result;
}

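/*
 * A note on the repeat: loops in inode_getblk and block_getblk below:
 * getblk() and ext2_alloc_block() may sleep, dropping the big kernel
 * lock, so *p can change while we block.  Re-reading *p afterwards and
 * retrying (or, for data blocks, treating a filled-in pointer as a
 * hard error) keeps the allocation state consistent.
 */
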
/*
 * metadata / data
 * possibly create / access
 * can fail due to: - not present
 *		    - out of space
 *
 * NULL return in the data case is mandatory.
 */
static struct buffer_head * block_getblk (struct inode * inode,
	struct buffer_head * bh, int nr,
	int new_block, int * err, int metadata, long *phys, int *new)
{
	int tmp, goal = 0;
	u32 * p;
	struct buffer_head * result;
	int blocksize = inode->i_sb->s_blocksize;

	result = NULL;
	if (!bh)
		goto out;
	if (!buffer_uptodate(bh)) {
		ll_rw_block (READ, 1, &bh);
		wait_on_buffer (bh);
		if (!buffer_uptodate(bh))
			goto out;
	}
	p = (u32 *) bh->b_data + nr;
repeat:
	tmp = le32_to_cpu(*p);
	if (tmp) {
		if (metadata) {
			result = getblk (bh->b_dev, tmp, blocksize);
			if (tmp == le32_to_cpu(*p))
				goto out;
			brelse (result);
			goto repeat;
		} else {
			*phys = tmp;
			/* result == NULL */
			goto out;
		}
	}

	if (inode->u.ext2_i.i_next_alloc_block == new_block)
		goal = inode->u.ext2_i.i_next_alloc_goal;
	if (!goal) {
		for (tmp = nr - 1; tmp >= 0; tmp--) {
			if (le32_to_cpu(((u32 *) bh->b_data)[tmp])) {
				goal = le32_to_cpu(((u32 *)bh->b_data)[tmp]);
				break;
			}
		}
		if (!goal)
			goal = bh->b_blocknr;
	}
	tmp = ext2_alloc_block (inode, goal, err);
	if (!tmp)
		goto out;
	if (metadata) {
		result = getblk (bh->b_dev, tmp, blocksize);
		if (!buffer_uptodate(result))
			wait_on_buffer(result);
		memset(result->b_data, 0, inode->i_sb->s_blocksize);
		mark_buffer_uptodate(result, 1);
		mark_buffer_dirty(result, 1);
		if (*p) {
			ext2_free_blocks (inode, tmp, 1);
			bforget (result);
			goto repeat;
		}
	} else {
		if (*p) {
			/*
			 * Nobody is allowed to change block allocation
			 * state from under us:
			 */
			ext2_error (inode->i_sb, "block_getblk",
				    "data block filled under us");
			BUG();
			ext2_free_blocks (inode, tmp, 1);
			goto repeat;
		}
		*phys = tmp;
		*new = 1;
	}
	*p = cpu_to_le32(tmp);
	mark_buffer_dirty(bh, 1);
	if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}
	inode->i_ctime = CURRENT_TIME;
	inode->i_blocks += blocksize/512;
	mark_inode_dirty(inode);
	inode->u.ext2_i.i_next_alloc_block = new_block;
	inode->u.ext2_i.i_next_alloc_goal = tmp;
	*err = 0;
out:
	brelse (bh);
	return result;
}

static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
{
	int err, new;
	struct buffer_head *bh;
	unsigned long phys;
	int offsets[4];
	int *p;
	Indirect chain[4];
	Indirect *partial;
	int depth;

	depth = ext2_block_to_path(inode, iblock, offsets);
	if (depth == 0) {
		err = -EIO;	/* bogus block number */
		goto abort;
	}

	lock_kernel();
	partial = ext2_get_branch(inode, depth, offsets, chain, &err);

	if (!partial) {
		unlock_kernel();
		for (partial = chain + depth - 1; partial > chain; partial--)
			brelse(partial->bh);
		bh_result->b_dev = inode->i_dev;
		bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key);
		bh_result->b_state |= (1UL << BH_Mapped);
		return 0;
	}

	while (partial > chain) {
		brelse(partial->bh);
		partial--;
	}

	if (!create) {
		unlock_kernel();
		return 0;
	}

	err = -EIO;
	new = 0;
	bh = NULL;

	/*
	 * If this is a sequential block allocation, set the next_alloc_block
	 * to this block now so that all the indblock and data block
	 * allocations use the same goal zone
	 */

	ext2_debug ("block %lu, next %lu, goal %lu.\n", iblock,
		    inode->u.ext2_i.i_next_alloc_block,
		    inode->u.ext2_i.i_next_alloc_goal);

	if (iblock == inode->u.ext2_i.i_next_alloc_block + 1) {
		inode->u.ext2_i.i_next_alloc_block++;
		inode->u.ext2_i.i_next_alloc_goal++;
	}

	err = 0;

	/*
	 * ok, these macros clean the logic up a bit and make
	 * it much more readable:
	 */
#define GET_INODE_DATABLOCK(x) \
	inode_getblk(inode, x, iblock, &err, 0, &phys, &new)
#define GET_INODE_PTR(x) \
	inode_getblk(inode, x, iblock, &err, 1, NULL, NULL)
#define GET_INDIRECT_DATABLOCK(x) \
	block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new)
#define GET_INDIRECT_PTR(x) \
	block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL)

	p = offsets;
	if (depth == 1) {
		bh = GET_INODE_DATABLOCK(*p);
		goto out;
	}
	bh = GET_INODE_PTR(*p);
	switch (depth) {
		default:	/* case 4: */
			bh = GET_INDIRECT_PTR(*++p);
		case 3:
			bh = GET_INDIRECT_PTR(*++p);
		case 2:
			bh = GET_INDIRECT_DATABLOCK(*++p);
	}

#undef GET_INODE_DATABLOCK
#undef GET_INODE_PTR
#undef GET_INDIRECT_DATABLOCK
#undef GET_INDIRECT_PTR

out:
	if (bh)
		BUG();		/* temporary debugging check */
	if (err) {
		unlock_kernel();
		goto abort;
	}
	if (!phys)
		BUG();		/* must not happen either */

	bh_result->b_dev = inode->i_dev;
	bh_result->b_blocknr = phys;
	bh_result->b_state |= (1UL << BH_Mapped);	/* safe */
	if (new)
		bh_result->b_state |= (1UL << BH_New);
	unlock_kernel();
abort:
	return err;
}

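/*
 * ext2_getblk() below keeps the old buffer_head-based interface on top
 * of ext2_get_block(): the on-stack "dummy" buffer_head only receives
 * the b_dev/b_blocknr/b_state mapping, and a real buffer is then taken
 * with getblk().  BH_New from a fresh allocation is what triggers the
 * zeroing of the block's contents.
 */
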
struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err)
{
	struct buffer_head dummy;
	int error;

	dummy.b_state = 0;
	dummy.b_blocknr = -1000;
	error = ext2_get_block(inode, block, &dummy, create);
	*err = error;
	if (!error && buffer_mapped(&dummy)) {
		struct buffer_head *bh;
		bh = getblk(dummy.b_dev, dummy.b_blocknr, inode->i_sb->s_blocksize);
		if (buffer_new(&dummy)) {
			if (!buffer_uptodate(bh))
				wait_on_buffer(bh);
			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
			mark_buffer_uptodate(bh, 1);
			mark_buffer_dirty(bh, 1);
		}
		return bh;
	}
	return NULL;
}

struct buffer_head * ext2_bread (struct inode * inode, int block,
				 int create, int *err)
{
	struct buffer_head * bh;
	int prev_blocks;

	prev_blocks = inode->i_blocks;

	bh = ext2_getblk (inode, block, create, err);
	if (!bh)
		return bh;

	/*
	 * If the inode has grown, and this is a directory, then perform
	 * preallocation of a few more blocks to try to keep directory
	 * fragmentation down.
	 */
	if (create &&
	    S_ISDIR(inode->i_mode) &&
	    inode->i_blocks > prev_blocks &&
	    EXT2_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT2_FEATURE_COMPAT_DIR_PREALLOC)) {
		int i;
		struct buffer_head *tmp_bh;

		for (i = 1;
		     i < EXT2_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
		     i++) {
			/*
			 * ext2_getblk will zero out the contents of the
			 * directory for us
			 */
			tmp_bh = ext2_getblk(inode, block+i, create, err);
			if (!tmp_bh) {
				brelse (bh);
				return 0;
			}
			brelse (tmp_bh);
		}
	}

	if (buffer_uptodate(bh))
		return bh;
	ll_rw_block (READ, 1, &bh);
	wait_on_buffer (bh);
	if (buffer_uptodate(bh))
		return bh;
	brelse (bh);
	*err = -EIO;
	return NULL;
}

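/*
 * The address_space operations below are thin wrappers: all reads and
 * writes go through the generic page-cache helpers, which call back
 * into ext2_get_block() to map a file block to a disk block (and to
 * allocate one on the write paths, where create != 0).
 */
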
static int ext2_writepage(struct file *file, struct page *page)
{
	return block_write_full_page(page,ext2_get_block);
}
static int ext2_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page,ext2_get_block);
}
static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
	return block_prepare_write(page,from,to,ext2_get_block);
}
static int ext2_bmap(struct address_space *mapping, long block)
{
	return generic_block_bmap(mapping,block,ext2_get_block);
}
struct address_space_operations ext2_aops = {
	readpage: ext2_readpage,
	writepage: ext2_writepage,
	sync_page: block_sync_page,
	prepare_write: ext2_prepare_write,
	commit_write: generic_commit_write,
	bmap: ext2_bmap
};

void ext2_read_inode (struct inode * inode)
{
	struct buffer_head * bh;
	struct ext2_inode * raw_inode;
	unsigned long block_group;
	unsigned long group_desc;
	unsigned long desc;
	unsigned long block;
	unsigned long offset;
	struct ext2_group_desc * gdp;

	if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
	     inode->i_ino != EXT2_ACL_DATA_INO &&
	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "bad inode number: %lu", inode->i_ino);
		goto bad_inode;
	}
	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "group >= groups count");
		goto bad_inode;
	}
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);
	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);
	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
	if (!bh) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "Descriptor not loaded");
		goto bad_inode;
	}

	gdp = (struct ext2_group_desc *) bh->b_data;
	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
		EXT2_INODE_SIZE(inode->i_sb);
	block = le32_to_cpu(gdp[desc].bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "unable to read inode block - "
			    "inode=%lu, block=%lu", inode->i_ino, block);
		goto bad_inode;
	}
	offset &= (EXT2_BLOCK_SIZE(inode->i_sb) - 1);
	raw_inode = (struct ext2_inode *) (bh->b_data + offset);
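
	/*
	 * Example of the arithmetic above (illustrative; assumes
	 * 128-byte inodes, 1KiB blocks and 8192 inodes per group):
	 * inode 34 gives offset = 33 * 128 = 4224, so it lives 4 blocks
	 * past bg_inode_table, at byte 4224 & 1023 = 128 within that
	 * block.
	 */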

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if(!(test_opt (inode->i_sb, NO_UID32))) {
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime = le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/*
	 * We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes.
	 * The test is the same one that e2fsck uses.
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) {
		/* this inode is deleted */
		brelse (bh);
		goto bad_inode;
	}
	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	inode->i_version = ++event;
	inode->u.ext2_i.i_new_inode = 0;
	inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags);
	inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
	inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
	inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
	inode->u.ext2_i.i_osync = 0;
	inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	if (S_ISDIR(inode->i_mode))
		inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	else {
		inode->u.ext2_i.i_dir_acl = 0;
		inode->u.ext2_i.i_high_size = le32_to_cpu(raw_inode->i_size_high);
		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	}
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	inode->u.ext2_i.i_block_group = block_group;
	inode->u.ext2_i.i_next_alloc_block = 0;
	inode->u.ext2_i.i_next_alloc_goal = 0;
	if (inode->u.ext2_i.i_prealloc_count)
		ext2_error (inode->i_sb, "ext2_read_inode",
			    "New inode has non-zero prealloc count!");

	/*
	 * NOTE! The in-memory inode i_blocks array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT2_N_BLOCKS; block++)
		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];

	if (inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		/* Nothing to do */ ;
	else if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext2_file_inode_operations;
		inode->i_fop = &ext2_file_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (!inode->i_blocks)
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &ext2_aops;
		}
	} else
		init_special_inode(inode, inode->i_mode,
				   le32_to_cpu(raw_inode->i_block[0]));
	brelse (bh);
	inode->i_attr_flags = 0;
	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
		inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS;
		inode->i_flags |= S_SYNC;
	}
	if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) {
		inode->i_attr_flags |= ATTR_FLAG_APPEND;
		inode->i_flags |= S_APPEND;
	}
	if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL) {
		inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;
		inode->i_flags |= S_IMMUTABLE;
	}
	if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) {
		inode->i_attr_flags |= ATTR_FLAG_NOATIME;
		inode->i_flags |= S_NOATIME;
	}
	return;

bad_inode:
	make_bad_inode(inode);
	return;
}

static int ext2_update_inode(struct inode * inode, int do_sync)
{
	struct buffer_head * bh;
	struct ext2_inode * raw_inode;
	unsigned long block_group;
	unsigned long group_desc;
	unsigned long desc;
	unsigned long block;
	unsigned long offset;
	int err = 0;
	struct ext2_group_desc * gdp;

	if ((inode->i_ino != EXT2_ROOT_INO &&
	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "bad inode number: %lu", inode->i_ino);
		return -EIO;
	}
	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "group >= groups count");
		return -EIO;
	}
	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);
	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);
	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];
	if (!bh) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "Descriptor not loaded");
		return -EIO;
	}
	gdp = (struct ext2_group_desc *) bh->b_data;
	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
		EXT2_INODE_SIZE(inode->i_sb);
	block = le32_to_cpu(gdp[desc].bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
		ext2_error (inode->i_sb, "ext2_write_inode",
			    "unable to read inode block - "
			    "inode=%lu, block=%lu", inode->i_ino, block);
		return -EIO;
	}
	offset &= EXT2_BLOCK_SIZE(inode->i_sb) - 1;
	raw_inode = (struct ext2_inode *) (bh->b_data + offset);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	if(!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
		/*
		 * Fix up interoperability with old kernels. Otherwise,
		 * old inodes get re-used with the upper 16 bits of the
		 * uid/gid intact.
		 */
		if(!inode->u.ext2_i.i_dtime) {
			raw_inode->i_uid_high = cpu_to_le16(high_16_bits(inode->i_uid));
			raw_inode->i_gid_high = cpu_to_le16(high_16_bits(inode->i_gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(inode->i_uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(inode->i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le32(inode->i_size);
	raw_inode->i_atime = cpu_to_le32(inode->i_atime);
	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime);
	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime);
	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
	raw_inode->i_dtime = cpu_to_le32(inode->u.ext2_i.i_dtime);
	raw_inode->i_flags = cpu_to_le32(inode->u.ext2_i.i_flags);
	raw_inode->i_faddr = cpu_to_le32(inode->u.ext2_i.i_faddr);
	raw_inode->i_frag = inode->u.ext2_i.i_frag_no;
	raw_inode->i_fsize = inode->u.ext2_i.i_frag_size;
	raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl);
	if (S_ISDIR(inode->i_mode))
		raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl);
	else
		raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32);

	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
	else for (block = 0; block < EXT2_N_BLOCKS; block++)
		raw_inode->i_block[block] = inode->u.ext2_i.i_data[block];
	mark_buffer_dirty(bh, 1);
	if (do_sync) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
		if (buffer_req(bh) && !buffer_uptodate(bh)) {
			printk ("IO error syncing ext2 inode ["
				"%s:%08lx]\n",
				bdevname(inode->i_dev), inode->i_ino);
			err = -EIO;
		}
	}
	brelse (bh);
	return err;
}

void ext2_write_inode (struct inode * inode, int wait)
{
	lock_kernel();
	ext2_update_inode (inode, 0);
	unlock_kernel();
}

int ext2_sync_inode (struct inode *inode)
{
	return ext2_update_inode (inode, 1);
}

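/*
 * ext2_write_inode() above is the super_operations hook used for
 * periodic write-back; it does not honour its "wait" argument.
 * Callers that need the inode on disk before returning use
 * ext2_sync_inode(), which passes do_sync == 1 to ext2_update_inode().
 */
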
int ext2_notify_change(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	int retval;
	unsigned int flags;

	retval = -EPERM;
	if (iattr->ia_valid & ATTR_ATTR_FLAG &&
	    ((!(iattr->ia_attr_flags & ATTR_FLAG_APPEND) !=
	      !(inode->u.ext2_i.i_flags & EXT2_APPEND_FL)) ||
	     (!(iattr->ia_attr_flags & ATTR_FLAG_IMMUTABLE) !=
	      !(inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL)))) {
		if (!capable(CAP_LINUX_IMMUTABLE))
			goto out;
	} else if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
		goto out;

	retval = inode_change_ok(inode, iattr);
	if (retval != 0)
		goto out;

	inode_setattr(inode, iattr);

	flags = iattr->ia_attr_flags;
	if (flags & ATTR_FLAG_SYNCRONOUS) {
		inode->i_flags |= S_SYNC;
		inode->u.ext2_i.i_flags |= EXT2_SYNC_FL;
	} else {
		inode->i_flags &= ~S_SYNC;
		inode->u.ext2_i.i_flags &= ~EXT2_SYNC_FL;
	}
	if (flags & ATTR_FLAG_NOATIME) {
		inode->i_flags |= S_NOATIME;
		inode->u.ext2_i.i_flags |= EXT2_NOATIME_FL;
	} else {
		inode->i_flags &= ~S_NOATIME;
		inode->u.ext2_i.i_flags &= ~EXT2_NOATIME_FL;
	}
	if (flags & ATTR_FLAG_APPEND) {
		inode->i_flags |= S_APPEND;
		inode->u.ext2_i.i_flags |= EXT2_APPEND_FL;
	} else {
		inode->i_flags &= ~S_APPEND;
		inode->u.ext2_i.i_flags &= ~EXT2_APPEND_FL;
	}
	if (flags & ATTR_FLAG_IMMUTABLE) {
		inode->i_flags |= S_IMMUTABLE;
		inode->u.ext2_i.i_flags |= EXT2_IMMUTABLE_FL;
	} else {
		inode->i_flags &= ~S_IMMUTABLE;
		inode->u.ext2_i.i_flags &= ~EXT2_IMMUTABLE_FL;
	}
	mark_inode_dirty(inode);
out:
	return retval;
}