2 * modified for Lites 1.1
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
8 * Copyright (c) 1982, 1986, 1989, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94
36 * $FreeBSD: src/sys/gnu/ext2fs/ext2_alloc.c,v 1.28.2.2 2002/07/01 00:18:51 iedowse Exp $
39 #include "opt_quota.h"
41 #include <sys/param.h>
42 #include <sys/systm.h>
44 #include <sys/vnode.h>
46 #include <sys/mount.h>
47 #include <sys/syslog.h>
49 #include <machine/inttypes.h>
55 #include "ext2_mount.h"
58 #include "ext2_fs_sb.h"
60 #include "ext2_extern.h"
62 static void ext2_fserr (struct ext2_sb_info
*, u_int
, char *);
65 * Linux calls this function at the following locations:
66 * (1) the inode is freed
67 * (2) a preallocation miss occurs
68 * (3) truncate is called
69 * (4) release_file is called and f_mode & 2
71 * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2)
72 * the call in vfree might be redundant
75 ext2_discard_prealloc(struct inode
*ip
)
77 #ifdef EXT2_PREALLOCATE
78 if (ip
->i_prealloc_count
) {
79 int i
= ip
->i_prealloc_count
;
80 ip
->i_prealloc_count
= 0;
81 ext2_free_blocks (ITOV(ip
)->v_mount
,
89 * Allocate a block in the file system.
91 * this takes the framework from ffs_alloc. To implement the
92 * actual allocation, it calls ext2_new_block, the ported version
93 * of the same Linux routine.
95 * we note that this is always called in connection with ext2_blkpref
97 * preallocation is done as Linux does it
100 ext2_alloc(struct inode
*ip
, daddr_t lbn
, daddr_t bpref
, int size
,
101 struct ucred
*cred
, daddr_t
*bnp
)
103 struct ext2_sb_info
*fs
;
112 if ((u_int
)size
> fs
->s_blocksize
|| blkoff(fs
, size
) != 0) {
113 kprintf("dev = %s, bsize = %lu, size = %d, fs = %s\n",
114 devtoname(ip
->i_dev
), fs
->s_blocksize
, size
, fs
->fs_fsmnt
);
115 panic("ext2_alloc: bad size");
118 panic("ext2_alloc: missing credential");
119 #endif /* DIAGNOSTIC */
120 if (size
== fs
->s_blocksize
&& fs
->s_es
->s_free_blocks_count
== 0)
122 if (cred
->cr_uid
!= 0 &&
123 fs
->s_es
->s_free_blocks_count
< fs
->s_es
->s_r_blocks_count
)
126 if ((error
= ext2_chkdq(ip
, (long)btodb(size
), cred
, 0)) != 0)
129 if (bpref
>= fs
->s_es
->s_blocks_count
)
131 /* call the Linux code */
132 #ifdef EXT2_PREALLOCATE
133 /* To have a preallocation hit, we must
134 * - have at least one block preallocated
135 * - and our preferred block must have that block number or one below
137 if (ip
->i_prealloc_count
&&
138 (bpref
== ip
->i_prealloc_block
||
139 bpref
+ 1 == ip
->i_prealloc_block
))
141 bno
= ip
->i_prealloc_block
++;
142 ip
->i_prealloc_count
--;
143 /* ext2_debug ("preallocation hit (%lu/%lu).\n",
144 ++alloc_hits, ++alloc_attempts); */
146 /* Linux gets, clears, and releases the buffer at this
147 point - we don't have to do that; we leave it to the caller
150 ext2_discard_prealloc (ip
);
151 /* ext2_debug ("preallocation miss (%lu/%lu).\n",
152 alloc_hits, ++alloc_attempts); */
153 if (S_ISREG(ip
->i_mode
))
155 (ITOV(ip
)->v_mount
, bpref
,
156 &ip
->i_prealloc_count
,
157 &ip
->i_prealloc_block
);
159 bno
= (daddr_t
)ext2_new_block(ITOV(ip
)->v_mount
,
163 bno
= (daddr_t
)ext2_new_block(ITOV(ip
)->v_mount
, bpref
, 0, 0);
167 /* set next_alloc fields as done in block_getblk */
168 ip
->i_next_alloc_block
= lbn
;
169 ip
->i_next_alloc_goal
= bno
;
171 ip
->i_blocks
+= btodb(size
);
172 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
178 * Restore user's disk quota because allocation failed.
180 ext2_chkdq(ip
, (long)-btodb(size
), cred
, FORCE
);
183 ext2_fserr(fs
, cred
->cr_uid
, "file system full");
184 uprintf("\n%s: write failed, file system is full\n", fs
->fs_fsmnt
);
189 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
191 * The vnode and an array of buffer pointers for a range of sequential
192 * logical blocks to be made contiguous is given. The allocator attempts
193 * to find a range of sequential blocks starting as close as possible to
194 * an fs_rotdelay offset from the end of the allocation for the logical
195 * block immediately preceding the current range. If successful, the
196 * physical block numbers in the buffer pointers and in the inode are
197 * changed to reflect the new allocation. If unsuccessful, the allocation
198 * is left unchanged. The success in doing the reallocation is returned.
199 * Note that the error return is not reflected back to the user. Rather
200 * the previous block allocation will be used.
204 #include <sys/sysctl.h>
205 static int doasyncfree
= 1;
207 SYSCTL_INT(_debug
, 14, doasyncfree
, CTLFLAG_RW
, &doasyncfree
, 0, "");
208 #endif /* OPT_DEBUG */
212 * ext2_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist)
215 ext2_reallocblks(struct vop_reallocblks_args
*ap
)
217 #ifndef FANCY_REALLOC
218 /* kprintf("ext2_reallocblks not implemented\n"); */
222 struct ext2_sb_info
*fs
;
225 struct buf
*sbp
, *ebp
;
226 daddr_t
*bap
, *sbap
, *ebap
;
227 struct cluster_save
*buflist
;
228 daddr_t start_lbn
, end_lbn
, soff
, eoff
, newblk
, blkno
;
229 struct indir start_ap
[NIADDR
+ 1], end_ap
[NIADDR
+ 1], *idp
;
230 int i
, len
, start_lvl
, end_lvl
, pref
, ssize
;
236 if (fs
->fs_contigsumsize
<= 0)
239 buflist
= ap
->a_buflist
;
240 len
= buflist
->bs_nchildren
;
241 start_lbn
= lblkno(fs
, buflist
->bs_children
[0]->b_loffset
);
242 end_lbn
= start_lbn
+ len
- 1;
244 for (i
= 1; i
< len
; i
++) {
245 if (buflist
->bs_children
[i
]->b_loffset
!= lblktodoff(fs
, start_lbn
) + lblktodoff(fs
, i
))
246 panic("ext2_reallocblks: non-cluster");
250 * If the latest allocation is in a new block group, assume that
251 * the filesystem has decided to move and do not force it back to
252 * the previous block group.
254 if (dtog(fs
, dofftofsb(fs
, buflist
->bs_children
[0]->b_bio2
.bio_offset
)) !=
255 dtog(fs
, dofftofsb(fs
, buflist
->bs_children
[len
- 1]->b_bio2
.bio_offset
)))
257 if (ext2_getlbns(vp
, start_lbn
, start_ap
, &start_lvl
) ||
258 ext2_getlbns(vp
, end_lbn
, end_ap
, &end_lvl
))
261 * Get the starting offset and block map for the first block.
263 if (start_lvl
== 0) {
267 idp
= &start_ap
[start_lvl
- 1];
268 if (bread(vp
, lblktodoff(fs
, idp
->in_lbn
), (int)fs
->s_blocksize
, NOCRED
, &sbp
)) {
272 sbap
= (daddr_t
*)sbp
->b_data
;
276 * Find the preferred location for the cluster.
278 pref
= ext2_blkpref(ip
, start_lbn
, soff
, sbap
);
280 * If the block range spans two block maps, get the second map.
282 if (end_lvl
== 0 || (idp
= &end_ap
[end_lvl
- 1])->in_off
+ 1 >= len
) {
286 if (start_ap
[start_lvl
-1].in_lbn
== idp
->in_lbn
)
287 panic("ext2_reallocblk: start == end");
289 ssize
= len
- (idp
->in_off
+ 1);
290 if (bread(vp
, lblktodoff(fs
, idp
->in_lbn
), (int)fs
->s_blocksize
, NOCRED
, &ebp
))
292 ebap
= (daddr_t
*)ebp
->b_data
;
295 * Search the block map looking for an allocation of the desired size.
297 if ((newblk
= (daddr_t
)ext2_hashalloc(ip
, dtog(fs
, pref
), (long)pref
,
298 len
, (u_long (*)())ext2_clusteralloc
)) == 0)
301 * We have found a new contiguous block.
303 * First we have to replace the old block pointers with the new
304 * block pointers in the inode and indirect blocks associated
308 for (bap
= &sbap
[soff
], i
= 0; i
< len
; i
++, blkno
+= fs
->s_frags_per_block
) {
312 if (buflist
->bs_children
[i
]->b_bio2
.bio_offset
!= fsbtodoff(fs
, *bap
))
313 panic("ext2_reallocblks: alloc mismatch");
318 * Next we must write out the modified inode and indirect blocks.
319 * For strict correctness, the writes should be synchronous since
320 * the old block values may have been written to disk. In practice
321 * they are almost never written, but if we are concerned about
322 * strict correctness, the `doasyncfree' flag should be set to zero.
324 * The test on `doasyncfree' should be changed to test a flag
325 * that shows whether the associated buffers and inodes have
326 * been written. The flag should be set when the cluster is
327 * started and cleared whenever the buffer or inode is flushed.
328 * We can then check below to see if it is set, and do the
329 * synchronous write only when it has been cleared.
331 if (sbap
!= &ip
->i_db
[0]) {
337 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
347 * Last, free the old blocks and assign the new blocks to the buffers.
349 for (blkno
= newblk
, i
= 0; i
< len
; i
++, blkno
+= fs
->s_frags_per_block
) {
350 ext2_blkfree(ip
, dofftofsb(fs
, buflist
->bs_children
[i
]->b_bio2
.bio_offset
),
352 buflist
->bs_children
[i
]->b_bio2
.bio_offset
= fsbtodoff(fs
, blkno
);
359 if (sbap
!= &ip
->i_db
[0])
363 #endif /* FANCY_REALLOC */
367 * Allocate an inode in the file system.
369 * we leave the actual allocation strategy to the (modified)
370 * ext2_new_inode(), to make sure we get the policies right
373 ext2_valloc(struct vnode
*pvp
, int mode
, struct ucred
*cred
, struct vnode
**vpp
)
376 struct ext2_sb_info
*fs
;
384 if (fs
->s_es
->s_free_inodes_count
== 0)
387 /* call the Linux routine - it returns the inode number only */
388 ino
= ext2_new_inode(pip
, mode
);
392 error
= VFS_VGET(pvp
->v_mount
, NULL
, ino
, vpp
);
394 EXT2_VFREE(pvp
, ino
, mode
);
400 the question is whether using VGET was such a good idea at all -
401 Linux doesn't read the old inode in when it's allocating a
402 new one. I will set at least i_size & i_blocks to zero.
408 /* now we want to make sure that the block pointers are zeroed out */
409 for (i
= 0; i
< NDADDR
; i
++)
411 for (i
= 0; i
< NIADDR
; i
++)
415 * Set up a new generation number for this inode.
416 * XXX check if this makes sense in ext2
418 if (ip
->i_gen
== 0 || ++ip
->i_gen
== 0)
419 ip
->i_gen
= krandom() / 2 + 1;
421 kprintf("ext2_valloc: allocated inode %d\n", ino);
425 ext2_fserr(fs
, cred
->cr_uid
, "out of inodes");
426 uprintf("\n%s: create/symlink failed, no inodes free\n", fs
->fs_fsmnt
);
431 * Select the desired position for the next block in a file.
433 * we try to mimic what Remy does in inode_getblk/block_getblk
435 * we note: blocknr == 0 means that we're about to allocate either
436 * a direct block or a pointer block at the first level of indirection
437 * (In other words, stuff that will go in i_db[] or i_ib[])
439 * blocknr != 0 means that we're allocating a block that is none
440 * of the above. Then, blocknr tells us the number of the block
441 * that will hold the pointer
444 ext2_blkpref(struct inode
*ip
, daddr_t lbn
, int indx
, daddr_t
*bap
,
450 * if the next block is actually what we thought it is,
451 * then set the goal to what we thought it should be
453 if (ip
->i_next_alloc_block
== lbn
&& ip
->i_next_alloc_goal
!= 0)
454 return ip
->i_next_alloc_goal
;
456 /* now check whether we were provided with an array that basically
457 tells us the previous blocks that we want to stay close to
460 for (tmp
= indx
- 1; tmp
>= 0; tmp
--)
465 * else let's fall back to the blocknr, or, if there is none,
466 * follow the rule that a block should be allocated near its inode
468 return blocknr
? blocknr
:
469 (daddr_t
)(ip
->i_block_group
*
470 EXT2_BLOCKS_PER_GROUP(ip
->i_e2fs
)) +
471 ip
->i_e2fs
->s_es
->s_first_data_block
;
475 * Free a block or fragment.
477 * pass on to the Linux code
480 ext2_blkfree(struct inode
*ip
, daddr_t bno
, long size
)
482 struct ext2_sb_info
*fs
;
486 * call Linux code with mount *, block number, count
488 ext2_free_blocks(ITOV(ip
)->v_mount
, bno
, size
/ fs
->s_frag_size
);
494 * the maintenance of the actual bitmaps is again up to the linux code
497 ext2_vfree(struct vnode
*pvp
, ino_t ino
, int mode
)
499 struct ext2_sb_info
*fs
;
505 if ((u_int
)ino
> fs
->s_inodes_per_group
* fs
->s_groups_count
)
506 panic("ext2_vfree: range: dev = (%d, %d), ino = %"PRId64
", fs = %s",
507 major(pip
->i_dev
), minor(pip
->i_dev
), ino
, fs
->fs_fsmnt
);
509 /* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode);
511 ext2_discard_prealloc(pip
);
513 /* we need to make sure that ext2_free_inode can adjust the
514 used_dir_counts in the group summary information - I'd
515 really like to know what the rationale behind this
516 'set i_mode to zero to denote an unused inode' is
518 save_i_mode
= pip
->i_mode
;
520 ext2_free_inode(pip
);
521 pip
->i_mode
= save_i_mode
;
526 * Fserr prints the name of a file system with an error diagnostic.
528 * The form of the error message is:
532 ext2_fserr(struct ext2_sb_info
*fs
, u_int uid
, char *cp
)
534 log(LOG_ERR
, "uid %d on %s: %s\n", uid
, fs
->fs_fsmnt
, cp
);