2 * modified for Lites 1.1
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
8 * Copyright (c) 1982, 1986, 1989, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94
40 * $FreeBSD: src/sys/gnu/ext2fs/ext2_alloc.c,v 1.28.2.2 2002/07/01 00:18:51 iedowse Exp $
41 * $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_alloc.c,v 1.13 2006/12/23 00:41:29 swildner Exp $
44 #include "opt_quota.h"
46 #include <sys/param.h>
47 #include <sys/systm.h>
49 #include <sys/vnode.h>
51 #include <sys/mount.h>
52 #include <sys/syslog.h>
54 #include <machine/inttypes.h>
58 #include "ext2mount.h"
61 #include "ext2_fs_sb.h"
63 #include "ext2_extern.h"
65 static void ext2_fserr (struct ext2_sb_info
*, u_int
, char *);
68 * Linux calls this function at the following locations:
69 * (1) the inode is freed
70 * (2) a preallocation miss occurs
71 * (3) truncate is called
72 * (4) release_file is called and f_mode & 2
74 * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2)
75 * the call in vfree might be redundant
78 ext2_discard_prealloc(struct inode
*ip
)
80 #ifdef EXT2_PREALLOCATE
81 if (ip
->i_prealloc_count
) {
82 int i
= ip
->i_prealloc_count
;
83 ip
->i_prealloc_count
= 0;
84 ext2_free_blocks (ITOV(ip
)->v_mount
,
92 * Allocate a block in the file system.
94 * this takes the framework from ffs_alloc. To implement the
95 * actual allocation, it calls ext2_new_block, the ported version
96 * of the same Linux routine.
98 * we note that this is always called in connection with ext2_blkpref
100 * preallocation is done as Linux does it
103 ext2_alloc(struct inode
*ip
, daddr_t lbn
, daddr_t bpref
, int size
,
104 struct ucred
*cred
, daddr_t
*bnp
)
106 struct ext2_sb_info
*fs
;
115 if ((u_int
)size
> fs
->s_blocksize
|| blkoff(fs
, size
) != 0) {
116 kprintf("dev = %s, bsize = %lu, size = %d, fs = %s\n",
117 devtoname(ip
->i_dev
), fs
->s_blocksize
, size
, fs
->fs_fsmnt
);
118 panic("ext2_alloc: bad size");
121 panic("ext2_alloc: missing credential");
122 #endif /* DIAGNOSTIC */
123 if (size
== fs
->s_blocksize
&& fs
->s_es
->s_free_blocks_count
== 0)
125 if (cred
->cr_uid
!= 0 &&
126 fs
->s_es
->s_free_blocks_count
< fs
->s_es
->s_r_blocks_count
)
129 if ((error
= ext2_chkdq(ip
, (long)btodb(size
), cred
, 0)) != 0)
132 if (bpref
>= fs
->s_es
->s_blocks_count
)
134 /* call the Linux code */
135 #ifdef EXT2_PREALLOCATE
136 /* To have a preallocation hit, we must
137 * - have at least one block preallocated
138 * - and our preferred block must have that block number or one below
140 if (ip
->i_prealloc_count
&&
141 (bpref
== ip
->i_prealloc_block
||
142 bpref
+ 1 == ip
->i_prealloc_block
))
144 bno
= ip
->i_prealloc_block
++;
145 ip
->i_prealloc_count
--;
146 /* ext2_debug ("preallocation hit (%lu/%lu).\n",
147 ++alloc_hits, ++alloc_attempts); */
149 /* Linux gets, clears, and releases the buffer at this
150 point - we don't have to do that; we leave it to the caller
153 ext2_discard_prealloc (ip
);
154 /* ext2_debug ("preallocation miss (%lu/%lu).\n",
155 alloc_hits, ++alloc_attempts); */
156 if (S_ISREG(ip
->i_mode
))
158 (ITOV(ip
)->v_mount
, bpref
,
159 &ip
->i_prealloc_count
,
160 &ip
->i_prealloc_block
);
162 bno
= (daddr_t
)ext2_new_block(ITOV(ip
)->v_mount
,
166 bno
= (daddr_t
)ext2_new_block(ITOV(ip
)->v_mount
, bpref
, 0, 0);
170 /* set next_alloc fields as done in block_getblk */
171 ip
->i_next_alloc_block
= lbn
;
172 ip
->i_next_alloc_goal
= bno
;
174 ip
->i_blocks
+= btodb(size
);
175 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
181 * Restore user's disk quota because allocation failed.
183 ext2_chkdq(ip
, (long)-btodb(size
), cred
, FORCE
);
186 ext2_fserr(fs
, cred
->cr_uid
, "file system full");
187 uprintf("\n%s: write failed, file system is full\n", fs
->fs_fsmnt
);
192 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
194 * The vnode and an array of buffer pointers for a range of sequential
195 * logical blocks to be made contiguous is given. The allocator attempts
196 * to find a range of sequential blocks starting as close as possible to
197 * an fs_rotdelay offset from the end of the allocation for the logical
198 * block immediately preceding the current range. If successful, the
199 * physical block numbers in the buffer pointers and in the inode are
200 * changed to reflect the new allocation. If unsuccessful, the allocation
201 * is left unchanged. The success in doing the reallocation is returned.
202 * Note that the error return is not reflected back to the user. Rather
203 * the previous block allocation will be used.
207 #include <sys/sysctl.h>
208 static int doasyncfree
= 1;
210 SYSCTL_INT(_debug
, 14, doasyncfree
, CTLFLAG_RW
, &doasyncfree
, 0, "");
211 #endif /* OPT_DEBUG */
215 * ext2_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist)
218 ext2_reallocblks(struct vop_reallocblks_args
*ap
)
220 #ifndef FANCY_REALLOC
221 /* kprintf("ext2_reallocblks not implemented\n"); */
225 struct ext2_sb_info
*fs
;
228 struct buf
*sbp
, *ebp
;
229 daddr_t
*bap
, *sbap
, *ebap
;
230 struct cluster_save
*buflist
;
231 daddr_t start_lbn
, end_lbn
, soff
, eoff
, newblk
, blkno
;
232 struct indir start_ap
[NIADDR
+ 1], end_ap
[NIADDR
+ 1], *idp
;
233 int i
, len
, start_lvl
, end_lvl
, pref
, ssize
;
239 if (fs
->fs_contigsumsize
<= 0)
242 buflist
= ap
->a_buflist
;
243 len
= buflist
->bs_nchildren
;
244 start_lbn
= lblkno(fs
, buflist
->bs_children
[0]->b_loffset
);
245 end_lbn
= start_lbn
+ len
- 1;
247 for (i
= 1; i
< len
; i
++) {
248 if (buflist
->bs_children
[i
]->b_loffset
!= lblktodoff(fs
, start_lbn
) + lblktodoff(fs
, i
))
249 panic("ext2_reallocblks: non-cluster");
253 * If the latest allocation is in a new cylinder group, assume that
254 * the filesystem has decided to move and do not force it back to
255 * the previous cylinder group.
257 if (dtog(fs
, dofftofsb(fs
, buflist
->bs_children
[0]->b_bio2
.bio_offset
)) !=
258 dtog(fs
, dofftofsb(fs
, buflist
->bs_children
[len
- 1]->b_bio2
.bio_offset
)))
260 if (ext2_getlbns(vp
, start_lbn
, start_ap
, &start_lvl
) ||
261 ext2_getlbns(vp
, end_lbn
, end_ap
, &end_lvl
))
264 * Get the starting offset and block map for the first block.
266 if (start_lvl
== 0) {
270 idp
= &start_ap
[start_lvl
- 1];
271 if (bread(vp
, lblktodoff(fs
, idp
->in_lbn
), (int)fs
->s_blocksize
, NOCRED
, &sbp
)) {
275 sbap
= (daddr_t
*)sbp
->b_data
;
279 * Find the preferred location for the cluster.
281 pref
= ext2_blkpref(ip
, start_lbn
, soff
, sbap
);
283 * If the block range spans two block maps, get the second map.
285 if (end_lvl
== 0 || (idp
= &end_ap
[end_lvl
- 1])->in_off
+ 1 >= len
) {
289 if (start_ap
[start_lvl
-1].in_lbn
== idp
->in_lbn
)
290 panic("ext2_reallocblk: start == end");
292 ssize
= len
- (idp
->in_off
+ 1);
293 if (bread(vp
, lblktodoff(fs
, idp
->in_lbn
), (int)fs
->s_blocksize
, NOCRED
, &ebp
))
295 ebap
= (daddr_t
*)ebp
->b_data
;
298 * Search the block map looking for an allocation of the desired size.
300 if ((newblk
= (daddr_t
)ext2_hashalloc(ip
, dtog(fs
, pref
), (long)pref
,
301 len
, (u_long (*)())ext2_clusteralloc
)) == 0)
304 * We have found a new contiguous block.
306 * First we have to replace the old block pointers with the new
307 * block pointers in the inode and indirect blocks associated
311 for (bap
= &sbap
[soff
], i
= 0; i
< len
; i
++, blkno
+= fs
->s_frags_per_block
) {
315 if (buflist
->bs_children
[i
]->b_bio2
.bio_offset
!= fsbtodoff(fs
, *bap
))
316 panic("ext2_reallocblks: alloc mismatch");
321 * Next we must write out the modified inode and indirect blocks.
322 * For strict correctness, the writes should be synchronous since
323 * the old block values may have been written to disk. In practice
324 * they are almost never written, but if we are concerned about
325 * strict correctness, the `doasyncfree' flag should be set to zero.
327 * The test on `doasyncfree' should be changed to test a flag
328 * that shows whether the associated buffers and inodes have
329 * been written. The flag should be set when the cluster is
330 * started and cleared whenever the buffer or inode is flushed.
331 * We can then check below to see if it is set, and do the
332 * synchronous write only when it has been cleared.
334 if (sbap
!= &ip
->i_db
[0]) {
340 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
350 * Last, free the old blocks and assign the new blocks to the buffers.
352 for (blkno
= newblk
, i
= 0; i
< len
; i
++, blkno
+= fs
->s_frags_per_block
) {
353 ext2_blkfree(ip
, dofftofsb(fs
, buflist
->bs_children
[i
]->b_bio2
.bio_offset
),
355 buflist
->bs_children
[i
]->b_bio2
.bio_offset
= fsbtodoff(fs
, blkno
);
362 if (sbap
!= &ip
->i_db
[0])
366 #endif /* FANCY_REALLOC */
370 * Allocate an inode in the file system.
372 * we leave the actual allocation strategy to the (modified)
373 * ext2_new_inode(), to make sure we get the policies right
376 ext2_valloc(struct vnode
*pvp
, int mode
, struct ucred
*cred
, struct vnode
**vpp
)
379 struct ext2_sb_info
*fs
;
387 if (fs
->s_es
->s_free_inodes_count
== 0)
390 /* call the Linux routine - it returns the inode number only */
391 ino
= ext2_new_inode(pip
, mode
);
395 error
= VFS_VGET(pvp
->v_mount
, NULL
, ino
, vpp
);
397 EXT2_VFREE(pvp
, ino
, mode
);
403 the question is whether using VGET was such a good idea at all -
404 Linux doesn't read the old inode in when it's allocating a
405 new one. I will set at least i_size & i_blocks to zero.
411 /* now we want to make sure that the block pointers are zeroed out */
412 for (i
= 0; i
< NDADDR
; i
++)
414 for (i
= 0; i
< NIADDR
; i
++)
418 * Set up a new generation number for this inode.
419 * XXX check if this makes sense in ext2
421 if (ip
->i_gen
== 0 || ++ip
->i_gen
== 0)
422 ip
->i_gen
= krandom() / 2 + 1;
424 kprintf("ext2_valloc: allocated inode %d\n", ino);
428 ext2_fserr(fs
, cred
->cr_uid
, "out of inodes");
429 uprintf("\n%s: create/symlink failed, no inodes free\n", fs
->fs_fsmnt
);
434 * Select the desired position for the next block in a file.
436 * we try to mimic what Remy does in inode_getblk/block_getblk
438 * we note: blocknr == 0 means that we're about to allocate either
439 * a direct block or a pointer block at the first level of indirection
440 * (In other words, stuff that will go in i_db[] or i_ib[])
442 * blocknr != 0 means that we're allocating a block that is none
443 * of the above. Then, blocknr tells us the number of the block
444 * that will hold the pointer
447 ext2_blkpref(struct inode
*ip
, daddr_t lbn
, int indx
, daddr_t
*bap
,
453 * if the next block is actually what we thought it is,
454 * then set the goal to what we thought it should be
456 if (ip
->i_next_alloc_block
== lbn
&& ip
->i_next_alloc_goal
!= 0)
457 return ip
->i_next_alloc_goal
;
459 /* now check whether we were provided with an array that basically
460 tells us previous blocks to which we want to stay close by
463 for (tmp
= indx
- 1; tmp
>= 0; tmp
--)
468 * else let's fall back to the blocknr, or, if there is none,
469 * follow the rule that a block should be allocated near its inode
471 return blocknr
? blocknr
:
472 (daddr_t
)(ip
->i_block_group
*
473 EXT2_BLOCKS_PER_GROUP(ip
->i_e2fs
)) +
474 ip
->i_e2fs
->s_es
->s_first_data_block
;
478 * Free a block or fragment.
480 * pass on to the Linux code
483 ext2_blkfree(struct inode
*ip
, daddr_t bno
, long size
)
485 struct ext2_sb_info
*fs
;
489 * call Linux code with mount *, block number, count
491 ext2_free_blocks(ITOV(ip
)->v_mount
, bno
, size
/ fs
->s_frag_size
);
497 * the maintenance of the actual bitmaps is again up to the linux code
500 ext2_vfree(struct vnode
*pvp
, ino_t ino
, int mode
)
502 struct ext2_sb_info
*fs
;
508 if ((u_int
)ino
> fs
->s_inodes_per_group
* fs
->s_groups_count
)
509 panic("ext2_vfree: range: dev = (%d, %d), ino = %"PRId64
", fs = %s",
510 major(pip
->i_dev
), minor(pip
->i_dev
), ino
, fs
->fs_fsmnt
);
512 /* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode);
514 ext2_discard_prealloc(pip
);
516 /* we need to make sure that ext2_free_inode can adjust the
517 used_dir_counts in the group summary information - I'd
518 really like to know what the rationale behind this
519 'set i_mode to zero to denote an unused inode' is
521 save_i_mode
= pip
->i_mode
;
523 ext2_free_inode(pip
);
524 pip
->i_mode
= save_i_mode
;
529 * Fserr prints the name of a file system with an error diagnostic.
531 * The form of the error message is:
535 ext2_fserr(struct ext2_sb_info
*fs
, u_int uid
, char *cp
)
537 log(LOG_ERR
, "uid %d on %s: %s\n", uid
, fs
->fs_fsmnt
, cp
);