sbin/resize_ffs/resize_ffs.c
1 /* $NetBSD: resize_ffs.c,v 1.11 2007/12/15 16:32:06 perry Exp $ */
2 /* From sources sent on February 17, 2003 */
3 /*-
4 * As its sole author, I explicitly place this code in the public
5 * domain. Anyone may use it for any purpose (though I would
6 * appreciate credit where it is due).
8 * der Mouse
10 * mouse@rodents.montreal.qc.ca
11 * 7D C8 61 52 5D E7 2D 39 4E F1 31 3E E8 B3 27 4B
14 * resize_ffs:
16 * Resize a filesystem. Is capable of both growing and shrinking.
18 * Usage: resize_ffs filesystem newsize
20 * Example: resize_ffs /dev/rsd1e 29574
22 * newsize is in DEV_BSIZE units (ie, disk sectors, usually 512 bytes
23 * each).
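 *
 * For scale (editorial note, not in the original comment): with the usual
 * 512-byte sectors, the example above asks for 29574 * 512 = 15141888
 * bytes, i.e. a little over 14 MB.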
25 * Note: this currently requires gcc to build, since it is written
26 * depending on gcc-specific features, notably nested function
27 * definitions (which in at least a few cases depend on the lexical
28 * scoping gcc provides, so they can't be trivially moved outside).
30 * It will not do anything useful with filesystems in other than
31 * host-native byte order. This really should be fixed (it's largely
32 * a historical accident; the original version of this program is
33 * older than bi-endian support in FFS).
35 * Many thanks go to John Kohl <jtk@NetBSD.org> for finding bugs: the
36 * one responsible for the "realloccgblk: can't find blk in cyl"
37 * problem and a more minor one which left fs_dsize wrong when
38 * shrinking. (These actually indicate bugs in fsck too - it should
39 * have caught and fixed them.)
43 #include <sys/cdefs.h>
44 #include <stdio.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdlib.h>
48 #include <unistd.h>
49 #include <strings.h>
    50 #include <err.h>
#include <time.h>		/* time(), time_t - used by timestamp() below */
51 #include <sys/stat.h>
52 #include <sys/mman.h>
53 #include <sys/param.h> /* MAXFRAG */
54 #include <ufs/ffs/fs.h>
55 #include <ufs/ufs/dir.h>
56 #include <ufs/ufs/dinode.h>
57 #include <ufs/ufs/ufs_bswap.h> /* ufs_rw32 */
59 /* Suppress warnings about unused arguments */
60 #if defined(__GNUC__) && \
61 ( (__GNUC__ > 2) || \
62 ( (__GNUC__ == 2) && \
63 defined(__GNUC_MINOR__) && \
64 (__GNUC_MINOR__ >= 7) ) )
65 #define UNUSED_ARG(x) x __unused
66 #define INLINE inline
67 #else
68 #define UNUSED_ARG(x) x
69 #define INLINE /**/
70 #endif
72 /* new size of filesystem, in sectors */
73 static int newsize;
75 /* fd open onto disk device */
76 static int fd;
78 /* must we break up big I/O operations - see checksmallio() */
79 static int smallio;
81 /* size of a cg, in bytes, rounded up to a frag boundary */
82 static int cgblksz;
    84 /* possible superblock locations */
85 static int search[] = SBLOCKSEARCH;
86 /* location of the superblock */
87 static off_t where;
89 /* Superblocks. */
90 static struct fs *oldsb; /* before we started */
91 static struct fs *newsb; /* copy to work with */
92 /* Buffer to hold the above. Make sure it's aligned correctly. */
93 static char sbbuf[2 * SBLOCKSIZE] __attribute__((__aligned__(__alignof__(struct fs))));
95 /* a cg's worth of brand new squeaky-clean inodes */
96 static struct ufs1_dinode *zinodes;
98 /* pointers to the in-core cgs, read off disk and possibly modified */
99 static struct cg **cgs;
101 /* pointer to csum array - the stuff pointed to on-disk by fs_csaddr */
102 static struct csum *csums;
104 /* per-cg flags, indexed by cg number */
105 static unsigned char *cgflags;
106 #define CGF_DIRTY 0x01 /* needs to be written to disk */
107 #define CGF_BLKMAPS 0x02 /* block bitmaps need rebuilding */
108 #define CGF_INOMAPS 0x04 /* inode bitmaps need rebuilding */
110 /* when shrinking, these two arrays record how we want blocks to move. */
111 /* if blkmove[i] is j, the frag that started out as frag #i should end */
112 /* up as frag #j. inomove[i]=j means, similarly, that the inode that */
113 /* started out as inode i should end up as inode j. */
114 static unsigned int *blkmove;
115 static unsigned int *inomove;
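/*
 * Worked illustration (editorial; the values are hypothetical): if the
 * shrink planning pass sets blkmove[1000] = 237, then perform_data_move()
 * copies the contents of frag 1000 to frag 237, and update_for_data_move()
 * rewrites every block pointer that names frag 1000 to say 237 instead.
 * Entries left with blkmove[i] == i are not touched at all.  inomove[]
 * records the same kind of mapping for inode numbers.
 */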
117 /* in-core copies of all inodes in the fs, indexed by inumber */
118 static struct ufs1_dinode *inodes;
120 /* per-inode flags, indexed by inumber */
121 static unsigned char *iflags;
122 #define IF_DIRTY 0x01 /* needs to be written to disk */
123 #define IF_BDIRTY 0x02 /* like DIRTY, but is set on first inode in a
124 * block of inodes, and applies to the whole
125 * block. */
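/*
 * Editorial illustration (assumes an 8 KB block size): INOPB is then
 * 8192 / sizeof(struct ufs1_dinode) = 64, so dirtying inode 130 makes
 * flush_inodes() set IF_BDIRTY on inode 128 (130 & ~63) and later write
 * inodes 128-191 back to disk as a single block-sized transfer.
 */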
127 /* Old FFS1 macros */
128 #define cg_blktot(cgp, ns) \
129 (cg_chkmagic(cgp, ns) ? \
130 ((int32_t *)((u_int8_t *)(cgp) + ufs_rw32((cgp)->cg_old_btotoff, (ns)))) \
131 : (((struct ocg *)(cgp))->cg_btot))
132 #define cg_blks(fs, cgp, cylno, ns) \
133 (cg_chkmagic(cgp, ns) ? \
134 ((int16_t *)((u_int8_t *)(cgp) + ufs_rw32((cgp)->cg_old_boff, (ns))) + \
135 (cylno) * (fs)->fs_old_nrpos) \
136 : (((struct ocg *)(cgp))->cg_b[cylno]))
137 #define cbtocylno(fs, bno) \
138 (fsbtodb(fs, bno) / (fs)->fs_old_spc)
139 #define cbtorpos(fs, bno) \
140 ((fs)->fs_old_nrpos <= 1 ? 0 : \
141 (fsbtodb(fs, bno) % (fs)->fs_old_spc / \
142 (fs)->fs_old_nsect * (fs)->fs_old_trackskew + \
143 fsbtodb(fs, bno) % (fs)->fs_old_spc % \
144 (fs)->fs_old_nsect * (fs)->fs_old_interleave) %\
145 (fs)->fs_old_nsect * (fs)->fs_old_nrpos / (fs)->fs_old_npsect)
146 #define dblksize(fs, dip, lbn) \
147 (((lbn) >= NDADDR || (dip)->di_size >= lblktosize(fs, (lbn) + 1)) \
148 ? (fs)->fs_bsize \
149 : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
153 * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte
154 * sector size.
156 #define NSPB(fs) ((fs)->fs_old_nspf << (fs)->fs_fragshift)
157 #define NSPF(fs) ((fs)->fs_old_nspf)
160 * See if we need to break up large I/O operations. This should never
161 * be needed, but under at least one <version,platform> combination,
162 * large enough disk transfers to the raw device hang. So if we're
163 * talking to a character special device, play it safe; in this case,
164 * readat() and writeat() break everything up into pieces no larger
165 * than 8K, doing multiple syscalls for larger operations.
167 static void
168 checksmallio(void)
170 struct stat stb;
172 fstat(fd, &stb);
173 smallio = ((stb.st_mode & S_IFMT) == S_IFCHR);
176 * Read size bytes starting at blkno into buf. blkno is in DEV_BSIZE
177 * units, ie, after fsbtodb(); size is in bytes.
179 static void
180 readat(off_t blkno, void *buf, int size)
182 /* Seek to the correct place. */
183 if (lseek(fd, blkno * DEV_BSIZE, L_SET) < 0)
184 err(1, "lseek failed");
186 /* See if we have to break up the transfer... */
187 if (smallio) {
188 char *bp; /* pointer into buf */
189 int left; /* bytes left to go */
190 int n; /* number to do this time around */
191 int rv; /* syscall return value */
192 bp = buf;
193 left = size;
194 while (left > 0) {
195 n = (left > 8192) ? 8192 : left;
196 rv = read(fd, bp, n);
197 if (rv < 0)
198 err(1, "read failed");
199 if (rv != n)
200 errx(1, "read: wanted %d, got %d", n, rv);
201 bp += n;
202 left -= n;
204 } else {
205 int rv;
206 rv = read(fd, buf, size);
207 if (rv < 0)
208 err(1, "read failed");
209 if (rv != size)
210 errx(1, "read: wanted %d, got %d", size, rv);
214 * Write size bytes from buf starting at blkno. blkno is in DEV_BSIZE
215 * units, ie, after fsbtodb(); size is in bytes.
217 static void
218 writeat(off_t blkno, const void *buf, int size)
220 /* Seek to the correct place. */
221 if (lseek(fd, blkno * DEV_BSIZE, L_SET) < 0)
222 err(1, "lseek failed");
223 /* See if we have to break up the transfer... */
224 if (smallio) {
225 const char *bp; /* pointer into buf */
226 int left; /* bytes left to go */
227 int n; /* number to do this time around */
228 int rv; /* syscall return value */
229 bp = buf;
230 left = size;
231 while (left > 0) {
232 n = (left > 8192) ? 8192 : left;
233 rv = write(fd, bp, n);
234 if (rv < 0)
235 err(1, "write failed");
236 if (rv != n)
237 errx(1, "write: wanted %d, got %d", n, rv);
238 bp += n;
239 left -= n;
241 } else {
242 int rv;
243 rv = write(fd, buf, size);
244 if (rv < 0)
245 err(1, "write failed");
246 if (rv != size)
247 errx(1, "write: wanted %d, got %d", size, rv);
251 * Never-fail versions of malloc() and realloc(), and an allocation
252 * routine (which also never fails) for allocating memory that will
253 * never be freed until exit.
257 * Never-fail malloc.
259 static void *
260 nfmalloc(size_t nb, const char *tag)
262 void *rv;
264 rv = malloc(nb);
265 if (rv)
266 return (rv);
267 err(1, "Can't allocate %lu bytes for %s",
268 (unsigned long int) nb, tag);
271 * Never-fail realloc.
273 static void *
274 nfrealloc(void *blk, size_t nb, const char *tag)
276 void *rv;
278 rv = realloc(blk, nb);
279 if (rv)
280 return (rv);
281 err(1, "Can't re-allocate %lu bytes for %s",
282 (unsigned long int) nb, tag);
285 * Allocate memory that will never be freed or reallocated. Arguably
286 * this routine should handle small allocations by chopping up pages,
287 * but that's not worth the bother; it's not called more than a
288 * handful of times per run, and if the allocations are that small the
289 * waste in giving each one its own page is ignorable.
291 static void *
292 alloconce(size_t nb, const char *tag)
294 void *rv;
296 rv = mmap(0, nb, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
297 if (rv != MAP_FAILED)
298 return (rv);
299 err(1, "Can't map %lu bytes for %s",
300 (unsigned long int) nb, tag);
303 * Load the cgs and csums off disk. Also allocates the space to load
304 * them into and initializes the per-cg flags.
306 static void
307 loadcgs(void)
309 int cg;
310 char *cgp;
312 cgblksz = roundup(oldsb->fs_cgsize, oldsb->fs_fsize);
313 cgs = nfmalloc(oldsb->fs_ncg * sizeof(struct cg *), "cg pointers");
314 cgp = alloconce(oldsb->fs_ncg * cgblksz, "cgs");
315 cgflags = nfmalloc(oldsb->fs_ncg, "cg flags");
316 csums = nfmalloc(oldsb->fs_cssize, "cg summary");
317 for (cg = 0; cg < oldsb->fs_ncg; cg++) {
318 cgs[cg] = (struct cg *) cgp;
319 readat(fsbtodb(oldsb, cgtod(oldsb, cg)), cgp, cgblksz);
320 cgflags[cg] = 0;
321 cgp += cgblksz;
323 readat(fsbtodb(oldsb, oldsb->fs_csaddr), csums, oldsb->fs_cssize);
326 * Set n bits, starting with bit #base, in the bitmap pointed to by
327 * bitvec (which is assumed to be large enough to include bits base
328 * through base+n-1).
330 static void
331 set_bits(unsigned char *bitvec, unsigned int base, unsigned int n)
333 if (n < 1)
334 return; /* nothing to do */
335 if (base & 7) { /* partial byte at beginning */
336 if (n <= 8 - (base & 7)) { /* entirely within one byte */
337 bitvec[base >> 3] |= (~((~0U) << n)) << (base & 7);
338 return;
340 bitvec[base >> 3] |= (~0U) << (base & 7);
341 n -= 8 - (base & 7);
342 base = (base & ~7) + 8;
344 if (n >= 8) { /* do full bytes */
345 memset(bitvec + (base >> 3), 0xff, n >> 3);
346 base += n & ~7;
347 n &= 7;
349 if (n) { /* partial byte at end */
350 bitvec[base >> 3] |= ~((~0U) << n);
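/*
 * Worked example (editorial, not in the original): set_bits(v, 5, 2) takes
 * the "entirely within one byte" branch above and ORs
 * (~((~0U) << 2)) << 5 == 0x60 into v[0], setting bits 5 and 6;
 * set_bits(v, 6, 4) sets bits 6-7 of v[0] and bits 0-1 of v[1].
 */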
354 * Clear n bits, starting with bit #base, in the bitmap pointed to by
355 * bitvec (which is assumed to be large enough to include bits base
356 * through base+n-1). Code parallels set_bits().
358 static void
359 clr_bits(unsigned char *bitvec, int base, int n)
361 if (n < 1)
362 return;
363 if (base & 7) {
364 if (n <= 8 - (base & 7)) {
365 bitvec[base >> 3] &= ~((~((~0U) << n)) << (base & 7));
366 return;
368 bitvec[base >> 3] &= ~((~0U) << (base & 7));
369 n -= 8 - (base & 7);
370 base = (base & ~7) + 8;
372 if (n >= 8) {
373 bzero(bitvec + (base >> 3), n >> 3);
374 base += n & ~7;
375 n &= 7;
377 if (n) {
378 bitvec[base >> 3] &= (~0U) << n;
382 * Test whether bit #bit is set in the bitmap pointed to by bitvec.
384 INLINE static int
385 bit_is_set(unsigned char *bitvec, int bit)
387 return (bitvec[bit >> 3] & (1 << (bit & 7)));
390 * Test whether bit #bit is clear in the bitmap pointed to by bitvec.
392 INLINE static int
393 bit_is_clr(unsigned char *bitvec, int bit)
395 return (!bit_is_set(bitvec, bit));
398 * Test whether a whole block of bits is set in a bitmap. This is
399 * designed for testing (aligned) disk blocks in a bit-per-frag
400 * bitmap; it has assumptions wired into it based on that, essentially
401 * that the entire block fits into a single byte. This returns true
402 * iff _all_ the bits are set; it is not just the complement of
403 * blk_is_clr on the same arguments (unless blkfrags==1).
405 INLINE static int
406 blk_is_set(unsigned char *bitvec, int blkbase, int blkfrags)
408 unsigned int mask;
410 mask = (~((~0U) << blkfrags)) << (blkbase & 7);
411 return ((bitvec[blkbase >> 3] & mask) == mask);
414 * Test whether a whole block of bits is clear in a bitmap. See
415 * blk_is_set (above) for assumptions. This returns true iff _all_
416 * the bits are clear; it is not just the complement of blk_is_set on
417 * the same arguments (unless blkfrags==1).
419 INLINE static int
420 blk_is_clr(unsigned char *bitvec, int blkbase, int blkfrags)
422 unsigned int mask;
424 mask = (~((~0U) << blkfrags)) << (blkbase & 7);
425 return ((bitvec[blkbase >> 3] & mask) == 0);
428 * Initialize a new cg. Called when growing. Assumes memory has been
429 * allocated but not otherwise set up. This code sets the fields of
430 * the cg, initializes the bitmaps (and cluster summaries, if
431 * applicable), updates both per-cylinder summary info and the global
432 * summary info in newsb; it also writes out new inodes for the cg.
434 * This code knows it can never be called for cg 0, which makes it a
435 * bit simpler than it would otherwise be.
437 static void
438 initcg(int cgn)
440 struct cg *cg; /* The in-core cg, of course */
441 int base; /* Disk address of cg base */
442 int dlow; /* Size of pre-cg data area */
443 int dhigh; /* Offset of post-inode data area, from base */
444 int dmax; /* Offset of end of post-inode data area */
445 int i; /* Generic loop index */
446 int n; /* Generic count */
448 cg = cgs[cgn];
449 /* Place the data areas */
450 base = cgbase(newsb, cgn);
451 dlow = cgsblock(newsb, cgn) - base;
452 dhigh = cgdmin(newsb, cgn) - base;
453 dmax = newsb->fs_size - base;
454 if (dmax > newsb->fs_fpg)
455 dmax = newsb->fs_fpg;
457 * Clear out the cg - assumes all-0-bytes is the correct way
458 * to initialize fields we don't otherwise touch, which is
459 * perhaps not the right thing to do, but it's what fsck and
460 * mkfs do.
462 bzero(cg, newsb->fs_cgsize);
463 cg->cg_time = newsb->fs_time;
464 cg->cg_magic = CG_MAGIC;
465 cg->cg_cgx = cgn;
466 cg->cg_old_ncyl = newsb->fs_old_cpg;
467 /* fsck whines if the cg->cg_old_ncyl value in the last cg is fs_old_cpg
468 * instead of zero, when fs_old_cpg is the correct value. */
469 /* XXX fix once fsck is fixed */
470 if ((cgn == newsb->fs_ncg - 1) /* && (newsb->fs_old_ncyl % newsb->fs_old_cpg) */ ) {
471 cg->cg_old_ncyl = newsb->fs_old_ncyl % newsb->fs_old_cpg;
473 cg->cg_niblk = newsb->fs_ipg;
474 cg->cg_ndblk = dmax;
475 /* Set up the bitmap pointers. We have to be careful to lay out the
476 * cg _exactly_ the way mkfs and fsck do it, since fsck compares the
477 * _entire_ cg against a recomputed cg, and whines if there is any
478 * mismatch, including the bitmap offsets. */
479 /* XXX update this comment when fsck is fixed */
480 cg->cg_old_btotoff = &cg->cg_space[0] - (unsigned char *) cg;
481 cg->cg_old_boff = cg->cg_old_btotoff
482 + (newsb->fs_old_cpg * sizeof(int32_t));
483 cg->cg_iusedoff = cg->cg_old_boff +
484 (newsb->fs_old_cpg * newsb->fs_old_nrpos * sizeof(int16_t));
485 cg->cg_freeoff = cg->cg_iusedoff + howmany(newsb->fs_ipg, NBBY);
486 if (newsb->fs_contigsumsize > 0) {
487 cg->cg_nclusterblks = cg->cg_ndblk / newsb->fs_frag;
488 cg->cg_clustersumoff = cg->cg_freeoff +
489 howmany(newsb->fs_old_cpg * newsb->fs_old_spc / NSPF(newsb),
490 NBBY) - sizeof(int32_t);
491 cg->cg_clustersumoff =
492 roundup(cg->cg_clustersumoff, sizeof(int32_t));
493 cg->cg_clusteroff = cg->cg_clustersumoff +
494 ((newsb->fs_contigsumsize + 1) * sizeof(int32_t));
495 cg->cg_nextfreeoff = cg->cg_clusteroff +
496 howmany(newsb->fs_old_cpg * newsb->fs_old_spc / NSPB(newsb),
497 NBBY);
498 n = dlow / newsb->fs_frag;
499 if (n > 0) {
500 set_bits(cg_clustersfree(cg, 0), 0, n);
501 cg_clustersum(cg, 0)[(n > newsb->fs_contigsumsize) ?
502 newsb->fs_contigsumsize : n]++;
504 } else {
505 cg->cg_nextfreeoff = cg->cg_freeoff +
506 howmany(newsb->fs_old_cpg * newsb->fs_old_spc / NSPF(newsb),
507 NBBY);
509 /* Mark the data areas as free; everything else is marked busy by the
510 * bzero up at the top. */
511 set_bits(cg_blksfree(cg, 0), 0, dlow);
512 set_bits(cg_blksfree(cg, 0), dhigh, dmax - dhigh);
513 /* Initialize summary info */
514 cg->cg_cs.cs_ndir = 0;
515 cg->cg_cs.cs_nifree = newsb->fs_ipg;
516 cg->cg_cs.cs_nbfree = dlow / newsb->fs_frag;
517 cg->cg_cs.cs_nffree = 0;
519 /* This is the simplest way of doing this; we perhaps could compute
520 * the correct cg_blktot()[] and cg_blks()[] values other ways, but it
521 * would be complicated and hardly seems worth the effort. (The
522 * reason there isn't frag-at-beginning and frag-at-end code here,
523 * like the code below for the post-inode data area, is that the
524 * pre-sb data area always starts at 0, and thus is block-aligned, and
525 * always ends at the sb, which is block-aligned.) */
526 for (i = 0; i < dlow; i += newsb->fs_frag) {
527 cg_blktot(cg, 0)[cbtocylno(newsb, i)]++;
528 cg_blks(newsb, cg, cbtocylno(newsb, i), 0)[cbtorpos(newsb, i)]++;
530 /* Deal with a partial block at the beginning of the post-inode area.
531 * I'm not convinced this can happen - I think the inodes are always
532 * block-aligned and always an integral number of blocks - but it's
533 * cheap to do the right thing just in case. */
534 if (dhigh % newsb->fs_frag) {
535 n = newsb->fs_frag - (dhigh % newsb->fs_frag);
536 cg->cg_frsum[n]++;
537 cg->cg_cs.cs_nffree += n;
538 dhigh += n;
540 n = (dmax - dhigh) / newsb->fs_frag;
541 /* We have n full-size blocks in the post-inode data area. */
542 if (n > 0) {
543 cg->cg_cs.cs_nbfree += n;
544 if (newsb->fs_contigsumsize > 0) {
545 i = dhigh / newsb->fs_frag;
546 set_bits(cg_clustersfree(cg, 0), i, n);
547 cg_clustersum(cg, 0)[(n > newsb->fs_contigsumsize) ?
548 newsb->fs_contigsumsize : n]++;
550 for (i = n; i > 0; i--) {
551 cg_blktot(cg, 0)[cbtocylno(newsb, dhigh)]++;
552 cg_blks(newsb, cg,
553 cbtocylno(newsb, dhigh), 0)[cbtorpos(newsb,
554 dhigh)]++;
555 dhigh += newsb->fs_frag;
558 /* Deal with any leftover frag at the end of the cg. */
559 i = dmax - dhigh;
560 if (i) {
561 cg->cg_frsum[i]++;
562 cg->cg_cs.cs_nffree += i;
564 /* Update the csum info. */
565 csums[cgn] = cg->cg_cs;
566 newsb->fs_cstotal.cs_nffree += cg->cg_cs.cs_nffree;
567 newsb->fs_cstotal.cs_nbfree += cg->cg_cs.cs_nbfree;
568 newsb->fs_cstotal.cs_nifree += cg->cg_cs.cs_nifree;
569 /* Write out the cleared inodes. */
570 writeat(fsbtodb(newsb, cgimin(newsb, cgn)), zinodes,
571 newsb->fs_ipg * sizeof(struct ufs1_dinode));
572 /* Dirty the cg. */
573 cgflags[cgn] |= CGF_DIRTY;
576 * Find free space, at least nfrags consecutive frags of it. Pays no
577 * attention to block boundaries, but refuses to straddle cg
578 * boundaries, even if the disk blocks involved are in fact
579 * consecutive. Return value is the frag number of the first frag of
580 * the block, or -1 if no space was found. Uses newsb for sb values,
581 * and assumes the cgs[] structures correctly describe the area to be
582 * searched.
584 * XXX is there a bug lurking in the ignoring of block boundaries by
585 * the routine used by fragmove() in evict_data()? Can an end-of-file
586 * frag legally straddle a block boundary? If not, this should be
587 * cloned and fixed to stop at block boundaries for that use. The
588 * current one may still be needed for csum info motion, in case that
589 * takes up more than a whole block (is the csum info allowed to begin
590 * partway through a block and continue into the following block?).
592 * If we wrap off the end of the filesystem back to the beginning, we
593 * can end up searching the end of the filesystem twice. I ignore
594 * this inefficiency, since if that happens we're going to croak with
595 * a no-space error anyway, so it happens at most once.
597 static int
598 find_freespace(unsigned int nfrags)
600 static int hand = 0; /* hand rotates through all frags in the fs */
601 int cgsize; /* size of the cg hand currently points into */
602 int cgn; /* number of cg hand currently points into */
603 int fwc; /* frag-within-cg number of frag hand points
604 * to */
605 int run; /* length of run of free frags seen so far */
606 int secondpass; /* have we wrapped from end of fs to
607 * beginning? */
608 unsigned char *bits; /* cg_blksfree()[] for cg hand points into */
610 cgn = dtog(newsb, hand);
611 fwc = dtogd(newsb, hand);
612 secondpass = (hand == 0);
613 run = 0;
614 bits = cg_blksfree(cgs[cgn], 0);
615 cgsize = cgs[cgn]->cg_ndblk;
616 while (1) {
617 if (bit_is_set(bits, fwc)) {
618 run++;
619 if (run >= nfrags)
620 return (hand + 1 - run);
621 } else {
622 run = 0;
624 hand++;
625 fwc++;
626 if (fwc >= cgsize) {
627 fwc = 0;
628 cgn++;
629 if (cgn >= newsb->fs_ncg) {
630 hand = 0;
631 if (secondpass)
632 return (-1);
633 secondpass = 1;
634 cgn = 0;
636 bits = cg_blksfree(cgs[cgn], 0);
637 cgsize = cgs[cgn]->cg_ndblk;
638 run = 0;
643 * Find a free block of disk space. Finds an entire block of frags,
644 * all of which are free. Return value is the frag number of the
645 * first frag of the block, or -1 if no space was found. Uses newsb
646 * for sb values, and assumes the cgs[] structures correctly describe
647 * the area to be searched.
649 * See find_freespace(), above, for remarks about hand wrapping around.
651 static int
652 find_freeblock(void)
654 static int hand = 0; /* hand rotates through all frags in fs */
655 int cgn; /* cg number of cg hand points into */
656 int fwc; /* frag-within-cg number of frag hand points
657 * to */
658 int cgsize; /* size of cg hand points into */
659 int secondpass; /* have we wrapped from end to beginning? */
660 unsigned char *bits; /* cg_blksfree()[] for cg hand points into */
662 cgn = dtog(newsb, hand);
663 fwc = dtogd(newsb, hand);
664 secondpass = (hand == 0);
665 bits = cg_blksfree(cgs[cgn], 0);
666 cgsize = blknum(newsb, cgs[cgn]->cg_ndblk);
667 while (1) {
668 if (blk_is_set(bits, fwc, newsb->fs_frag))
669 return (hand);
670 fwc += newsb->fs_frag;
671 hand += newsb->fs_frag;
672 if (fwc >= cgsize) {
673 fwc = 0;
674 cgn++;
675 if (cgn >= newsb->fs_ncg) {
676 hand = 0;
677 if (secondpass)
678 return (-1);
679 secondpass = 1;
680 cgn = 0;
682 bits = cg_blksfree(cgs[cgn], 0);
683 cgsize = blknum(newsb, cgs[cgn]->cg_ndblk);
688 * Find a free inode, returning its inumber or -1 if none was found.
689 * Uses newsb for sb values, and assumes the cgs[] structures
690 * correctly describe the area to be searched.
692 * See find_freespace(), above, for remarks about hand wrapping around.
694 static int
695 find_freeinode(void)
697 static int hand = 0; /* hand rotates through all inodes in fs */
698 int cgn; /* cg number of cg hand points into */
699 int iwc; /* inode-within-cg number of inode hand points
700 * to */
701 int secondpass; /* have we wrapped from end to beginning? */
702 unsigned char *bits; /* cg_inosused()[] for cg hand points into */
704 cgn = hand / newsb->fs_ipg;
705 iwc = hand % newsb->fs_ipg;
706 secondpass = (hand == 0);
707 bits = cg_inosused(cgs[cgn], 0);
708 while (1) {
709 if (bit_is_clr(bits, iwc))
710 return (hand);
711 hand++;
712 iwc++;
713 if (iwc >= newsb->fs_ipg) {
714 iwc = 0;
715 cgn++;
716 if (cgn >= newsb->fs_ncg) {
717 hand = 0;
718 if (secondpass)
719 return (-1);
720 secondpass = 1;
721 cgn = 0;
723 bits = cg_inosused(cgs[cgn], 0);
728 * Mark a frag as free. Sets the frag's bit in the cg_blksfree bitmap
729 * for the appropriate cg, and marks the cg as dirty.
731 static void
732 free_frag(int fno)
734 int cgn;
736 cgn = dtog(newsb, fno);
737 set_bits(cg_blksfree(cgs[cgn], 0), dtogd(newsb, fno), 1);
738 cgflags[cgn] |= CGF_DIRTY | CGF_BLKMAPS;
741 * Allocate a frag. Clears the frag's bit in the cg_blksfree bitmap
742 * for the appropriate cg, and marks the cg as dirty.
744 static void
745 alloc_frag(int fno)
747 int cgn;
749 cgn = dtog(newsb, fno);
750 clr_bits(cg_blksfree(cgs[cgn], 0), dtogd(newsb, fno), 1);
751 cgflags[cgn] |= CGF_DIRTY | CGF_BLKMAPS;
754 * Fix up the csum array. If shrinking, this involves freeing zero or
755 * more frags; if growing, it involves allocating them, or if the
756 * frags being grown into aren't free, finding space elsewhere for the
757 * csum info. (If the number of occupied frags doesn't change,
758 * nothing happens here.)
760 static void
761 csum_fixup(void)
763 int nold; /* # frags in old csum info */
764 int ntot; /* # frags in new csum info */
765 int nnew; /* ntot-nold */
766 int newloc; /* new location for csum info, if necessary */
767 int i; /* generic loop index */
768 int j; /* generic loop index */
769 int f; /* "from" frag number, if moving */
770 int t; /* "to" frag number, if moving */
771 int cgn; /* cg number, used when shrinking */
773 ntot = howmany(newsb->fs_cssize, newsb->fs_fsize);
774 nold = howmany(oldsb->fs_cssize, newsb->fs_fsize);
775 nnew = ntot - nold;
776 /* First, if there's no change in frag counts, it's easy. */
777 if (nnew == 0)
778 return;
779 /* Next, if we're shrinking, it's almost as easy. Just free up any
780 * frags in the old area we no longer need. */
781 if (nnew < 0) {
782 for ((i = newsb->fs_csaddr + ntot - 1), (j = nnew);
783 j < 0;
784 i--, j++) {
785 free_frag(i);
787 return;
789 /* We must be growing. Check to see that the new csum area fits
790 * within the filesystem. I think this can never happen, since for
791 * the csum area to grow, we must be adding at least one cg, so the
792 * old csum area can't be this close to the end of the new filesystem.
793 * But it's a cheap check. */
794 /* XXX what if csum info is at end of cg and grows into next cg, what
795 * if it spills over onto the next cg's backup superblock? Can this
796 * happen? */
797 if (newsb->fs_csaddr + ntot <= newsb->fs_size) {
798 /* Okay, it fits - now, see if the space we want is free. */
799 for ((i = newsb->fs_csaddr + nold), (j = nnew);
800 j > 0;
801 i++, j--) {
802 cgn = dtog(newsb, i);
803 if (bit_is_clr(cg_blksfree(cgs[cgn], 0),
804 dtogd(newsb, i)))
805 break;
807 if (j <= 0) {
808 /* Win win - all the frags we want are free. Allocate
809 * 'em and we're all done. */
810 for ((i = newsb->fs_csaddr + ntot - nnew), (j = nnew); j > 0; i++, j--) {
811 alloc_frag(i);
813 return;
816 /* We have to move the csum info, sigh. Look for new space, free old
817 * space, and allocate new. Update fs_csaddr. We don't copy anything
818 * on disk at this point; the csum info will be written to the
819 * then-current fs_csaddr as part of the final flush. */
820 newloc = find_freespace(ntot);
821 if (newloc < 0) {
822 printf("Sorry, no space available for new csums\n");
823 exit(1);
825 for (i = 0, f = newsb->fs_csaddr, t = newloc; i < ntot; i++, f++, t++) {
826 if (i < nold) {
827 free_frag(f);
829 alloc_frag(t);
831 newsb->fs_csaddr = newloc;
834 * Recompute newsb->fs_dsize. Just scans all cgs, adding the number of
835 * data blocks in that cg to the total.
837 static void
838 recompute_fs_dsize(void)
840 int i;
842 newsb->fs_dsize = 0;
843 for (i = 0; i < newsb->fs_ncg; i++) {
844 int dlow; /* size of before-sb data area */
845 int dhigh; /* offset of post-inode data area */
846 int dmax; /* total size of cg */
847 int base; /* base of cg, since cgsblock() etc add it in */
848 base = cgbase(newsb, i);
849 dlow = cgsblock(newsb, i) - base;
850 dhigh = cgdmin(newsb, i) - base;
851 dmax = newsb->fs_size - base;
852 if (dmax > newsb->fs_fpg)
853 dmax = newsb->fs_fpg;
854 newsb->fs_dsize += dlow + dmax - dhigh;
856 /* Space in cg 0 before cgsblock is boot area, not free space! */
857 newsb->fs_dsize -= cgsblock(newsb, 0) - cgbase(newsb, 0);
858 /* And of course the csum info takes up space. */
859 newsb->fs_dsize -= howmany(newsb->fs_cssize, newsb->fs_fsize);
862 * Return the current time. We call this and assign, rather than
863 * calling time() directly, as insulation against OSes where fs_time
864 * is not a time_t.
866 static time_t
867 timestamp(void)
869 time_t t;
871 time(&t);
872 return (t);
875 * Grow the filesystem.
877 static void
878 grow(void)
880 int i;
882 /* Update the timestamp. */
883 newsb->fs_time = timestamp();
884 /* Allocate and clear the new-inode area, in case we add any cgs. */
885 zinodes = alloconce(newsb->fs_ipg * sizeof(struct ufs1_dinode),
886 "zeroed inodes");
887 bzero(zinodes, newsb->fs_ipg * sizeof(struct ufs1_dinode));
888 /* Update the size. */
889 newsb->fs_size = dbtofsb(newsb, newsize);
890 /* Did we actually not grow? (This can happen if newsize is less than
891 * a frag larger than the old size - unlikely, but no excuse to
892 * misbehave if it happens.) */
893 if (newsb->fs_size == oldsb->fs_size)
894 return;
895 /* Check that the new last sector (frag, actually) is writable. Since
896 * it's at least one frag larger than it used to be, we know we aren't
897 * overwriting anything important by this. (The choice of sbbuf as
898 * what to write is irrelevant; it's just something handy that's known
899 * to be at least one frag in size.) */
900 writeat(newsb->fs_size - 1, &sbbuf, newsb->fs_fsize);
901 /* Update fs_old_ncyl and fs_ncg. */
902 newsb->fs_old_ncyl = (newsb->fs_size * NSPF(newsb)) / newsb->fs_old_spc;
903 newsb->fs_ncg = howmany(newsb->fs_old_ncyl, newsb->fs_old_cpg);
904 /* Does the last cg end before the end of its inode area? There is no
905 * reason why this couldn't be handled, but it would complicate a lot
906 * of code (in all filesystem code - fsck, kernel, etc) because of the
907 * potential partial inode area, and the gain in space would be
908 * minimal, at most the pre-sb data area. */
909 if (cgdmin(newsb, newsb->fs_ncg - 1) > newsb->fs_size) {
910 newsb->fs_ncg--;
911 newsb->fs_old_ncyl = newsb->fs_ncg * newsb->fs_old_cpg;
912 newsb->fs_size = (newsb->fs_old_ncyl * newsb->fs_old_spc) / NSPF(newsb);
913 printf("Warning: last cylinder group is too small;\n");
914 printf(" dropping it. New size = %lu.\n",
915 (unsigned long int) fsbtodb(newsb, newsb->fs_size));
917 /* Find out how big the csum area is, and realloc csums if bigger. */
918 newsb->fs_cssize = fragroundup(newsb,
919 newsb->fs_ncg * sizeof(struct csum));
920 if (newsb->fs_cssize > oldsb->fs_cssize)
921 csums = nfrealloc(csums, newsb->fs_cssize, "new cg summary");
922 /* If we're adding any cgs, realloc structures and set up the new cgs. */
923 if (newsb->fs_ncg > oldsb->fs_ncg) {
924 char *cgp;
925 cgs = nfrealloc(cgs, newsb->fs_ncg * sizeof(struct cg *),
926 "cg pointers");
927 cgflags = nfrealloc(cgflags, newsb->fs_ncg, "cg flags");
928 bzero(cgflags + oldsb->fs_ncg, newsb->fs_ncg - oldsb->fs_ncg);
929 cgp = alloconce((newsb->fs_ncg - oldsb->fs_ncg) * cgblksz,
930 "cgs");
931 for (i = oldsb->fs_ncg; i < newsb->fs_ncg; i++) {
932 cgs[i] = (struct cg *) cgp;
933 initcg(i);
934 cgp += cgblksz;
936 cgs[oldsb->fs_ncg - 1]->cg_old_ncyl = oldsb->fs_old_cpg;
937 cgflags[oldsb->fs_ncg - 1] |= CGF_DIRTY;
939 /* If the old fs ended partway through a cg, we have to update the old
940 * last cg (though possibly not to a full cg!). */
941 if (oldsb->fs_size % oldsb->fs_fpg) {
942 struct cg *cg;
943 int newcgsize;
944 int prevcgtop;
945 int oldcgsize;
946 cg = cgs[oldsb->fs_ncg - 1];
947 cgflags[oldsb->fs_ncg - 1] |= CGF_DIRTY | CGF_BLKMAPS;
948 prevcgtop = oldsb->fs_fpg * (oldsb->fs_ncg - 1);
949 newcgsize = newsb->fs_size - prevcgtop;
950 if (newcgsize > newsb->fs_fpg)
951 newcgsize = newsb->fs_fpg;
952 oldcgsize = oldsb->fs_size % oldsb->fs_fpg;
953 set_bits(cg_blksfree(cg, 0), oldcgsize, newcgsize - oldcgsize);
954 cg->cg_old_ncyl = howmany(newcgsize * NSPF(newsb), newsb->fs_old_spc);
955 cg->cg_ndblk = newcgsize;
957 /* Fix up the csum info, if necessary. */
958 csum_fixup();
959 /* Make fs_dsize match the new reality. */
960 recompute_fs_dsize();
963 * Call (*fn)() for each inode, passing the inode and its inumber. The
   964 * number of cylinder groups is passed in, so this can be used to map
965 * over either the old or the new filesystem's set of inodes.
967 static void
968 map_inodes(void (*fn) (struct ufs1_dinode * di, unsigned int, void *arg), int ncg, void *cbarg) {
969 int i;
970 int ni;
972 ni = oldsb->fs_ipg * ncg;
973 for (i = 0; i < ni; i++)
974 (*fn) (inodes + i, i, cbarg);
976 /* Values for the third argument to the map function for
   977 * map_inode_data_blocks.  MDB_DATA indicates the block contains
978 * file data; MDB_INDIR_PRE and MDB_INDIR_POST indicate that it's an
979 * indirect block. The MDB_INDIR_PRE call is made before the indirect
980 * block pointers are followed and the pointed-to blocks scanned,
981 * MDB_INDIR_POST after.
983 #define MDB_DATA 1
984 #define MDB_INDIR_PRE 2
985 #define MDB_INDIR_POST 3
987 typedef void (*mark_callback_t) (unsigned int blocknum, unsigned int nfrags, unsigned int blksize, int opcode);
989 /* Helper function - handles a data block. Calls the callback
990 * function and returns number of bytes occupied in file (actually,
991 * rounded up to a frag boundary). The name is historical. */
992 static int
993 markblk(mark_callback_t fn, struct ufs1_dinode * di, int bn, off_t o)
995 int sz;
996 int nb;
997 if (o >= di->di_size)
998 return (0);
999 sz = dblksize(newsb, di, lblkno(newsb, o));
1000 nb = (sz > di->di_size - o) ? di->di_size - o : sz;
1001 if (bn)
1002 (*fn) (bn, numfrags(newsb, sz), nb, MDB_DATA);
1003 return (sz);
1005 /* Helper function - handles an indirect block. Makes the
1006 * MDB_INDIR_PRE callback for the indirect block, loops over the
1007 * pointers and recurses, and makes the MDB_INDIR_POST callback.
1008 * Returns the number of bytes occupied in file, as does markblk().
1009 * For the sake of update_for_data_move(), we read the indirect block
1010 * _after_ making the _PRE callback. The name is historical. */
1011 static int
1012 markiblk(mark_callback_t fn, struct ufs1_dinode * di, int bn, off_t o, int lev)
1014 int i;
1015 int j;
1016 int tot;
1017 static int32_t indirblk1[howmany(MAXBSIZE, sizeof(int32_t))];
1018 static int32_t indirblk2[howmany(MAXBSIZE, sizeof(int32_t))];
1019 static int32_t indirblk3[howmany(MAXBSIZE, sizeof(int32_t))];
1020 static int32_t *indirblks[3] = {
1021 &indirblk1[0], &indirblk2[0], &indirblk3[0]
1023 if (lev < 0)
1024 return (markblk(fn, di, bn, o));
1025 if (bn == 0) {
1026 for (i = newsb->fs_bsize;
1027 lev >= 0;
1028 i *= NINDIR(newsb), lev--);
1029 return (i);
1031 (*fn) (bn, newsb->fs_frag, newsb->fs_bsize, MDB_INDIR_PRE);
1032 readat(fsbtodb(newsb, bn), indirblks[lev], newsb->fs_bsize);
1033 tot = 0;
1034 for (i = 0; i < NINDIR(newsb); i++) {
1035 j = markiblk(fn, di, indirblks[lev][i], o, lev - 1);
1036 if (j == 0)
1037 break;
1038 o += j;
1039 tot += j;
1041 (*fn) (bn, newsb->fs_frag, newsb->fs_bsize, MDB_INDIR_POST);
1042 return (tot);
1047 * Call (*fn)() for each data block for an inode. This routine assumes
1048 * the inode is known to be of a type that has data blocks (file,
1049 * directory, or non-fast symlink). The called function is:
1051 * (*fn)(unsigned int blkno, unsigned int nf, unsigned int nb, int op)
1053 * where blkno is the frag number, nf is the number of frags starting
1054 * at blkno (always <= fs_frag), nb is the number of bytes that belong
  1055 * to the file (usually nf*fs_fsize, often less for the last block/frag
1056 * of a file).
1058 static void
1059 map_inode_data_blocks(struct ufs1_dinode * di, mark_callback_t fn)
1061 off_t o; /* offset within inode */
1062 int inc; /* increment for o - maybe should be off_t? */
1063 int b; /* index within di_db[] and di_ib[] arrays */
1065 /* Scan the direct blocks... */
1066 o = 0;
1067 for (b = 0; b < NDADDR; b++) {
1068 inc = markblk(fn, di, di->di_db[b], o);
1069 if (inc == 0)
1070 break;
1071 o += inc;
1073 /* ...and the indirect blocks. */
1074 if (inc) {
1075 for (b = 0; b < NIADDR; b++) {
1076 inc = markiblk(fn, di, di->di_ib[b], o, b);
1077 if (inc == 0)
1078 return;
1079 o += inc;
1084 static void
1085 dblk_callback(struct ufs1_dinode * di, unsigned int inum, void *arg)
1087 mark_callback_t fn;
1088 fn = (mark_callback_t) arg;
1089 switch (di->di_mode & IFMT) {
1090 case IFLNK:
1091 if (di->di_size > newsb->fs_maxsymlinklen) {
1092 case IFDIR:
1093 case IFREG:
1094 map_inode_data_blocks(di, fn);
1096 break;
1100 * Make a callback call, a la map_inode_data_blocks, for all data
1101 * blocks in the entire fs. This is used only once, in
1102 * update_for_data_move, but it's out at top level because the complex
1103 * downward-funarg nesting that would otherwise result seems to give
1104 * gcc gastric distress.
1106 static void
1107 map_data_blocks(mark_callback_t fn, int ncg)
1109 map_inodes(&dblk_callback, ncg, (void *) fn);
1112 * Initialize the blkmove array.
1114 static void
1115 blkmove_init(void)
1117 int i;
1119 blkmove = alloconce(oldsb->fs_size * sizeof(*blkmove), "blkmove");
1120 for (i = 0; i < oldsb->fs_size; i++)
1121 blkmove[i] = i;
1124 * Load the inodes off disk. Allocates the structures and initializes
1125 * them - the inodes from disk, the flags to zero.
1127 static void
1128 loadinodes(void)
1130 int cg;
1131 struct ufs1_dinode *iptr;
1133 inodes = alloconce(oldsb->fs_ncg * oldsb->fs_ipg * sizeof(struct ufs1_dinode), "inodes");
1134 iflags = alloconce(oldsb->fs_ncg * oldsb->fs_ipg, "inode flags");
1135 bzero(iflags, oldsb->fs_ncg * oldsb->fs_ipg);
1136 iptr = inodes;
1137 for (cg = 0; cg < oldsb->fs_ncg; cg++) {
1138 readat(fsbtodb(oldsb, cgimin(oldsb, cg)), iptr,
1139 oldsb->fs_ipg * sizeof(struct ufs1_dinode));
1140 iptr += oldsb->fs_ipg;
1144 * Report a filesystem-too-full problem.
1146 static void
1147 toofull(void)
1149 printf("Sorry, would run out of data blocks\n");
1150 exit(1);
1153 * Record a desire to move "n" frags from "from" to "to".
1155 static void
1156 mark_move(unsigned int from, unsigned int to, unsigned int n)
1158 for (; n > 0; n--)
1159 blkmove[from++] = to++;
1161 /* Helper function - evict n frags, starting with start (cg-relative).
1162 * The free bitmap is scanned, unallocated frags are ignored, and
1163 * each block of consecutive allocated frags is moved as a unit.
1165 static void
1166 fragmove(struct cg * cg, int base, unsigned int start, unsigned int n)
1168 int i;
1169 int run;
1170 run = 0;
1171 for (i = 0; i <= n; i++) {
1172 if ((i < n) && bit_is_clr(cg_blksfree(cg, 0), start + i)) {
1173 run++;
1174 } else {
1175 if (run > 0) {
1176 int off;
1177 off = find_freespace(run);
1178 if (off < 0)
1179 toofull();
1180 mark_move(base + start + i - run, off, run);
1181 set_bits(cg_blksfree(cg, 0), start + i - run,
1182 run);
1183 clr_bits(cg_blksfree(cgs[dtog(oldsb, off)], 0),
1184 dtogd(oldsb, off), run);
1186 run = 0;
1191 * Evict all data blocks from the given cg, starting at minfrag (based
1192 * at the beginning of the cg), for length nfrag. The eviction is
1193 * assumed to be entirely data-area; this should not be called with a
1194 * range overlapping the metadata structures in the cg. It also
1195 * assumes minfrag points into the given cg; it will misbehave if this
1196 * is not true.
1198 * See the comment header on find_freespace() for one possible bug
1199 * lurking here.
1201 static void
1202 evict_data(struct cg * cg, unsigned int minfrag, unsigned int nfrag)
1204 int base; /* base of cg (in frags from beginning of fs) */
1207 base = cgbase(oldsb, cg->cg_cgx);
1208 /* Does the boundary fall in the middle of a block? To avoid breaking
1209 * between frags allocated as consecutive, we always evict the whole
1210 * block in this case, though one could argue we should check to see
1211 * if the frag before or after the break is unallocated. */
1212 if (minfrag % oldsb->fs_frag) {
1213 int n;
1214 n = minfrag % oldsb->fs_frag;
1215 minfrag -= n;
1216 nfrag += n;
1218 /* Do whole blocks. If a block is wholly free, skip it; if wholly
1219 * allocated, move it in toto. If neither, call fragmove() to move
1220 * the frags to new locations. */
1221 while (nfrag >= oldsb->fs_frag) {
1222 if (!blk_is_set(cg_blksfree(cg, 0), minfrag, oldsb->fs_frag)) {
1223 if (blk_is_clr(cg_blksfree(cg, 0), minfrag,
1224 oldsb->fs_frag)) {
1225 int off;
1226 off = find_freeblock();
1227 if (off < 0)
1228 toofull();
1229 mark_move(base + minfrag, off, oldsb->fs_frag);
1230 set_bits(cg_blksfree(cg, 0), minfrag,
1231 oldsb->fs_frag);
1232 clr_bits(cg_blksfree(cgs[dtog(oldsb, off)], 0),
1233 dtogd(oldsb, off), oldsb->fs_frag);
1234 } else {
1235 fragmove(cg, base, minfrag, oldsb->fs_frag);
1238 minfrag += oldsb->fs_frag;
1239 nfrag -= oldsb->fs_frag;
1241 /* Clean up any sub-block amount left over. */
1242 if (nfrag) {
1243 fragmove(cg, base, minfrag, nfrag);
1247 * Move all data blocks according to blkmove. We have to be careful,
1248 * because we may be updating indirect blocks that will themselves be
1249 * getting moved, or inode int32_t arrays that point to indirect
1250 * blocks that will be moved. We call this before
1251 * update_for_data_move, and update_for_data_move does inodes first,
1252 * then indirect blocks in preorder, so as to make sure that the
1253 * filesystem is self-consistent at all points, for better crash
1254 * tolerance. (We can get away with this only because all the writes
1255 * done by perform_data_move() are writing into space that's not used
1256 * by the old filesystem.) If we crash, some things may point to the
1257 * old data and some to the new, but both copies are the same. The
1258 * only wrong things should be csum info and free bitmaps, which fsck
1259 * is entirely capable of cleaning up.
1261 * Since blkmove_init() initializes all blocks to move to their current
1262 * locations, we can have two blocks marked as wanting to move to the
1263 * same location, but only two and only when one of them is the one
1264 * that was already there. So if blkmove[i]==i, we ignore that entry
1265 * entirely - for unallocated blocks, we don't want it (and may be
1266 * putting something else there), and for allocated blocks, we don't
1267 * want to copy it anywhere.
1269 static void
1270 perform_data_move(void)
1272 int i;
1273 int run;
1274 int maxrun;
1275 char buf[65536];
1277 maxrun = sizeof(buf) / newsb->fs_fsize;
1278 run = 0;
1279 for (i = 0; i < oldsb->fs_size; i++) {
1280 if ((blkmove[i] == i) ||
1281 (run >= maxrun) ||
1282 ((run > 0) &&
1283 (blkmove[i] != blkmove[i - 1] + 1))) {
1284 if (run > 0) {
1285 readat(fsbtodb(oldsb, i - run), &buf[0],
1286 run << oldsb->fs_fshift);
1287 writeat(fsbtodb(oldsb, blkmove[i - run]),
1288 &buf[0], run << oldsb->fs_fshift);
1290 run = 0;
1292 if (blkmove[i] != i)
1293 run++;
1295 if (run > 0) {
1296 readat(fsbtodb(oldsb, i - run), &buf[0],
1297 run << oldsb->fs_fshift);
1298 writeat(fsbtodb(oldsb, blkmove[i - run]), &buf[0],
1299 run << oldsb->fs_fshift);
1303 * This modifies an array of int32_t, according to blkmove. This is
1304 * used to update inode block arrays and indirect blocks to point to
1305 * the new locations of data blocks.
1307 * Return value is the number of int32_ts that needed updating; in
1308 * particular, the return value is zero iff nothing was modified.
1310 static int
1311 movemap_blocks(int32_t * vec, int n)
1313 int rv;
1315 rv = 0;
1316 for (; n > 0; n--, vec++) {
1317 if (blkmove[*vec] != *vec) {
1318 *vec = blkmove[*vec];
1319 rv++;
1322 return (rv);
1324 static void
1325 moveblocks_callback(struct ufs1_dinode * di, unsigned int inum, void *arg)
1327 switch (di->di_mode & IFMT) {
1328 case IFLNK:
1329 if (di->di_size > oldsb->fs_maxsymlinklen) {
1330 case IFDIR:
1331 case IFREG:
1332 /* don't || these two calls; we need their
1333 * side-effects */
1334 if (movemap_blocks(&di->di_db[0], NDADDR)) {
1335 iflags[inum] |= IF_DIRTY;
1337 if (movemap_blocks(&di->di_ib[0], NIADDR)) {
1338 iflags[inum] |= IF_DIRTY;
1341 break;
1345 static void
1346 moveindir_callback(unsigned int off, unsigned int nfrag, unsigned int nbytes, int kind)
1348 if (kind == MDB_INDIR_PRE) {
1349 int32_t blk[howmany(MAXBSIZE, sizeof(int32_t))];
1350 readat(fsbtodb(oldsb, off), &blk[0], oldsb->fs_bsize);
1351 if (movemap_blocks(&blk[0], NINDIR(oldsb))) {
1352 writeat(fsbtodb(oldsb, off), &blk[0], oldsb->fs_bsize);
1357 * Update all inode data arrays and indirect blocks to point to the new
1358 * locations of data blocks. See the comment header on
1359 * perform_data_move for some ordering considerations.
1361 static void
1362 update_for_data_move(void)
1364 map_inodes(&moveblocks_callback, oldsb->fs_ncg, NULL);
1365 map_data_blocks(&moveindir_callback, oldsb->fs_ncg);
1368 * Initialize the inomove array.
1370 static void
1371 inomove_init(void)
1373 int i;
1375 inomove = alloconce(oldsb->fs_ipg * oldsb->fs_ncg * sizeof(*inomove),
1376 "inomove");
1377 for (i = (oldsb->fs_ipg * oldsb->fs_ncg) - 1; i >= 0; i--)
1378 inomove[i] = i;
1381 * Flush all dirtied inodes to disk. Scans the inode flags array; for
1382 * each dirty inode, it sets the BDIRTY bit on the first inode in the
1383 * block containing the dirty inode. Then it scans by blocks, and for
1384 * each marked block, writes it.
1386 static void
1387 flush_inodes(void)
1389 int i;
1390 int ni;
1391 int m;
1393 ni = newsb->fs_ipg * newsb->fs_ncg;
1394 m = INOPB(newsb) - 1;
1395 for (i = 0; i < ni; i++) {
1396 if (iflags[i] & IF_DIRTY) {
1397 iflags[i & ~m] |= IF_BDIRTY;
1400 m++;
1401 for (i = 0; i < ni; i += m) {
1402 if (iflags[i] & IF_BDIRTY) {
1403 writeat(fsbtodb(newsb, ino_to_fsba(newsb, i)),
1404 inodes + i, newsb->fs_bsize);
1409 * Evict all inodes from the specified cg. shrink() already checked
1410 * that there were enough free inodes, so the no-free-inodes check is
1411 * a can't-happen. If it does trip, the filesystem should be in good
1412 * enough shape for fsck to fix; see the comment on perform_data_move
1413 * for the considerations in question.
1415 static void
1416 evict_inodes(struct cg * cg)
1418 int inum;
1419 int i;
1420 int fi;
1422 inum = newsb->fs_ipg * cg->cg_cgx;
1423 for (i = 0; i < newsb->fs_ipg; i++, inum++) {
1424 if (inodes[inum].di_mode != 0) {
1425 fi = find_freeinode();
1426 if (fi < 0) {
1427 printf("Sorry, inodes evaporated - "
1428 "filesystem probably needs fsck\n");
1429 exit(1);
1431 inomove[inum] = fi;
1432 clr_bits(cg_inosused(cg, 0), i, 1);
1433 set_bits(cg_inosused(cgs[ino_to_cg(newsb, fi)], 0),
1434 fi % newsb->fs_ipg, 1);
1439 * Move inodes from old locations to new. Does not actually write
1440 * anything to disk; just copies in-core and sets dirty bits.
1442 * We have to be careful here for reasons similar to those mentioned in
1443 * the comment header on perform_data_move, above: for the sake of
1444 * crash tolerance, we want to make sure everything is present at both
1445 * old and new locations before we update pointers. So we call this
1446 * first, then flush_inodes() to get them out on disk, then update
1447 * directories to match.
1449 static void
1450 perform_inode_move(void)
1452 int i;
1453 int ni;
1455 ni = oldsb->fs_ipg * oldsb->fs_ncg;
1456 for (i = 0; i < ni; i++) {
1457 if (inomove[i] != i) {
1458 inodes[inomove[i]] = inodes[i];
1459 iflags[inomove[i]] = iflags[i] | IF_DIRTY;
1464 * Update the directory contained in the nb bytes at buf, to point to
1465 * inodes' new locations.
1467 static int
1468 update_dirents(char *buf, int nb)
1470 int rv;
1471 #define d ((struct direct *)buf)
1473 rv = 0;
1474 while (nb > 0) {
1475 if (inomove[d->d_ino] != d->d_ino) {
1476 rv++;
1477 d->d_ino = inomove[d->d_ino];
1479 nb -= d->d_reclen;
1480 buf += d->d_reclen;
1482 return (rv);
1483 #undef d
1486 * Callback function for map_inode_data_blocks, for updating a
1487 * directory to point to new inode locations.
1489 static void
1490 update_dir_data(unsigned int bn, unsigned int size, unsigned int nb, int kind)
1492 if (kind == MDB_DATA) {
1493 union {
1494 struct direct d;
1495 char ch[MAXBSIZE];
1496 } buf;
1497 readat(fsbtodb(oldsb, bn), &buf, size << oldsb->fs_fshift);
1498 if (update_dirents((char *) &buf, nb)) {
1499 writeat(fsbtodb(oldsb, bn), &buf,
1500 size << oldsb->fs_fshift);
1504 static void
1505 dirmove_callback(struct ufs1_dinode * di, unsigned int inum, void *arg)
1507 switch (di->di_mode & IFMT) {
1508 case IFDIR:
1509 map_inode_data_blocks(di, &update_dir_data);
1510 break;
1514 * Update directory entries to point to new inode locations.
1516 static void
1517 update_for_inode_move(void)
1519 map_inodes(&dirmove_callback, newsb->fs_ncg, NULL);
1522 * Shrink the filesystem.
1524 static void
1525 shrink(void)
1527 int i;
1529 /* Load the inodes off disk - we'll need 'em. */
1530 loadinodes();
1531 /* Update the timestamp. */
1532 newsb->fs_time = timestamp();
1533 /* Update the size figures. */
1534 newsb->fs_size = dbtofsb(newsb, newsize);
1535 newsb->fs_old_ncyl = (newsb->fs_size * NSPF(newsb)) / newsb->fs_old_spc;
1536 newsb->fs_ncg = howmany(newsb->fs_old_ncyl, newsb->fs_old_cpg);
1537 /* Does the (new) last cg end before the end of its inode area? See
1538 * the similar code in grow() for more on this. */
1539 if (cgdmin(newsb, newsb->fs_ncg - 1) > newsb->fs_size) {
1540 newsb->fs_ncg--;
1541 newsb->fs_old_ncyl = newsb->fs_ncg * newsb->fs_old_cpg;
1542 newsb->fs_size = (newsb->fs_old_ncyl * newsb->fs_old_spc) / NSPF(newsb);
1543 printf("Warning: last cylinder group is too small;\n");
1544 printf(" dropping it. New size = %lu.\n",
1545 (unsigned long int) fsbtodb(newsb, newsb->fs_size));
1547 /* Let's make sure we're not being shrunk into oblivion. */
1548 if (newsb->fs_ncg < 1) {
1549 printf("Size too small - filesystem would have no cylinders\n");
1550 exit(1);
1552 /* Initialize for block motion. */
1553 blkmove_init();
1554 /* Update csum size, then fix up for the new size */
1555 newsb->fs_cssize = fragroundup(newsb,
1556 newsb->fs_ncg * sizeof(struct csum));
1557 csum_fixup();
1558 /* Evict data from any cgs being wholly eliminated */
1559 for (i = newsb->fs_ncg; i < oldsb->fs_ncg; i++) {
1560 int base;
1561 int dlow;
1562 int dhigh;
1563 int dmax;
1564 base = cgbase(oldsb, i);
1565 dlow = cgsblock(oldsb, i) - base;
1566 dhigh = cgdmin(oldsb, i) - base;
1567 dmax = oldsb->fs_size - base;
1568 if (dmax > cgs[i]->cg_ndblk)
1569 dmax = cgs[i]->cg_ndblk;
1570 evict_data(cgs[i], 0, dlow);
1571 evict_data(cgs[i], dhigh, dmax - dhigh);
1572 newsb->fs_cstotal.cs_ndir -= cgs[i]->cg_cs.cs_ndir;
1573 newsb->fs_cstotal.cs_nifree -= cgs[i]->cg_cs.cs_nifree;
1574 newsb->fs_cstotal.cs_nffree -= cgs[i]->cg_cs.cs_nffree;
1575 newsb->fs_cstotal.cs_nbfree -= cgs[i]->cg_cs.cs_nbfree;
1577 /* Update the new last cg. */
1578 cgs[newsb->fs_ncg - 1]->cg_ndblk = newsb->fs_size -
1579 ((newsb->fs_ncg - 1) * newsb->fs_fpg);
1580 /* Is the new last cg partial? If so, evict any data from the part
1581 * being shrunken away. */
1582 if (newsb->fs_size % newsb->fs_fpg) {
1583 struct cg *cg;
1584 int oldcgsize;
1585 int newcgsize;
1586 cg = cgs[newsb->fs_ncg - 1];
1587 newcgsize = newsb->fs_size % newsb->fs_fpg;
  1588 oldcgsize = oldsb->fs_size - ((newsb->fs_ncg - 1) * oldsb->fs_fpg);
1589 if (oldcgsize > oldsb->fs_fpg)
1590 oldcgsize = oldsb->fs_fpg;
1591 evict_data(cg, newcgsize, oldcgsize - newcgsize);
1592 clr_bits(cg_blksfree(cg, 0), newcgsize, oldcgsize - newcgsize);
1594 /* Find out whether we would run out of inodes. (Note we haven't
1595 * actually done anything to the filesystem yet; all those evict_data
1596 * calls just update blkmove.) */
1598 int slop;
1599 slop = 0;
1600 for (i = 0; i < newsb->fs_ncg; i++)
1601 slop += cgs[i]->cg_cs.cs_nifree;
1602 for (; i < oldsb->fs_ncg; i++)
1603 slop -= oldsb->fs_ipg - cgs[i]->cg_cs.cs_nifree;
1604 if (slop < 0) {
1605 printf("Sorry, would run out of inodes\n");
1606 exit(1);
1609 /* Copy data, then update pointers to data. See the comment header on
1610 * perform_data_move for ordering considerations. */
1611 perform_data_move();
1612 update_for_data_move();
1613 /* Now do inodes. Initialize, evict, move, update - see the comment
1614 * header on perform_inode_move. */
1615 inomove_init();
1616 for (i = newsb->fs_ncg; i < oldsb->fs_ncg; i++)
1617 evict_inodes(cgs[i]);
1618 perform_inode_move();
1619 flush_inodes();
1620 update_for_inode_move();
1621 /* Recompute all the bitmaps; most of them probably need it anyway,
1622 * the rest are just paranoia and not wanting to have to bother
1623 * keeping track of exactly which ones require it. */
1624 for (i = 0; i < newsb->fs_ncg; i++)
1625 cgflags[i] |= CGF_DIRTY | CGF_BLKMAPS | CGF_INOMAPS;
  1626 /* Update the cg_old_ncyl value for the last cylinder group. The condition is
1627 * commented out because fsck whines if not - see the similar
1628 * condition in grow() for more. */
1629 /* XXX fix once fsck is fixed */
1630 /* if (newsb->fs_old_ncyl % newsb->fs_old_cpg) XXX */
1631 /*XXXJTK*/
1632 cgs[newsb->fs_ncg - 1]->cg_old_ncyl =
1633 newsb->fs_old_ncyl % newsb->fs_old_cpg;
1634 /* Make fs_dsize match the new reality. */
1635 recompute_fs_dsize();
1638 * Recompute the block totals, block cluster summaries, and rotational
1639 * position summaries, for a given cg (specified by number), based on
1640 * its free-frag bitmap (cg_blksfree()[]).
1642 static void
1643 rescan_blkmaps(int cgn)
1645 struct cg *cg;
1646 int f;
1647 int b;
1648 int blkfree;
1649 int blkrun;
1650 int fragrun;
1651 int fwb;
1653 cg = cgs[cgn];
1654 /* Subtract off the current totals from the sb's summary info */
1655 newsb->fs_cstotal.cs_nffree -= cg->cg_cs.cs_nffree;
1656 newsb->fs_cstotal.cs_nbfree -= cg->cg_cs.cs_nbfree;
1657 /* Clear counters and bitmaps. */
1658 cg->cg_cs.cs_nffree = 0;
1659 cg->cg_cs.cs_nbfree = 0;
1660 bzero(&cg->cg_frsum[0], MAXFRAG * sizeof(cg->cg_frsum[0]));
1661 bzero(&cg_blktot(cg, 0)[0],
1662 newsb->fs_old_cpg * sizeof(cg_blktot(cg, 0)[0]));
1663 bzero(&cg_blks(newsb, cg, 0, 0)[0],
1664 newsb->fs_old_cpg * newsb->fs_old_nrpos *
1665 sizeof(cg_blks(newsb, cg, 0, 0)[0]));
1666 if (newsb->fs_contigsumsize > 0) {
1667 cg->cg_nclusterblks = cg->cg_ndblk / newsb->fs_frag;
1668 bzero(&cg_clustersum(cg, 0)[1],
1669 newsb->fs_contigsumsize *
1670 sizeof(cg_clustersum(cg, 0)[1]));
1671 bzero(&cg_clustersfree(cg, 0)[0],
1672 howmany((newsb->fs_old_cpg * newsb->fs_old_spc) / NSPB(newsb),
1673 NBBY));
1675 /* Scan the free-frag bitmap. Runs of free frags are kept track of
1676 * with fragrun, and recorded into cg_frsum[] and cg_cs.cs_nffree; on
1677 * each block boundary, entire free blocks are recorded as well. */
1678 blkfree = 1;
1679 blkrun = 0;
1680 fragrun = 0;
1681 f = 0;
1682 b = 0;
1683 fwb = 0;
1684 while (f < cg->cg_ndblk) {
1685 if (bit_is_set(cg_blksfree(cg, 0), f)) {
1686 fragrun++;
1687 } else {
1688 blkfree = 0;
1689 if (fragrun > 0) {
1690 cg->cg_frsum[fragrun]++;
1691 cg->cg_cs.cs_nffree += fragrun;
1693 fragrun = 0;
1695 f++;
1696 fwb++;
1697 if (fwb >= newsb->fs_frag) {
1698 if (blkfree) {
1699 cg->cg_cs.cs_nbfree++;
1700 if (newsb->fs_contigsumsize > 0)
1701 set_bits(cg_clustersfree(cg, 0), b, 1);
1702 cg_blktot(cg, 0)[cbtocylno(newsb, f - newsb->fs_frag)]++;
1703 cg_blks(newsb, cg,
1704 cbtocylno(newsb, f - newsb->fs_frag),
1705 0)[cbtorpos(newsb, f - newsb->fs_frag)]++;
1706 blkrun++;
1707 } else {
1708 if (fragrun > 0) {
1709 cg->cg_frsum[fragrun]++;
1710 cg->cg_cs.cs_nffree += fragrun;
1712 if (newsb->fs_contigsumsize > 0) {
1713 if (blkrun > 0) {
1714 cg_clustersum(cg, 0)[(blkrun > newsb->fs_contigsumsize) ? newsb->fs_contigsumsize : blkrun]++;
1717 blkrun = 0;
1719 fwb = 0;
1720 b++;
1721 blkfree = 1;
1722 fragrun = 0;
1725 if (fragrun > 0) {
1726 cg->cg_frsum[fragrun]++;
1727 cg->cg_cs.cs_nffree += fragrun;
1729 if ((blkrun > 0) && (newsb->fs_contigsumsize > 0)) {
1730 cg_clustersum(cg, 0)[(blkrun > newsb->fs_contigsumsize) ?
1731 newsb->fs_contigsumsize : blkrun]++;
1734 * Put the updated summary info back into csums, and add it
1735 * back into the sb's summary info. Then mark the cg dirty.
1737 csums[cgn] = cg->cg_cs;
1738 newsb->fs_cstotal.cs_nffree += cg->cg_cs.cs_nffree;
1739 newsb->fs_cstotal.cs_nbfree += cg->cg_cs.cs_nbfree;
1740 cgflags[cgn] |= CGF_DIRTY;
1743 * Recompute the cg_inosused()[] bitmap, and the cs_nifree and cs_ndir
1744 * values, for a cg, based on the in-core inodes for that cg.
1746 static void
1747 rescan_inomaps(int cgn)
1749 struct cg *cg;
1750 int inum;
1751 int iwc;
1753 cg = cgs[cgn];
1754 newsb->fs_cstotal.cs_ndir -= cg->cg_cs.cs_ndir;
1755 newsb->fs_cstotal.cs_nifree -= cg->cg_cs.cs_nifree;
1756 cg->cg_cs.cs_ndir = 0;
1757 cg->cg_cs.cs_nifree = 0;
1758 bzero(&cg_inosused(cg, 0)[0], howmany(newsb->fs_ipg, NBBY));
1759 inum = cgn * newsb->fs_ipg;
1760 if (cgn == 0) {
1761 set_bits(cg_inosused(cg, 0), 0, 2);
1762 iwc = 2;
1763 inum += 2;
1764 } else {
1765 iwc = 0;
1767 for (; iwc < newsb->fs_ipg; iwc++, inum++) {
1768 switch (inodes[inum].di_mode & IFMT) {
1769 case 0:
1770 cg->cg_cs.cs_nifree++;
1771 break;
1772 case IFDIR:
1773 cg->cg_cs.cs_ndir++;
1774 /* fall through */
1775 default:
1776 set_bits(cg_inosused(cg, 0), iwc, 1);
1777 break;
1780 csums[cgn] = cg->cg_cs;
1781 newsb->fs_cstotal.cs_ndir += cg->cg_cs.cs_ndir;
1782 newsb->fs_cstotal.cs_nifree += cg->cg_cs.cs_nifree;
1783 cgflags[cgn] |= CGF_DIRTY;
1786 * Flush cgs to disk, recomputing anything they're marked as needing.
1788 static void
1789 flush_cgs(void)
1791 int i;
1793 for (i = 0; i < newsb->fs_ncg; i++) {
1794 if (cgflags[i] & CGF_BLKMAPS) {
1795 rescan_blkmaps(i);
1797 if (cgflags[i] & CGF_INOMAPS) {
1798 rescan_inomaps(i);
1800 if (cgflags[i] & CGF_DIRTY) {
1801 cgs[i]->cg_rotor = 0;
1802 cgs[i]->cg_frotor = 0;
1803 cgs[i]->cg_irotor = 0;
1804 writeat(fsbtodb(newsb, cgtod(newsb, i)), cgs[i],
1805 cgblksz);
1808 writeat(fsbtodb(newsb, newsb->fs_csaddr), csums, newsb->fs_cssize);
1811 * Write the superblock, both to the main superblock and to each cg's
1812 * alternative superblock.
1814 static void
1815 write_sbs(void)
1817 int i;
1819 writeat(where / DEV_BSIZE, newsb, SBLOCKSIZE);
1820 for (i = 0; i < newsb->fs_ncg; i++) {
1821 writeat(fsbtodb(newsb, cgsblock(newsb, i)), newsb, SBLOCKSIZE);
1825 * main().
1827 int main(int, char **);
1829 main(int ac, char **av)
1831 size_t i;
1832 if (ac != 3) {
1833 fprintf(stderr, "usage: %s filesystem new-size\n",
1834 getprogname());
1835 exit(1);
1837 fd = open(av[1], O_RDWR, 0);
1838 if (fd < 0)
1839 err(1, "Cannot open `%s'", av[1]);
1840 checksmallio();
1841 newsize = atoi(av[2]);
1842 oldsb = (struct fs *) & sbbuf;
1843 newsb = (struct fs *) (SBLOCKSIZE + (char *) &sbbuf);
1844 for (where = search[i = 0]; search[i] != -1; where = search[++i]) {
1845 readat(where / DEV_BSIZE, oldsb, SBLOCKSIZE);
1846 if (oldsb->fs_magic == FS_UFS1_MAGIC)
1847 break;
1848 if (where == SBLOCK_UFS2)
1849 continue;
1850 if (oldsb->fs_old_flags & FS_FLAGS_UPDATED)
  1851 err(1, "Cannot resize ffsv2 format superblock!");
1853 if (where == (off_t)-1)
1854 errx(1, "Bad magic number");
1855 oldsb->fs_qbmask = ~(int64_t) oldsb->fs_bmask;
1856 oldsb->fs_qfmask = ~(int64_t) oldsb->fs_fmask;
1857 if (oldsb->fs_ipg % INOPB(oldsb)) {
1858 printf("ipg[%d] %% INOPB[%d] != 0\n", (int) oldsb->fs_ipg,
1859 (int) INOPB(oldsb));
1860 exit(1);
1862 /* The superblock is bigger than struct fs (there are trailing tables,
1863 * of non-fixed size); make sure we copy the whole thing. SBLOCKSIZE may
1864 * be an over-estimate, but we do this just once, so being generous is
1865 * cheap. */
1866 bcopy(oldsb, newsb, SBLOCKSIZE);
1867 loadcgs();
1868 if (newsize > fsbtodb(oldsb, oldsb->fs_size)) {
1869 grow();
1870 } else if (newsize < fsbtodb(oldsb, oldsb->fs_size)) {
1871 shrink();
1873 flush_cgs();
1874 write_sbs();
1875 exit(0);