5887 want bootfs
[unleashed.git] / usr / src / cmd / swap / swap.c
blob91cecd5cb44c47e2a3a42b5b31bfd6cfba62a1bf
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
39 #pragma ident "%Z%%M% %I% %E% SMI"
/*
 * Swap administrative interface
 * Used to add/delete/list swap devices.
 */
46 #include <sys/types.h>
47 #include <sys/dumpadm.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <unistd.h>
52 #include <errno.h>
53 #include <sys/param.h>
54 #include <dirent.h>
55 #include <sys/swap.h>
56 #include <sys/sysmacros.h>
57 #include <sys/mkdev.h>
58 #include <sys/stat.h>
59 #include <sys/statvfs.h>
60 #include <sys/uadmin.h>
61 #include <vm/anon.h>
62 #include <fcntl.h>
63 #include <locale.h>
64 #include <libintl.h>
65 #include <libdiskmgt.h>
66 #include <sys/fs/zfs.h>
/*
 * Option bits collected in main()'s flag word, one bit per command-line
 * switch.
 */
#define	LFLAG	(1 << 0)	/* swap -l (list swap devices) */
#define	DFLAG	(1 << 1)	/* swap -d (delete swap device) */
#define	AFLAG	(1 << 2)	/* swap -a (add swap device) */
#define	SFLAG	(1 << 3)	/* swap -s (swap info summary) */
#define	P1FLAG	(1 << 4)	/* swap -1 (swapadd pass1; do not modify dump device) */
#define	P2FLAG	(1 << 5)	/* swap -2 (swapadd pass2; do not modify dump device) */
#define	HFLAG	(1 << 6)	/* swap -h (size in human readable format) */
#define	KFLAG	(1 << 7)	/* swap -k (size in kilobytes) */

/* Scratch buffer type filled in by number_to_scaled_string(). */
#define	NUMBER_WIDTH	64
typedef char numbuf_t[NUMBER_WIDTH];

/* argv[0]; used as the prefix of diagnostics. */
static char *prognamep;

/* Reporting. */
static void usage(void);
static int doswap(int flag);
static int list(int flag);
static char *number_to_scaled_string(numbuf_t buf, unsigned long long number,
    unsigned long long unit_from, unsigned long long scale);

/* Swap area manipulation. */
static int valid(char *, off_t, off_t);
static int add(char *, off_t, off_t, int);
static int delete(char *, off_t);
92 int
93 main(int argc, char **argv)
95 int c, flag = 0;
96 int ret;
97 int error = 0;
98 off_t s_offset = 0;
99 off_t length = 0;
100 char *pathname;
101 char *msg;
103 (void) setlocale(LC_ALL, "");
105 #if !defined(TEXT_DOMAIN)
106 #define TEXT_DOMAIN "SYS_TEST"
107 #endif
108 (void) textdomain(TEXT_DOMAIN);
110 prognamep = argv[0];
111 if (argc < 2) {
112 usage();
113 exit(1);
116 while ((c = getopt(argc, argv, "khlsd:a:12")) != EOF) {
117 char *char_p;
118 switch (c) {
119 case 'l': /* list all the swap devices */
120 flag |= LFLAG;
121 break;
122 case 's':
123 flag |= SFLAG;
124 break;
125 case 'd':
127 * The argument for starting offset is optional.
128 * If no argument is specified, the entire swap file
129 * is added although this will fail if a non-zero
130 * starting offset was specified when added.
132 if ((argc - optind) > 1 || flag != 0) {
133 usage();
134 exit(1);
136 flag |= DFLAG;
137 pathname = optarg;
138 if (optind < argc) {
139 errno = 0;
140 s_offset = strtol(argv[optind++], &char_p, 10);
141 if (errno != 0 || *char_p != '\0') {
142 (void) fprintf(stderr,
143 gettext("error in [low block]\n"));
144 exit(1);
147 ret = delete(pathname, s_offset);
148 break;
150 case 'a':
152 * The arguments for starting offset and number of
153 * blocks are optional. If only the starting offset
154 * is specified, all the blocks to the end of the swap
155 * file will be added. If no starting offset is
156 * specified, the entire swap file is assumed.
158 if ((argc - optind) > 2 ||
159 (flag & ~(P1FLAG | P2FLAG)) != 0) {
160 usage();
161 exit(1);
163 if (*optarg != '/') {
164 (void) fprintf(stderr,
165 gettext("%s: path must be absolute\n"),
166 prognamep);
167 exit(1);
169 flag |= AFLAG;
170 pathname = optarg;
171 if (optind < argc) {
172 errno = 0;
173 s_offset = strtol(argv[optind++], &char_p, 10);
174 if (errno != 0 || *char_p != '\0') {
175 (void) fprintf(stderr,
176 gettext("error in [low block]\n"));
177 exit(1);
180 if (optind < argc) {
181 errno = 0;
182 length = strtol(argv[optind++], &char_p, 10);
183 if (errno != 0 || *char_p != '\0') {
184 (void) fprintf(stderr,
185 gettext("error in [nbr of blocks]\n"));
186 exit(1);
189 break;
190 case 'h':
191 flag |= HFLAG;
192 break;
194 case 'k':
195 flag |= KFLAG;
196 break;
198 case '1':
199 flag |= P1FLAG;
200 break;
202 case '2':
203 flag |= P2FLAG;
204 break;
206 case '?':
207 usage();
208 exit(1);
212 if (flag & SFLAG) {
213 if (flag & ~SFLAG & ~HFLAG) {
215 * The only option that can be used with -s is -h.
217 usage();
218 exit(1);
221 ret = doswap(flag);
225 if (flag & LFLAG) {
226 if (flag & ~KFLAG & ~HFLAG & ~LFLAG) {
227 usage();
228 exit(1);
230 ret = list(flag);
234 * do the add here. Check for in use prior to add.
235 * The values for length and offset are set above.
237 if (flag & AFLAG) {
239 * If device is in use for a swap device, print message
240 * and exit.
242 if (dm_inuse(pathname, &msg, DM_WHO_SWAP, &error) ||
243 error) {
244 if (error != 0) {
245 (void) fprintf(stderr, gettext("Error occurred"
246 " with device in use checking: %s\n"),
247 strerror(error));
248 } else {
249 (void) fprintf(stderr, "%s", msg);
250 free(msg);
251 exit(1);
254 if ((ret = valid(pathname,
255 s_offset * 512, length * 512)) == 0) {
256 ret = add(pathname, s_offset, length, flag);
259 if (!(flag & ~HFLAG & ~KFLAG)) {
260 /* only -h and/or -k flag, or no flag */
261 usage();
262 exit(1);
264 return (ret);
268 static void
269 usage(void)
271 (void) fprintf(stderr, gettext("Usage:\t%s -l\n"), prognamep);
272 (void) fprintf(stderr, gettext("\tsub option :\n"));
273 (void) fprintf(stderr, gettext("\t\t-h : displays size in human "
274 "readable format\n"));
275 (void) fprintf(stderr, gettext("\t\t-k : displays size in KB\n"));
276 (void) fprintf(stderr, "\t%s -s\n", prognamep);
277 (void) fprintf(stderr, gettext("\tsub option :\n"));
278 (void) fprintf(stderr, gettext("\t\t-h : displays size in human "
279 "readable format rather than KB\n"));
280 (void) fprintf(stderr, gettext("\t%s -d <file name> [low block]\n"),
281 prognamep);
282 (void) fprintf(stderr, gettext("\t%s -a <file name> [low block]"
283 " [nbr of blocks]\n"), prognamep);
287 * Implement:
288 * #define ctok(x) ((ctob(x))>>10)
289 * in a machine independent way. (Both assume a click > 1k)
291 static size_t
292 ctok(pgcnt_t clicks)
294 static int factor = -1;
296 if (factor == -1)
297 factor = (int)(sysconf(_SC_PAGESIZE) >> 10);
298 return ((size_t)(clicks * factor));
302 static int
303 doswap(int flag)
305 struct anoninfo ai;
306 pgcnt_t allocated, reserved, available;
307 numbuf_t numbuf;
308 unsigned long long scale = 1024L;
311 * max = total amount of swap space including physical memory
312 * ai.ani_max = MAX(anoninfo.ani_resv, anoninfo.ani_max) +
313 * availrmem - swapfs_minfree;
314 * ai.ani_free = amount of unallocated anonymous memory
315 * (ie. = resverved_unallocated + unreserved)
316 * ai.ani_free = anoninfo.ani_free + (availrmem - swapfs_minfree);
317 * ai.ani_resv = total amount of reserved anonymous memory
318 * ai.ani_resv = anoninfo.ani_resv;
320 * allocated = anon memory not free
321 * reserved = anon memory reserved but not allocated
322 * available = anon memory not reserved
324 if (swapctl(SC_AINFO, &ai) == -1) {
325 perror(prognamep);
326 return (2);
329 allocated = ai.ani_max - ai.ani_free;
330 reserved = ai.ani_resv - allocated;
331 available = ai.ani_max - ai.ani_resv;
334 * TRANSLATION_NOTE
335 * Translations (if any) of these keywords should match with
336 * translations (if any) of the swap.1M man page keywords for
337 * -s option: "allocated", "reserved", "used", "available"
340 if (flag & HFLAG) {
341 int factor = (int)(sysconf(_SC_PAGESIZE));
342 (void) printf(gettext("total: %s allocated + "),
343 number_to_scaled_string(numbuf, allocated,
344 factor, scale));
345 (void) printf(gettext("%s reserved = "),
346 number_to_scaled_string(numbuf, reserved,
347 factor, scale));
348 (void) printf(gettext("%s used, "),
349 number_to_scaled_string(numbuf,
350 allocated + reserved, factor, scale));
351 (void) printf(gettext("%s available\n"),
352 number_to_scaled_string(numbuf, available,
353 factor, scale));
354 } else {
355 (void) printf(gettext("total: %luk bytes allocated + %luk"
356 " reserved = %luk used, %luk available\n"),
357 ctok(allocated), ctok(reserved),
358 ctok(reserved) + ctok(allocated),
359 ctok(available));
362 return (0);
365 static int
366 list(int flag)
368 struct swaptable *st;
369 struct swapent *swapent;
370 int i;
371 struct stat64 statbuf;
372 char *path;
373 char fullpath[MAXPATHLEN+1];
374 int num;
375 numbuf_t numbuf;
376 unsigned long long scale = 1024L;
378 if ((num = swapctl(SC_GETNSWP, NULL)) == -1) {
379 perror(prognamep);
380 return (2);
382 if (num == 0) {
383 (void) fprintf(stderr, gettext("No swap devices configured\n"));
384 return (1);
387 if ((st = malloc(num * sizeof (swapent_t) + sizeof (int)))
388 == NULL) {
389 (void) fprintf(stderr,
390 gettext("Malloc failed. Please try later.\n"));
391 perror(prognamep);
392 return (2);
394 if ((path = malloc(num * MAXPATHLEN)) == NULL) {
395 (void) fprintf(stderr,
396 gettext("Malloc failed. Please try later.\n"));
397 perror(prognamep);
398 return (2);
400 swapent = st->swt_ent;
401 for (i = 0; i < num; i++, swapent++) {
402 swapent->ste_path = path;
403 path += MAXPATHLEN;
406 st->swt_n = num;
407 if ((num = swapctl(SC_LIST, st)) == -1) {
408 perror(prognamep);
409 return (2);
413 * TRANSLATION_NOTE
414 * Following translations for "swap -l" should account for for
415 * alignment of header and output.
416 * The first translation is for the header. If the alignment
417 * of the header changes, change the next 5 formats as needed
418 * to make alignment of output agree with alignment of the header.
419 * The next four translations are four cases for printing the
420 * 1st & 2nd fields.
421 * The next translation is for printing the 3rd, 4th & 5th fields.
423 * Translations (if any) of the following keywords should match the
424 * translations (if any) of the swap.1M man page keywords for
425 * -l option: "swapfile", "dev", "swaplo", "blocks", "free"
427 (void) printf(
428 gettext("swapfile dev swaplo blocks free\n"));
430 swapent = st->swt_ent;
431 for (i = 0; i < num; i++, swapent++) {
432 if (*swapent->ste_path != '/')
433 (void) snprintf(fullpath, sizeof (fullpath),
434 "/dev/%s", swapent->ste_path);
435 else
436 (void) snprintf(fullpath, sizeof (fullpath),
437 "%s", swapent->ste_path);
438 if (stat64(fullpath, &statbuf) < 0)
439 if (*swapent->ste_path != '/')
440 (void) printf(gettext("%-20s - "),
441 swapent->ste_path);
442 else
443 (void) printf(gettext("%-20s ?,? "),
444 fullpath);
445 else {
446 if (S_ISBLK(statbuf.st_mode) ||
447 S_ISCHR(statbuf.st_mode)) {
448 (void) printf(gettext("%-19s %2lu,%-2lu"),
449 fullpath,
450 major(statbuf.st_rdev),
451 minor(statbuf.st_rdev));
452 } else {
453 (void) printf(gettext("%-20s - "), fullpath);
457 int diskblks_per_page =
458 (int)(sysconf(_SC_PAGESIZE) >> DEV_BSHIFT);
459 if (flag & HFLAG) {
460 (void) printf(gettext(" %8s"),
461 number_to_scaled_string(numbuf,
462 swapent->ste_start, DEV_BSIZE,
463 scale));
464 (void) printf(gettext(" %8s"),
465 number_to_scaled_string(numbuf,
466 swapent->ste_pages *
467 diskblks_per_page,
468 DEV_BSIZE, scale));
469 (void) printf(gettext(" %8s"),
470 number_to_scaled_string(numbuf,
471 swapent->ste_free *
472 diskblks_per_page,
473 DEV_BSIZE, scale));
474 } else if (flag & KFLAG) {
475 (void) printf(gettext(" %7luK %7luK %7luK"),
476 swapent->ste_start * DEV_BSIZE / 1024,
477 swapent->ste_pages * diskblks_per_page *
478 DEV_BSIZE / 1024,
479 swapent->ste_free * diskblks_per_page *
480 DEV_BSIZE / 1024);
481 } else {
482 (void) printf(gettext(" %8lu %8lu %8lu"),
483 swapent->ste_start,
484 swapent->ste_pages * diskblks_per_page,
485 swapent->ste_free * diskblks_per_page);
488 if (swapent->ste_flags & ST_INDEL)
489 (void) printf(" INDEL\n");
490 else
491 (void) printf("\n");
493 return (0);
496 /* Copied from du.c */
497 static char *
498 number_to_scaled_string(
499 numbuf_t buf, /* put the result here */
500 unsigned long long number, /* convert this number */
501 unsigned long long unit_from, /* number of byes per input unit */
502 unsigned long long scale) /* 1024 (-h) or 1000 (-H) */
504 unsigned long long save = 0;
505 char *M = "KMGTPE"; /* Measurement: kilo, mega, giga, tera, peta, exa */
506 char *uom = M; /* unit of measurement, initially 'K' (=M[0]) */
508 if ((long long)number == (long long) -1) {
509 (void) strcpy(buf, "-1");
510 return (buf);
514 * Convert number from unit_from to given scale (1024 or 1000)
515 * This means multiply number with unit_from and divide by scale.
516 * if number is large enough, we first divide and then multiply
517 * to avoid an overflow (large enough here means 100 (rather arbitrary
518 * value) times scale in order to reduce rounding errors)
519 * otherwise, we first multiply and then divide to avoid an underflow.
521 if (number >= 100L * scale) {
522 number = number / scale;
523 number = number * unit_from;
524 } else {
525 number = number * unit_from;
526 number = number / scale;
530 * Now we have number as a count of scale units.
531 * Stop scaling when we reached exa bytes, then something is
532 * probably wrong with our number.
534 while ((number >= scale) && (*uom != 'E')) {
535 uom++; /* Next unit of measurement */
536 save = number;
537 number = (number + (scale / 2)) / scale;
540 /* Check if we should output a decimal place after the point */
541 if (save && ((save / scale) < 10)) {
542 /* sprintf() will round for us */
543 float fnum = (float)save / scale;
544 (void) sprintf(buf, "%.1f%c", fnum, *uom);
545 } else {
546 (void) sprintf(buf, "%llu%c", number, *uom);
548 return (buf);
/*
 * Print a dump-device related warning to stderr, followed by the text for
 * the current errno and a hint to run dumpadm(1M).
 *
 * warning is the (already translated) message to print first.
 */
static void
dumpadm_err(const char *warning)
{
	/* Capture the errno text first, before any output is attempted. */
	const char *reason = strerror(errno);

	(void) fprintf(stderr, "%s (%s):\n", warning, reason);
	(void) fprintf(stderr, gettext(
	    "run dumpadm(1M) to verify dump configuration\n"));
}
562 static int
563 delete(char *path, off_t offset)
565 swapres_t swr;
566 int fd;
568 swr.sr_name = path;
569 swr.sr_start = offset;
571 if (swapctl(SC_REMOVE, &swr) < 0) {
572 switch (errno) {
573 case (ENOSYS):
574 (void) fprintf(stderr, gettext(
575 "%s: Invalid operation for this filesystem type\n"),
576 path);
577 break;
578 default:
579 perror(path);
580 break;
582 return (2);
586 * If our swap -d succeeded, open up /dev/dump and ask what the dump
587 * device is set to. If this returns ENODEV, we just deleted the
588 * dump device, so try to change the dump device to another swap
589 * device. We do this by firing up /usr/sbin/dumpadm -ud swap.
591 if ((fd = open("/dev/dump", O_RDONLY)) >= 0) {
592 char dumpdev[MAXPATHLEN];
594 if (ioctl(fd, DIOCGETDEV, dumpdev) == -1) {
595 if (errno == ENODEV) {
596 (void) printf(gettext("%s was dump device --\n"
597 "invoking dumpadm(1M) -d swap to "
598 "select new dump device\n"), path);
600 * Close /dev/dump prior to executing dumpadm
601 * since /dev/dump mandates exclusive open.
603 (void) close(fd);
605 if (system("/usr/sbin/dumpadm -ud swap") == -1)
606 dumpadm_err(gettext(
607 "Warning: failed to execute dumpadm -d swap"));
608 } else
609 dumpadm_err(gettext(
610 "Warning: failed to check dump device"));
612 (void) close(fd);
613 } else
614 dumpadm_err(gettext("Warning: failed to open /dev/dump"));
616 return (0);
620 * swapres_t structure units are in 512-blocks
622 static int
623 add(char *path, off_t offset, off_t cnt, int flags)
625 swapres_t swr;
627 int fd, have_dumpdev = 1;
628 struct statvfs fsb;
631 * Before adding swap, we first check to see if we have a dump
632 * device configured. If we don't (errno == ENODEV), and if
633 * our SC_ADD is successful, then run /usr/sbin/dumpadm -ud swap
634 * to attempt to reconfigure the dump device to the new swap.
636 if ((fd = open("/dev/dump", O_RDONLY)) >= 0) {
637 char dumpdev[MAXPATHLEN];
639 if (ioctl(fd, DIOCGETDEV, dumpdev) == -1) {
640 if (errno == ENODEV)
641 have_dumpdev = 0;
642 else
643 dumpadm_err(gettext(
644 "Warning: failed to check dump device"));
647 (void) close(fd);
650 * zvols cannot act as both a swap device and dump device.
652 if (strncmp(dumpdev, ZVOL_FULL_DEV_DIR,
653 strlen(ZVOL_FULL_DEV_DIR)) == 0) {
654 if (strcmp(dumpdev, path) == 0) {
655 (void) fprintf(stderr, gettext("%s: zvol "
656 "cannot be used as a swap device and a "
657 "dump device\n"), path);
658 return (2);
662 } else if (!(flags & P1FLAG))
663 dumpadm_err(gettext("Warning: failed to open /dev/dump"));
665 swr.sr_name = path;
666 swr.sr_start = offset;
667 swr.sr_length = cnt;
669 if (swapctl(SC_ADD, &swr) < 0) {
670 switch (errno) {
671 case (ENOSYS):
672 (void) fprintf(stderr, gettext(
673 "%s: Invalid operation for this filesystem type\n"),
674 path);
675 break;
676 case (EEXIST):
677 (void) fprintf(stderr, gettext(
678 "%s: Overlapping swap files are not allowed\n"),
679 path);
680 break;
681 default:
682 perror(path);
683 break;
685 return (2);
689 * If the swapctl worked and we don't have a dump device, and /etc
690 * is part of a writeable filesystem, then run dumpadm -ud swap.
691 * If /etc (presumably part of /) is still mounted read-only, then
692 * dumpadm will fail to write its config file, so there's no point
693 * running it now. This also avoids spurious messages during boot
694 * when the first swapadd takes place, at which point / is still ro.
695 * Similarly, if swapadd invoked us with -1 or -2 (but root is
696 * writeable), we don't want to modify the dump device because
697 * /etc/init.d/savecore has yet to execute; if we run dumpadm now
698 * we would lose the user's previous setting.
700 if (!have_dumpdev && !(flags & (P1FLAG | P2FLAG)) &&
701 statvfs("/etc", &fsb) == 0 && !(fsb.f_flag & ST_RDONLY)) {
703 (void) printf(
704 gettext("operating system crash dump was previously "
705 "disabled --\ninvoking dumpadm(1M) -d swap to select "
706 "new dump device\n"));
708 if (system("/usr/sbin/dumpadm -ud swap") == -1)
709 dumpadm_err(gettext(
710 "Warning: failed to execute dumpadm -d swap"));
713 return (0);
716 static int
717 valid(char *pathname, off_t offset, off_t length)
719 struct stat64 f;
720 struct statvfs64 fs;
721 off_t need;
723 if (stat64(pathname, &f) < 0 || statvfs64(pathname, &fs) < 0) {
724 (void) perror(pathname);
725 return (errno);
728 if (!((S_ISREG(f.st_mode) && (f.st_mode & S_ISVTX) == S_ISVTX) ||
729 S_ISBLK(f.st_mode))) {
730 (void) fprintf(stderr,
731 gettext("\"%s\" is not valid for swapping.\n"
732 "It must be a block device or a regular file with the\n"
733 "\"save user text on execution\" bit set.\n"),
734 pathname);
735 return (EINVAL);
738 if (S_ISREG(f.st_mode)) {
739 if (length == 0)
740 length = (off_t)f.st_size;
743 * "f.st_blocks < 8" because the first eight
744 * 512-byte sectors are always skipped
747 if (f.st_size < (length - offset) || f.st_size == 0 ||
748 f.st_size > MAXOFF_T || f.st_blocks < 8 || length < 0) {
749 (void) fprintf(stderr, gettext("%s: size is invalid\n"),
750 pathname);
751 return (EINVAL);
754 if (offset < 0) {
755 (void) fprintf(stderr,
756 gettext("%s: low block is invalid\n"),
757 pathname);
758 return (EINVAL);
761 need = roundup(length, fs.f_bsize) / DEV_BSIZE;
764 * "need > f.st_blocks" to account for indirect blocks
765 * Note:
766 * This can be fooled by a file large enough to
767 * contain indirect blocks that also contains holes.
768 * However, we don't know (and don't want to know)
769 * about the underlying storage implementation.
770 * But, if it doesn't have at least this many blocks,
771 * there must be a hole.
774 if (need > f.st_blocks) {
775 (void) fprintf(stderr, gettext(
776 "\"%s\" may contain holes - can't swap on it.\n"),
777 pathname);
778 return (EINVAL);
782 * else, we cannot get st_size for S_ISBLK device and
783 * no meaningful checking can be done.
786 return (0);