src/shred.c

   1 /* shred.c - overwrite files and devices to make it harder to recover data
   2
   3    Copyright (C) 1999-2013 Free Software Foundation, Inc.
   4    Copyright (C) 1997, 1998, 1999 Colin Plumb.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation, either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18
  19    Written by Colin Plumb.  */
  20
  21 /*
  22  * Do a more secure overwrite of given files or devices, to make it harder
  23  * for even very expensive hardware probing to recover the data.
  24  *
  25  * Although this process is also known as "wiping", I prefer the longer
  26  * name both because I think it is more evocative of what is happening and
  27  * because a longer name conveys a more appropriate sense of deliberateness.
  28  *
  29  * For the theory behind this, see "Secure Deletion of Data from Magnetic
  30  * and Solid-State Memory", on line at
  31  * http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
  32  *
  33  * Just for the record, reversing one or two passes of disk overwrite
  34  * is not terribly difficult with hardware help.  Hook up a good-quality
  35  * digitizing oscilloscope to the output of the head preamplifier and copy
  36  * the high-res digitized data to a computer for some off-line analysis.
  37  * Read the "current" data and average all the pulses together to get an
  38  * "average" pulse on the disk.  Subtract this average pulse from all of
  39  * the actual pulses and you can clearly see the "echo" of the previous
  40  * data on the disk.
  41  *
  42  * Real hard drives have to balance the cost of the media, the head,
  43  * and the read circuitry.  They use better-quality media than absolutely
  44  * necessary to limit the cost of the read circuitry.  By throwing that
  45  * assumption out, and the assumption that you want the data processed
  46  * as fast as the hard drive can spin, you can do better.
  47  *
   48  * If asked to wipe a file, this also unlinks it, renaming it in a
   49  * clever way to try to leave no trace of the original filename.
  50  *
  51  * This was inspired by a desire to improve on some code titled:
  52  * Wipe V1.0-- Overwrite and delete files.  S. 2/3/96
  53  * but I've rewritten everything here so completely that no trace of
  54  * the original remains.
  55  *
  56  * Thanks to:
  57  * Bob Jenkins, for his good RNG work and patience with the FSF copyright
  58  * paperwork.
  59  * Jim Meyering, for his work merging this into the GNU fileutils while
  60  * still letting me feel a sense of ownership and pride.  Getting me to
  61  * tolerate the GNU brace style was quite a feat of diplomacy.
  62  * Paul Eggert, for lots of useful discussion and code.  I disagree with
  63  * an awful lot of his suggestions, but they're disagreements worth having.
  64  *
  65  * Things to think about:
  66  * - Security: Is there any risk to the race
  67  *   between overwriting and unlinking a file?  Will it do anything
  68  *   drastically bad if told to attack a named pipe or socket?
  69  */
  70
  71 /* The official name of this program (e.g., no 'g' prefix).  */
  72 #define PROGRAM_NAME "shred"
  73
  74 #define AUTHORS proper_name ("Colin Plumb")
  75
  76 #include <config.h>
  77
  78 #include <getopt.h>
  79 #include <stdio.h>
  80 #include <assert.h>
  81 #include <setjmp.h>
  82 #include <sys/types.h>
  83
  84 #include "system.h"
  85 #include "xstrtol.h"
  86 #include "error.h"
  87 #include "fcntl--.h"
  88 #include "human.h"
  89 #include "quotearg.h"           /* For quotearg_colon */
  90 #include "randint.h"
  91 #include "randread.h"
  92 #include "stat-size.h"
  93
  94 /* Default number of times to overwrite.  */
  95 enum { DEFAULT_PASSES = 3 };
  96
  97 /* How many seconds to wait before checking whether to output another
  98    verbose output line.  */
  99 enum { VERBOSE_UPDATE = 5 };
 100
 101 /* Sector size and corresponding mask, for recovering after write failures.
 102    The size must be a power of 2.  */
 103 enum { SECTOR_SIZE = 512 };
 104 enum { SECTOR_MASK = SECTOR_SIZE - 1 };
 105 verify (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0);
 106
 107 struct Options
 108 {
 109   bool force;           /* -f flag: chmod files if necessary */
 110   size_t n_iterations;  /* -n flag: Number of iterations */
 111   off_t size;           /* -s flag: size of file */
 112   bool remove_file;     /* -u flag: remove file after shredding */
 113   bool verbose;         /* -v flag: Print progress */
 114   bool exact;           /* -x flag: Do not round up file size */
 115   bool zero_fill;       /* -z flag: Add a final zero pass */
 116 };
 117
 118 /* For long options that have no equivalent short option, use a
 119    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 120 enum
 121 {
 122   RANDOM_SOURCE_OPTION = CHAR_MAX + 1
 123 };
 124
 125 static struct option const long_opts[] =
 126 {
 127   {"exact", no_argument, NULL, 'x'},
 128   {"force", no_argument, NULL, 'f'},
 129   {"iterations", required_argument, NULL, 'n'},
 130   {"size", required_argument, NULL, 's'},
 131   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 132   {"remove", no_argument, NULL, 'u'},
 133   {"verbose", no_argument, NULL, 'v'},
 134   {"zero", no_argument, NULL, 'z'},
 135   {GETOPT_HELP_OPTION_DECL},
 136   {GETOPT_VERSION_OPTION_DECL},
 137   {NULL, 0, NULL, 0}
 138 };
 139
 140 void
 141 usage (int status)
 142 {
 143   if (status != EXIT_SUCCESS)
 144     emit_try_help ();
 145   else
 146     {
 147       printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
 148       fputs (_("\
 149 Overwrite the specified FILE(s) repeatedly, in order to make it harder\n\
 150 for even very expensive hardware probing to recover the data.\n\
 151 "), stdout);
 152
 153       emit_mandatory_arg_note ();
 154
 155       printf (_("\
 156   -f, --force    change permissions to allow writing if necessary\n\
 157   -n, --iterations=N  overwrite N times instead of the default (%d)\n\
 158       --random-source=FILE  get random bytes from FILE\n\
 159   -s, --size=N   shred this many bytes (suffixes like K, M, G accepted)\n\
 160 "), DEFAULT_PASSES);
 161       fputs (_("\
 162   -u, --remove   truncate and remove file after overwriting\n\
 163   -v, --verbose  show progress\n\
 164   -x, --exact    do not round file sizes up to the next full block;\n\
 165                    this is the default for non-regular files\n\
 166   -z, --zero     add a final overwrite with zeros to hide shredding\n\
 167 "), stdout);
 168       fputs (HELP_OPTION_DESCRIPTION, stdout);
 169       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 170       fputs (_("\
 171 \n\
 172 If FILE is -, shred standard output.\n\
 173 \n\
 174 Delete FILE(s) if --remove (-u) is specified.  The default is not to remove\n\
 175 the files because it is common to operate on device files like /dev/hda,\n\
 176 and those files usually should not be removed.  When operating on regular\n\
 177 files, most people use the --remove option.\n\
 178 \n\
 179 "), stdout);
 180       fputs (_("\
 181 CAUTION: Note that shred relies on a very important assumption:\n\
 182 that the file system overwrites data in place.  This is the traditional\n\
 183 way to do things, but many modern file system designs do not satisfy this\n\
 184 assumption.  The following are examples of file systems on which shred is\n\
 185 not effective, or is not guaranteed to be effective in all file system modes:\n\
 186 \n\
 187 "), stdout);
 188       fputs (_("\
 189 * log-structured or journaled file systems, such as those supplied with\n\
 190 AIX and Solaris (and JFS, ReiserFS, XFS, Ext3, etc.)\n\
 191 \n\
 192 * file systems that write redundant data and carry on even if some writes\n\
 193 fail, such as RAID-based file systems\n\
 194 \n\
 195 * file systems that make snapshots, such as Network Appliance's NFS server\n\
 196 \n\
 197 "), stdout);
 198       fputs (_("\
 199 * file systems that cache in temporary locations, such as NFS\n\
 200 version 3 clients\n\
 201 \n\
 202 * compressed file systems\n\
 203 \n\
 204 "), stdout);
 205       fputs (_("\
 206 In the case of ext3 file systems, the above disclaimer applies\n\
 207 (and shred is thus of limited effectiveness) only in data=journal mode,\n\
 208 which journals file data in addition to just metadata.  In both the\n\
 209 data=ordered (default) and data=writeback modes, shred works as usual.\n\
 210 Ext3 journaling modes can be changed by adding the data=something option\n\
 211 to the mount options for a particular file system in the /etc/fstab file,\n\
 212 as documented in the mount man page (man mount).\n\
 213 \n\
 214 "), stdout);
 215       fputs (_("\
 216 In addition, file system backups and remote mirrors may contain copies\n\
 217 of the file that cannot be removed, and that will allow a shredded file\n\
 218 to be recovered later.\n\
 219 "), stdout);
 220       emit_ancillary_info ();
 221     }
 222   exit (status);
 223 }
 224
 225 /*
 226  * Determine if pattern type is periodic or not.
 227  */
 228 static bool
 229 periodic_pattern (int type)
 230 {
 231   if (type <= 0)
 232     return false;
 233
 234   unsigned char r[3];
 235   unsigned int bits = type & 0xfff;
 236
 237   bits |= bits << 12;
 238   r[0] = (bits >> 4) & 255;
 239   r[1] = (bits >> 8) & 255;
 240   r[2] = bits & 255;
 241
 242   return (r[0] != r[1]) || (r[0] != r[2]);
 243 }
 244
 245 /*
 246  * Fill a buffer with a fixed pattern.
 247  *
 248  * The buffer must be at least 3 bytes long, even if
 249  * size is less.  Larger sizes are filled exactly.
 250  */
 251 static void
 252 fillpattern (int type, unsigned char *r, size_t size)
 253 {
 254   size_t i;
 255   unsigned int bits = type & 0xfff;
 256
 257   bits |= bits << 12;
 258   r[0] = (bits >> 4) & 255;
 259   r[1] = (bits >> 8) & 255;
 260   r[2] = bits & 255;
 261   for (i = 3; i < size / 2; i *= 2)
 262     memcpy (r + i, r, i);
 263   if (i < size)
 264     memcpy (r + i, r, size - i);
 265
 266   /* Invert the first bit of every sector. */
 267   if (type & 0x1000)
 268     for (i = 0; i < size; i += SECTOR_SIZE)
 269       r[i] ^= 0x80;
 270 }
 271
 272 /*
 273  * Generate a 6-character (+ nul) pass name string
 274  * FIXME: allow translation of "random".
 275  */
 276 #define PASS_NAME_SIZE 7
 277 static void
 278 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
 279 {
 280   if (data)
 281     sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
 282   else
 283     memcpy (name, "random", PASS_NAME_SIZE);
 284 }
 285
 286 /* Return true when it's ok to ignore an fsync or fdatasync
 287    failure that set errno to ERRNO_VAL.  */
 288 static bool
 289 ignorable_sync_errno (int errno_val)
 290 {
 291   return (errno_val == EINVAL
 292           || errno_val == EBADF
 293           /* HP-UX does this */
 294           || errno_val == EISDIR);
 295 }
 296
 297 /* Request that all data for FD be transferred to the corresponding
 298    storage device.  QNAME is the file name (quoted for colons).
 299    Report any errors found.  Return 0 on success, -1
 300    (setting errno) on failure.  It is not an error if fdatasync and/or
 301    fsync is not supported for this file, or if the file is not a
 302    writable file descriptor.  */
 303 static int
 304 dosync (int fd, char const *qname)
 305 {
 306   int err;
 307
 308 #if HAVE_FDATASYNC
 309   if (fdatasync (fd) == 0)
 310     return 0;
 311   err = errno;
 312   if ( ! ignorable_sync_errno (err))
 313     {
 314       error (0, err, _("%s: fdatasync failed"), qname);
 315       errno = err;
 316       return -1;
 317     }
 318 #endif
 319
 320   if (fsync (fd) == 0)
 321     return 0;
 322   err = errno;
 323   if ( ! ignorable_sync_errno (err))
 324     {
 325       error (0, err, _("%s: fsync failed"), qname);
 326       errno = err;
 327       return -1;
 328     }
 329
 330   sync ();
 331   return 0;
 332 }
 333
 334 /* Turn on or off direct I/O mode for file descriptor FD, if possible.
 335    Try to turn it on if ENABLE is true.  Otherwise, try to turn it off.  */
 336 static void
 337 direct_mode (int fd, bool enable)
 338 {
 339   if (O_DIRECT)
 340     {
 341       int fd_flags = fcntl (fd, F_GETFL);
 342       if (0 < fd_flags)
 343         {
 344           int new_flags = (enable
 345                            ? (fd_flags | O_DIRECT)
 346                            : (fd_flags & ~O_DIRECT));
 347           if (new_flags != fd_flags)
 348             fcntl (fd, F_SETFL, new_flags);
 349         }
 350     }
 351
 352 #if HAVE_DIRECTIO && defined DIRECTIO_ON && defined DIRECTIO_OFF
 353   /* This is Solaris-specific.  See the following for details:
 354      http://docs.sun.com/db/doc/816-0213/6m6ne37so?q=directio&a=view  */
 355   directio (fd, enable ? DIRECTIO_ON : DIRECTIO_OFF);
 356 #endif
 357 }
 358
 359 /*
 360  * Do pass number k of n, writing "size" bytes of the given pattern "type"
 361  * to the file descriptor fd.   Qname, k and n are passed in only for verbose
 362  * progress message purposes.  If n == 0, no progress messages are printed.
 363  *
 364  * If *sizep == -1, the size is unknown, and it will be filled in as soon
 365  * as writing fails.
 366  *
 367  * Return 1 on write error, -1 on other error, 0 on success.
 368  */
 369 static int
 370 dopass (int fd, char const *qname, off_t *sizep, int type,
 371         struct randread_source *s, unsigned long int k, unsigned long int n)
 372 {
 373   off_t size = *sizep;
 374   off_t offset;                 /* Current file posiiton */
 375   time_t thresh IF_LINT ( = 0); /* Time to maybe print next status update */
 376   time_t now = 0;               /* Current time */
 377   size_t lim;                   /* Amount of data to try writing */
 378   size_t soff;                  /* Offset into buffer for next write */
 379   ssize_t ssize;                /* Return value from write */
 380
 381   /* Fill pattern buffer.  Aligning it to a page so we can do direct I/O.  */
 382   size_t page_size = getpagesize ();
 383 #define PERIODIC_OUTPUT_SIZE (12 * 1024)
 384 #define NONPERIODIC_OUTPUT_SIZE (64 * 1024)
 385   verify (PERIODIC_OUTPUT_SIZE % 3 == 0);
 386   size_t output_size = periodic_pattern (type)
 387                        ? PERIODIC_OUTPUT_SIZE : NONPERIODIC_OUTPUT_SIZE;
 388 #define PAGE_ALIGN_SLOP (page_size - 1)                /* So directio works */
 389 #define FILLPATTERN_SIZE (((output_size + 2) / 3) * 3) /* Multiple of 3 */
 390 #define PATTERNBUF_SIZE (PAGE_ALIGN_SLOP + FILLPATTERN_SIZE)
 391   void *fill_pattern_mem = xmalloc (PATTERNBUF_SIZE);
 392   unsigned char *pbuf = ptr_align (fill_pattern_mem, page_size);
 393
 394   char pass_string[PASS_NAME_SIZE];     /* Name of current pass */
 395   bool write_error = false;
 396   bool other_error = false;
 397   bool first_write = true;
 398
 399   /* Printable previous offset into the file */
 400   char previous_offset_buf[LONGEST_HUMAN_READABLE + 1];
 401   char const *previous_human_offset IF_LINT ( = 0);
 402
 403   if (lseek (fd, 0, SEEK_SET) == -1)
 404     {
 405       error (0, errno, _("%s: cannot rewind"), qname);
 406       other_error = true;
 407       goto free_pattern_mem;
 408     }
 409
 410   /* Constant fill patterns need only be set up once. */
 411   if (type >= 0)
 412     {
 413       lim = (0 <= size && size < FILLPATTERN_SIZE ? size : FILLPATTERN_SIZE);
 414       fillpattern (type, pbuf, lim);
 415       passname (pbuf, pass_string);
 416     }
 417   else
 418     {
 419       passname (0, pass_string);
 420     }
 421
 422   /* Set position if first status update */
 423   if (n)
 424     {
 425       error (0, 0, _("%s: pass %lu/%lu (%s)..."), qname, k, n, pass_string);
 426       thresh = time (NULL) + VERBOSE_UPDATE;
 427       previous_human_offset = "";
 428     }
 429
 430   offset = 0;
 431   while (true)
 432     {
 433       /* How much to write this time? */
 434       lim = output_size;
 435       if (0 <= size && size - offset < output_size)
 436         {
 437           if (size < offset)
 438             break;
 439           lim = size - offset;
 440           if (!lim)
 441             break;
 442         }
 443       if (type < 0)
 444         randread (s, pbuf, lim);
 445       /* Loop to retry partial writes. */
 446       for (soff = 0; soff < lim; soff += ssize, first_write = false)
 447         {
 448           ssize = write (fd, pbuf + soff, lim - soff);
 449           if (ssize <= 0)
 450             {
 451               if (size < 0 && (ssize == 0 || errno == ENOSPC))
 452                 {
 453                   /* Ah, we have found the end of the file */
 454                   *sizep = size = offset + soff;
 455                   break;
 456                 }
 457               else
 458                 {
 459                   int errnum = errno;
 460                   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
 461
 462                   /* If the first write of the first pass for a given file
 463                      has just failed with EINVAL, turn off direct mode I/O
 464                      and try again.  This works around a bug in Linux kernel
 465                      2.4 whereby opening with O_DIRECT would succeed for some
 466                      file system types (e.g., ext3), but any attempt to
 467                      access a file through the resulting descriptor would
 468                      fail with EINVAL.  */
 469                   if (k == 1 && first_write && errno == EINVAL)
 470                     {
 471                       direct_mode (fd, false);
 472                       ssize = 0;
 473                       continue;
 474                     }
 475                   error (0, errnum, _("%s: error writing at offset %s"),
 476                          qname, umaxtostr (offset + soff, buf));
 477
 478                   /* 'shred' is often used on bad media, before throwing it
 479                      out.  Thus, it shouldn't give up on bad blocks.  This
 480                      code works because lim is always a multiple of
 481                      SECTOR_SIZE, except at the end.  */
 482                   verify (PERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
 483                   verify (NONPERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
 484                   if (errnum == EIO && 0 <= size && (soff | SECTOR_MASK) < lim)
 485                     {
 486                       size_t soff1 = (soff | SECTOR_MASK) + 1;
 487                       if (lseek (fd, offset + soff1, SEEK_SET) != -1)
 488                         {
 489                           /* Arrange to skip this block. */
 490                           ssize = soff1 - soff;
 491                           write_error = true;
 492                           continue;
 493                         }
 494                       error (0, errno, _("%s: lseek failed"), qname);
 495                     }
 496                   other_error = true;
 497                   goto free_pattern_mem;
 498                 }
 499             }
 500         }
 501
 502       /* Okay, we have written "soff" bytes. */
 503
 504       if (offset > OFF_T_MAX - (off_t) soff)
 505         {
 506           error (0, 0, _("%s: file too large"), qname);
 507           other_error = true;
 508           goto free_pattern_mem;
 509         }
 510
 511       offset += soff;
 512
 513       bool done = offset == size;
 514
 515       /* Time to print progress? */
 516       if (n && ((done && *previous_human_offset)
 517                 || thresh <= (now = time (NULL))))
 518         {
 519           char offset_buf[LONGEST_HUMAN_READABLE + 1];
 520           char size_buf[LONGEST_HUMAN_READABLE + 1];
 521           int human_progress_opts = (human_autoscale | human_SI
 522                                      | human_base_1024 | human_B);
 523           char const *human_offset
 524             = human_readable (offset, offset_buf,
 525                               human_floor | human_progress_opts, 1, 1);
 526
 527           if (done || !STREQ (previous_human_offset, human_offset))
 528             {
 529               if (size < 0)
 530                 error (0, 0, _("%s: pass %lu/%lu (%s)...%s"),
 531                        qname, k, n, pass_string, human_offset);
 532               else
 533                 {
 534                   uintmax_t off = offset;
 535                   int percent = (size == 0
 536                                  ? 100
 537                                  : (off <= TYPE_MAXIMUM (uintmax_t) / 100
 538                                     ? off * 100 / size
 539                                     : off / (size / 100)));
 540                   char const *human_size
 541                     = human_readable (size, size_buf,
 542                                       human_ceiling | human_progress_opts,
 543                                       1, 1);
 544                   if (done)
 545                     human_offset = human_size;
 546                   error (0, 0, _("%s: pass %lu/%lu (%s)...%s/%s %d%%"),
 547                          qname, k, n, pass_string, human_offset, human_size,
 548                          percent);
 549                 }
 550
 551               strcpy (previous_offset_buf, human_offset);
 552               previous_human_offset = previous_offset_buf;
 553               thresh = now + VERBOSE_UPDATE;
 554
 555               /*
 556                * Force periodic syncs to keep displayed progress accurate
 557                * FIXME: Should these be present even if -v is not enabled,
 558                * to keep the buffer cache from filling with dirty pages?
 559                * It's a common problem with programs that do lots of writes,
 560                * like mkfs.
 561                */
 562               if (dosync (fd, qname) != 0)
 563                 {
 564                   if (errno != EIO)
 565                     {
 566                       other_error = true;
 567                       goto free_pattern_mem;
 568                     }
 569                   write_error = true;
 570                 }
 571             }
 572         }
 573     }
 574
 575   /* Force what we just wrote to hit the media. */
 576   if (dosync (fd, qname) != 0)
 577     {
 578       if (errno != EIO)
 579         {
 580           other_error = true;
 581           goto free_pattern_mem;
 582         }
 583       write_error = true;
 584     }
 585
 586 free_pattern_mem:
 587   memset (pbuf, 0, FILLPATTERN_SIZE);
 588   free (fill_pattern_mem);
 589
 590   return other_error ? -1 : write_error;
 591 }
 592
 593 /*
 594  * The passes start and end with a random pass, and the passes in between
 595  * are done in random order.  The idea is to deprive someone trying to
 596  * reverse the process of knowledge of the overwrite patterns, so they
 597  * have the additional step of figuring out what was done to the disk
 598  * before they can try to reverse or cancel it.
 599  *
 600  * First, all possible 1-bit patterns.  There are two of them.
 601  * Then, all possible 2-bit patterns.  There are four, but the two
 602  * which are also 1-bit patterns can be omitted.
 603  * Then, all possible 3-bit patterns.  Likewise, 8-2 = 6.
 604  * Then, all possible 4-bit patterns.  16-4 = 12.
 605  *
 606  * The basic passes are:
 607  * 1-bit: 0x000, 0xFFF
 608  * 2-bit: 0x555, 0xAAA
 609  * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
 610  *        100100100100         110110110110
 611  *           9   2   4            D   B   6
 612  * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 613  *        0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
 614  * Adding three random passes at the beginning, middle and end
 615  * produces the default 25-pass structure.
 616  *
 617  * The next extension would be to 5-bit and 6-bit patterns.
 618  * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
 619  * 6-bit patterns, so they would increase the time required
 620  * significantly.  4-bit patterns are enough for most purposes.
 621  *
 622  * The main gotcha is that this would require a trickier encoding,
 623  * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
 624  * lcm(2,3,4,5) = 60 bits is not.
 625  *
 626  * One extension that is included is to complement the first bit in each
 627  * 512-byte block, to alter the phase of the encoded data in the more
 628  * complex encodings.  This doesn't apply to MFM, so the 1-bit patterns
 629  * are considered part of the 3-bit ones and the 2-bit patterns are
 630  * considered part of the 4-bit patterns.
 631  *
 632  *
 633  * How does the generalization to variable numbers of passes work?
 634  *
 635  * Here's how...
 636  * Have an ordered list of groups of passes.  Each group is a set.
 637  * Take as many groups as will fit, plus a random subset of the
 638  * last partial group, and place them into the passes list.
 639  * Then shuffle the passes list into random order and use that.
 640  *
 641  * One extra detail: if we can't include a large enough fraction of the
 642  * last group to be interesting, then just substitute random passes.
 643  *
 644  * If you want more passes than the entire list of groups can
 645  * provide, just start repeating from the beginning of the list.
 646  */
 647 static int const
 648   patterns[] =
 649 {
 650   -2,                           /* 2 random passes */
 651   2, 0x000, 0xFFF,              /* 1-bit */
 652   2, 0x555, 0xAAA,              /* 2-bit */
 653   -1,                           /* 1 random pass */
 654   6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6,  /* 3-bit */
 655   12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 656   0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE,     /* 4-bit */
 657   -1,                           /* 1 random pass */
 658         /* The following patterns have the frst bit per block flipped */
 659   8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
 660   14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
 661   0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
 662   -1,                           /* 1 random pass */
 663   0                             /* End */
 664 };
 665
 666 /*
 667  * Generate a random wiping pass pattern with num passes.
 668  * This is a two-stage process.  First, the passes to include
 669  * are chosen, and then they are shuffled into the desired
 670  * order.
 671  */
 672 static void
 673 genpattern (int *dest, size_t num, struct randint_source *s)
 674 {
 675   size_t randpasses;
 676   int const *p;
 677   int *d;
 678   size_t n;
 679   size_t accum, top, swap;
 680   int k;
 681
 682   if (!num)
 683     return;
 684
 685   /* Stage 1: choose the passes to use */
 686   p = patterns;
 687   randpasses = 0;
 688   d = dest;                     /* Destination for generated pass list */
 689   n = num;                      /* Passes remaining to fill */
 690
 691   while (true)
 692     {
 693       k = *p++;                 /* Block descriptor word */
 694       if (!k)
 695         {                       /* Loop back to the beginning */
 696           p = patterns;
 697         }
 698       else if (k < 0)
 699         {                       /* -k random passes */
 700           k = -k;
 701           if ((size_t) k >= n)
 702             {
 703               randpasses += n;
 704               break;
 705             }
 706           randpasses += k;
 707           n -= k;
 708         }
 709       else if ((size_t) k <= n)
 710         {                       /* Full block of patterns */
 711           memcpy (d, p, k * sizeof (int));
 712           p += k;
 713           d += k;
 714           n -= k;
 715         }
 716       else if (n < 2 || 3 * n < (size_t) k)
 717         {                       /* Finish with random */
 718           randpasses += n;
 719           break;
 720         }
 721       else
 722         {                       /* Pad out with k of the n available */
 723           do
 724             {
 725               if (n == (size_t) k || randint_choose (s, k) < n)
 726                 {
 727                   *d++ = *p;
 728                   n--;
 729                 }
 730               p++;
 731             }
 732           while (n);
 733           break;
 734         }
 735     }
 736   top = num - randpasses;       /* Top of initialized data */
 737   /* assert (d == dest+top); */
 738
 739   /*
 740    * We now have fixed patterns in the dest buffer up to
 741    * "top", and we need to scramble them, with "randpasses"
 742    * random passes evenly spaced among them.
 743    *
 744    * We want one at the beginning, one at the end, and
 745    * evenly spaced in between.  To do this, we basically
 746    * use Bresenham's line draw (a.k.a DDA) algorithm
 747    * to draw a line with slope (randpasses-1)/(num-1).
 748    * (We use a positive accumulator and count down to
 749    * do this.)
 750    *
 751    * So for each desired output value, we do the following:
 752    * - If it should be a random pass, copy the pass type
 753    *   to top++, out of the way of the other passes, and
 754    *   set the current pass to -1 (random).
 755    * - If it should be a normal pattern pass, choose an
 756    *   entry at random between here and top-1 (inclusive)
 757    *   and swap the current entry with that one.
 758    */
 759   randpasses--;                 /* To speed up later math */
 760   accum = randpasses;           /* Bresenham DDA accumulator */
 761   for (n = 0; n < num; n++)
 762     {
 763       if (accum <= randpasses)
 764         {
 765           accum += num - 1;
 766           dest[top++] = dest[n];
 767           dest[n] = -1;
 768         }
 769       else
 770         {
 771           swap = n + randint_choose (s, top - n);
 772           k = dest[n];
 773           dest[n] = dest[swap];
 774           dest[swap] = k;
 775         }
 776       accum -= randpasses;
 777     }
 778   /* assert (top == num); */
 779 }
 780
 781 /*
 782  * The core routine to actually do the work.  This overwrites the first
 783  * size bytes of the given fd.  Return true if successful.
 784  */
 785 static bool
 786 do_wipefd (int fd, char const *qname, struct randint_source *s,
 787            struct Options const *flags)
 788 {
 789   size_t i;
 790   struct stat st;
 791   off_t size;                   /* Size to write, size to read */
 792   unsigned long int n;          /* Number of passes for printing purposes */
 793   int *passarray;
 794   bool ok = true;
 795   struct randread_source *rs;
 796
 797   n = 0;                /* dopass takes n -- 0 to mean "don't print progress" */
 798   if (flags->verbose)
 799     n = flags->n_iterations + flags->zero_fill;
 800
 801   if (fstat (fd, &st))
 802     {
 803       error (0, errno, _("%s: fstat failed"), qname);
 804       return false;
 805     }
 806
 807   /* If we know that we can't possibly shred the file, give up now.
 808      Otherwise, we may go into an infinite loop writing data before we
 809      find that we can't rewind the device.  */
 810   if ((S_ISCHR (st.st_mode) && isatty (fd))
 811       || S_ISFIFO (st.st_mode)
 812       || S_ISSOCK (st.st_mode))
 813     {
 814       error (0, 0, _("%s: invalid file type"), qname);
 815       return false;
 816     }
 817
 818   direct_mode (fd, true);
 819
 820   /* Allocate pass array */
 821   passarray = xnmalloc (flags->n_iterations, sizeof *passarray);
 822
 823   size = flags->size;
 824   if (size == -1)
 825     {
 826       /* Accept a length of zero only if it's a regular file.
 827          For any other type of file, try to get the size another way.  */
 828       if (S_ISREG (st.st_mode))
 829         {
 830           size = st.st_size;
 831           if (size < 0)
 832             {
 833               error (0, 0, _("%s: file has negative size"), qname);
 834               return false;
 835             }
 836         }
 837       else
 838         {
 839           size = lseek (fd, 0, SEEK_END);
 840           if (size <= 0)
 841             {
 842               /* We are unable to determine the length, up front.
 843                  Let dopass do that as part of its first iteration.  */
 844               size = -1;
 845             }
 846         }
 847
 848       /* Allow 'rounding up' only for regular files.  */
 849       if (0 <= size && !(flags->exact) && S_ISREG (st.st_mode))
 850         {
 851           size += ST_BLKSIZE (st) - 1 - (size - 1) % ST_BLKSIZE (st);
 852
 853           /* If in rounding up, we've just overflowed, use the maximum.  */
 854           if (size < 0)
 855             size = TYPE_MAXIMUM (off_t);
 856         }
 857     }
 858
 859   /* Schedule the passes in random order. */
 860   genpattern (passarray, flags->n_iterations, s);
 861
 862   rs = randint_get_source (s);
 863
 864   /* Do the work */
 865   for (i = 0; i < flags->n_iterations; i++)
 866     {
 867       int err = dopass (fd, qname, &size, passarray[i], rs, i + 1, n);
 868       if (err)
 869         {
 870           if (err < 0)
 871             {
 872               memset (passarray, 0, flags->n_iterations * sizeof (int));
 873               free (passarray);
 874               return false;
 875             }
 876           ok = false;
 877         }
 878     }
 879
 880   memset (passarray, 0, flags->n_iterations * sizeof (int));
 881   free (passarray);
 882
 883   if (flags->zero_fill)
 884     {
 885       int err = dopass (fd, qname, &size, 0, rs, flags->n_iterations + 1, n);
 886       if (err)
 887         {
 888           if (err < 0)
 889             return false;
 890           ok = false;
 891         }
 892     }
 893
 894   /* Okay, now deallocate the data.  The effect of ftruncate on
 895      non-regular files is unspecified, so don't worry about any
 896      errors reported for them.  */
 897   if (flags->remove_file && ftruncate (fd, 0) != 0
 898       && S_ISREG (st.st_mode))
 899     {
 900       error (0, errno, _("%s: error truncating"), qname);
 901       return false;
 902     }
 903
 904   return ok;
 905 }
 906
 907 /* A wrapper with a little more checking for fds on the command line */
 908 static bool
 909 wipefd (int fd, char const *qname, struct randint_source *s,
 910         struct Options const *flags)
 911 {
 912   int fd_flags = fcntl (fd, F_GETFL);
 913
 914   if (fd_flags < 0)
 915     {
 916       error (0, errno, _("%s: fcntl failed"), qname);
 917       return false;
 918     }
 919   if (fd_flags & O_APPEND)
 920     {
 921       error (0, 0, _("%s: cannot shred append-only file descriptor"), qname);
 922       return false;
 923     }
 924   return do_wipefd (fd, qname, s, flags);
 925 }
 926
 927 /* --- Name-wiping code --- */
 928
 929 /* Characters allowed in a file name - a safe universal set.  */
 930 static char const nameset[] =
 931 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.";
 932
 933 /* Increment NAME (with LEN bytes).  NAME must be a big-endian base N
 934    number with the digits taken from nameset.  Return true if successful.
 935    Otherwise, (because NAME already has the greatest possible value)
 936    return false.  */
 937
 938 static bool
 939 incname (char *name, size_t len)
 940 {
 941   while (len--)
 942     {
 943       char const *p = strchr (nameset, name[len]);
 944
 945       /* Given that NAME is composed of bytes from NAMESET,
 946          P will never be NULL here.  */
 947       assert (p);
 948
 949       /* If this character has a successor, use it.  */
 950       if (p[1])
 951         {
 952           name[len] = p[1];
 953           return true;
 954         }
 955
 956       /* Otherwise, set this digit to 0 and increment the prefix.  */
 957       name[len] = nameset[0];
 958     }
 959
 960   return false;
 961 }
 962
 963 /*
 964  * Repeatedly rename a file with shorter and shorter names,
 965  * to obliterate all traces of the file name on any system that
 966  * adds a trailing delimiter to on-disk file names and reuses
 967  * the same directory slot.  Finally, unlink it.
 968  * The passed-in filename is modified in place to the new filename.
 969  * (Which is unlinked if this function succeeds, but is still present if
 970  * it fails for some reason.)
 971  *
 972  * The main loop is written carefully to not get stuck if all possible
 973  * names of a given length are occupied.  It counts down the length from
 974  * the original to 0.  While the length is non-zero, it tries to find an
 975  * unused file name of the given length.  It continues until either the
 976  * name is available and the rename succeeds, or it runs out of names
 977  * to try (incname wraps and returns 1).  Finally, it unlinks the file.
 978  *
 979  * The unlink is Unix-specific, as ANSI-standard remove has more
 980  * portability problems with C libraries making it "safe".  rename
 981  * is ANSI-standard.
 982  *
 983  * To force the directory data out, we try to open the directory and
 984  * invoke fdatasync and/or fsync on it.  This is non-standard, so don't
 985  * insist that it works: just fall back to a global sync in that case.
 986  * This is fairly significantly Unix-specific.  Of course, on any
 987  * file system with synchronous metadata updates, this is unnecessary.
 988  */
 989 static bool
 990 wipename (char *oldname, char const *qoldname, struct Options const *flags)
 991 {
 992   char *newname = xstrdup (oldname);
 993   char *base = last_component (newname);
 994   size_t len = base_len (base);
 995   char *dir = dir_name (newname);
 996   char *qdir = xstrdup (quotearg_colon (dir));
 997   bool first = true;
 998   bool ok = true;
 999
1000   int dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
1001
1002   if (flags->verbose)
1003     error (0, 0, _("%s: removing"), qoldname);
1004
1005   while (len)
1006     {
1007       memset (base, nameset[0], len);
1008       base[len] = 0;
1009       do
1010         {
1011           struct stat st;
1012           if (lstat (newname, &st) < 0)
1013             {
1014               if (rename (oldname, newname) == 0)
1015                 {
1016                   if (0 <= dir_fd && dosync (dir_fd, qdir) != 0)
1017                     ok = false;
1018                   if (flags->verbose)
1019                     {
1020                       /*
1021                        * People seem to understand this better than talking
1022                        * about renaming oldname.  newname doesn't need
1023                        * quoting because we picked it.  oldname needs to
1024                        * be quoted only the first time.
1025                        */
1026                       char const *old = (first ? qoldname : oldname);
1027                       error (0, 0, _("%s: renamed to %s"), old, newname);
1028                       first = false;
1029                     }
1030                   memcpy (oldname + (base - newname), base, len + 1);
1031                   break;
1032                 }
1033               else
1034                 {
1035                   /* The rename failed: give up on this length.  */
1036                   break;
1037                 }
1038             }
1039           else
1040             {
1041               /* newname exists, so increment BASE so we use another */
1042             }
1043         }
1044       while (incname (base, len));
1045       len--;
1046     }
1047   if (unlink (oldname) != 0)
1048     {
1049       error (0, errno, _("%s: failed to remove"), qoldname);
1050       ok = false;
1051     }
1052   else if (flags->verbose)
1053     error (0, 0, _("%s: removed"), qoldname);
1054   if (0 <= dir_fd)
1055     {
1056       if (dosync (dir_fd, qdir) != 0)
1057         ok = false;
1058       if (close (dir_fd) != 0)
1059         {
1060           error (0, errno, _("%s: failed to close"), qdir);
1061           ok = false;
1062         }
1063     }
1064   free (newname);
1065   free (dir);
1066   free (qdir);
1067   return ok;
1068 }
1069
1070 /*
1071  * Finally, the function that actually takes a filename and grinds
1072  * it into hamburger.
1073  *
1074  * FIXME
1075  * Detail to note: since we do not restore errno to EACCES after
1076  * a failed chmod, we end up printing the error code from the chmod.
1077  * This is actually the error that stopped us from proceeding, so
1078  * it's arguably the right one, and in practice it'll be either EACCES
1079  * again or EPERM, which both give similar error messages.
1080  * Does anyone disagree?
1081  */
1082 static bool
1083 wipefile (char *name, char const *qname,
1084           struct randint_source *s, struct Options const *flags)
1085 {
1086   bool ok;
1087   int fd;
1088
1089   fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1090   if (fd < 0
1091       && (errno == EACCES && flags->force)
1092       && chmod (name, S_IWUSR) == 0)
1093     fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1094   if (fd < 0)
1095     {
1096       error (0, errno, _("%s: failed to open for writing"), qname);
1097       return false;
1098     }
1099
1100   ok = do_wipefd (fd, qname, s, flags);
1101   if (close (fd) != 0)
1102     {
1103       error (0, errno, _("%s: failed to close"), qname);
1104       ok = false;
1105     }
1106   if (ok && flags->remove_file)
1107     ok = wipename (name, qname, flags);
1108   return ok;
1109 }
1110
1111
/* Buffers for random data.  This is the one random source for the whole
   run: main assigns it from randint_all_new and passes it to every
   wipefd and wipefile call.  */
static struct randint_source *randint_source;

/* Just on general principles, wipe buffers containing information
   that may be related to the possibly-pseudorandom values used during
   shredding.  This takes no arguments and returns nothing.  main
   registers it with atexit once randint_source has been created; all it
   does is pass that global to randint_all_free.  */
static void
clear_random_data (void)
{
  randint_all_free (randint_source);
}
1123
1124
1125 int
1126 main (int argc, char **argv)
1127 {
1128   bool ok = true;
1129   struct Options flags = { 0, };
1130   char **file;
1131   int n_files;
1132   int c;
1133   int i;
1134   char const *random_source = NULL;
1135
1136   initialize_main (&argc, &argv);
1137   set_program_name (argv[0]);
1138   setlocale (LC_ALL, "");
1139   bindtextdomain (PACKAGE, LOCALEDIR);
1140   textdomain (PACKAGE);
1141
1142   atexit (close_stdout);
1143
1144   flags.n_iterations = DEFAULT_PASSES;
1145   flags.size = -1;
1146
1147   while ((c = getopt_long (argc, argv, "fn:s:uvxz", long_opts, NULL)) != -1)
1148     {
1149       switch (c)
1150         {
1151         case 'f':
1152           flags.force = true;
1153           break;
1154
1155         case 'n':
1156           {
1157             uintmax_t tmp;
1158             if (xstrtoumax (optarg, NULL, 10, &tmp, NULL) != LONGINT_OK
1159                 || MIN (UINT32_MAX, SIZE_MAX / sizeof (int)) < tmp)
1160               {
1161                 error (EXIT_FAILURE, 0, _("%s: invalid number of passes"),
1162                        quotearg_colon (optarg));
1163               }
1164             flags.n_iterations = tmp;
1165           }
1166           break;
1167
1168         case RANDOM_SOURCE_OPTION:
1169           if (random_source && !STREQ (random_source, optarg))
1170             error (EXIT_FAILURE, 0, _("multiple random sources specified"));
1171           random_source = optarg;
1172           break;
1173
1174         case 'u':
1175           flags.remove_file = true;
1176           break;
1177
1178         case 's':
1179           {
1180             uintmax_t tmp;
1181             if (xstrtoumax (optarg, NULL, 0, &tmp, "cbBkKMGTPEZY0")
1182                 != LONGINT_OK)
1183               {
1184                 error (EXIT_FAILURE, 0, _("%s: invalid file size"),
1185                        quotearg_colon (optarg));
1186               }
1187             flags.size = tmp;
1188           }
1189           break;
1190
1191         case 'v':
1192           flags.verbose = true;
1193           break;
1194
1195         case 'x':
1196           flags.exact = true;
1197           break;
1198
1199         case 'z':
1200           flags.zero_fill = true;
1201           break;
1202
1203         case_GETOPT_HELP_CHAR;
1204
1205         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1206
1207         default:
1208           usage (EXIT_FAILURE);
1209         }
1210     }
1211
1212   file = argv + optind;
1213   n_files = argc - optind;
1214
1215   if (n_files == 0)
1216     {
1217       error (0, 0, _("missing file operand"));
1218       usage (EXIT_FAILURE);
1219     }
1220
1221   randint_source = randint_all_new (random_source, SIZE_MAX);
1222   if (! randint_source)
1223     error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source));
1224   atexit (clear_random_data);
1225
1226   for (i = 0; i < n_files; i++)
1227     {
1228       char *qname = xstrdup (quotearg_colon (file[i]));
1229       if (STREQ (file[i], "-"))
1230         {
1231           ok &= wipefd (STDOUT_FILENO, qname, randint_source, &flags);
1232         }
1233       else
1234         {
1235           /* Plain filename - Note that this overwrites *argv! */
1236           ok &= wipefile (file[i], qname, randint_source, &flags);
1237         }
1238       free (qname);
1239     }
1240
1241   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
1242 }
1243 /*
1244  * vim:sw=2:sts=2:
1245  */