test: expose recent gnulib canonicalize bug
[coreutils/ericb.git] / src / md5sum.c
blobf7e084914260b52e5483d1aa804392f8095a9bf9
1 /* Compute checksums of files or strings.
2 Copyright (C) 1995-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>. */
19 #include <config.h>
21 #include <getopt.h>
22 #include <sys/types.h>
24 #include "system.h"
26 #if HASH_ALGO_MD5
27 # include "md5.h"
28 #endif
29 #if HASH_ALGO_SHA1
30 # include "sha1.h"
31 #endif
32 #if HASH_ALGO_SHA256 || HASH_ALGO_SHA224
33 # include "sha256.h"
34 #endif
35 #if HASH_ALGO_SHA512 || HASH_ALGO_SHA384
36 # include "sha512.h"
37 #endif
38 #include "error.h"
39 #include "fadvise.h"
40 #include "stdio--.h"
41 #include "xfreopen.h"
43 /* The official name of this program (e.g., no 'g' prefix). */
44 #if HASH_ALGO_MD5
45 # define PROGRAM_NAME "md5sum"
46 # define DIGEST_TYPE_STRING "MD5"
47 # define DIGEST_STREAM md5_stream
48 # define DIGEST_BITS 128
49 # define DIGEST_REFERENCE "RFC 1321"
50 # define DIGEST_ALIGN 4
51 #elif HASH_ALGO_SHA1
52 # define PROGRAM_NAME "sha1sum"
53 # define DIGEST_TYPE_STRING "SHA1"
54 # define DIGEST_STREAM sha1_stream
55 # define DIGEST_BITS 160
56 # define DIGEST_REFERENCE "FIPS-180-1"
57 # define DIGEST_ALIGN 4
58 #elif HASH_ALGO_SHA256
59 # define PROGRAM_NAME "sha256sum"
60 # define DIGEST_TYPE_STRING "SHA256"
61 # define DIGEST_STREAM sha256_stream
62 # define DIGEST_BITS 256
63 # define DIGEST_REFERENCE "FIPS-180-2"
64 # define DIGEST_ALIGN 4
65 #elif HASH_ALGO_SHA224
66 # define PROGRAM_NAME "sha224sum"
67 # define DIGEST_TYPE_STRING "SHA224"
68 # define DIGEST_STREAM sha224_stream
69 # define DIGEST_BITS 224
70 # define DIGEST_REFERENCE "RFC 3874"
71 # define DIGEST_ALIGN 4
72 #elif HASH_ALGO_SHA512
73 # define PROGRAM_NAME "sha512sum"
74 # define DIGEST_TYPE_STRING "SHA512"
75 # define DIGEST_STREAM sha512_stream
76 # define DIGEST_BITS 512
77 # define DIGEST_REFERENCE "FIPS-180-2"
78 # define DIGEST_ALIGN 8
79 #elif HASH_ALGO_SHA384
80 # define PROGRAM_NAME "sha384sum"
81 # define DIGEST_TYPE_STRING "SHA384"
82 # define DIGEST_STREAM sha384_stream
83 # define DIGEST_BITS 384
84 # define DIGEST_REFERENCE "FIPS-180-2"
85 # define DIGEST_ALIGN 8
86 #else
87 # error "Can't decide which hash algorithm to compile."
88 #endif
90 #define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
91 #define DIGEST_BIN_BYTES (DIGEST_BITS / 8)
93 #define AUTHORS \
94 proper_name ("Ulrich Drepper"), \
95 proper_name ("Scott Miller"), \
96 proper_name ("David Madore")
98 /* The minimum length of a valid digest line. This length does
99 not include any newline character at the end of a line. */
100 #define MIN_DIGEST_LINE_LENGTH \
101 (DIGEST_HEX_BYTES /* length of hexadecimal message digest */ \
102 + 1 /* blank */ \
103 + 1 /* minimum filename length */ )
/* Becomes true as soon as standard input is used, either as a file to
   digest or as a list of checksums to verify.  */
static bool have_read_stdin;

/* Shortest acceptable checksum line for the selected algorithm.  */
static size_t min_digest_line_length;

/* Number of hexadecimal characters in a digest of the selected algorithm.  */
static size_t digest_hex_bytes;

/* With --check, print nothing at all; the outcome is reported only
   through the exit code.  */
static bool status_only = false;

/* With --check, report each improperly formatted checksum line on
   standard error.  */
static bool warn = false;

/* With --check, omit the "OK" line normally printed for each file that
   verifies.  */
static bool quiet = false;

/* With --check, make any improperly formatted line cause a non-zero
   exit code.  */
static bool strict = false;

/* Which untagged line format the check file uses: -1 while undecided,
   1 once a BSD reversed format line has been seen, 0 once a standard
   format line has been seen.  */
static int bsd_reversed = -1;
132 /* For long options that have no equivalent short option, use a
133 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
134 enum
136 STATUS_OPTION = CHAR_MAX + 1,
137 QUIET_OPTION,
138 STRICT_OPTION
141 static struct option const long_options[] =
143 { "binary", no_argument, NULL, 'b' },
144 { "check", no_argument, NULL, 'c' },
145 { "quiet", no_argument, NULL, QUIET_OPTION },
146 { "status", no_argument, NULL, STATUS_OPTION },
147 { "text", no_argument, NULL, 't' },
148 { "warn", no_argument, NULL, 'w' },
149 { "strict", no_argument, NULL, STRICT_OPTION },
150 { GETOPT_HELP_OPTION_DECL },
151 { GETOPT_VERSION_OPTION_DECL },
152 { NULL, 0, NULL, 0 }
/* Print usage information and terminate the program with exit code STATUS.

   When STATUS is not EXIT_SUCCESS only emit_try_help () is called.
   Otherwise the full help text is written to standard output: the
   synopsis, the option list (the wording for -b and -t depends on
   O_BINARY), the standard --help and --version descriptions, a note on
   the output format, and whatever emit_ancillary_info () adds.

   This function never returns; it always ends in exit (STATUS).

   NOTE(review): in this listing the brace-only lines and the bare
   continuation lines of the help strings appear to be missing, and
   the column spacing inside the strings looks collapsed -- confirm
   against the upstream file before changing any of the text.  */
155 void
156 usage (int status)
158 if (status != EXIT_SUCCESS)
159 emit_try_help ();
160 else
162 printf (_("\
163 Usage: %s [OPTION]... [FILE]...\n\
164 Print or check %s (%d-bit) checksums.\n\
165 With no FILE, or when FILE is -, read standard input.\n\
168 program_name,
169 DIGEST_TYPE_STRING,
170 DIGEST_BITS);
171 if (O_BINARY)
172 fputs (_("\
173 -b, --binary read in binary mode (default unless reading tty stdin)\n\
174 "), stdout);
175 else
176 fputs (_("\
177 -b, --binary read in binary mode\n\
178 "), stdout);
179 printf (_("\
180 -c, --check read %s sums from the FILEs and check them\n"),
181 DIGEST_TYPE_STRING);
182 if (O_BINARY)
183 fputs (_("\
184 -t, --text read in text mode (default if reading tty stdin)\n\
185 "), stdout);
186 else
187 fputs (_("\
188 -t, --text read in text mode (default)\n\
189 "), stdout);
190 fputs (_("\
192 The following three options are useful only when verifying checksums:\n\
193 --quiet don't print OK for each successfully verified file\n\
194 --status don't output anything, status code shows success\n\
195 -w, --warn warn about improperly formatted checksum lines\n\
197 "), stdout);
198 fputs (_("\
199 --strict with --check, exit non-zero for any invalid input\n\
200 "), stdout);
201 fputs (HELP_OPTION_DESCRIPTION, stdout);
202 fputs (VERSION_OPTION_DESCRIPTION, stdout);
203 printf (_("\
205 The sums are computed as described in %s. When checking, the input\n\
206 should be a former output of this program. The default mode is to print\n\
207 a line with checksum, a character indicating input mode ('*' for binary,\n\
208 space for text), and name for each FILE.\n"),
209 DIGEST_REFERENCE);
210 emit_ancillary_info ();
213 exit (status);
#define ISWHITE(c) ((c) == ' ' || (c) == '\t')

/* Parse the part of a BSD 'md5' or 'sha1' checksum line that follows the
   opening parenthesis, i.e. text of the form "NAME) = DIGEST".

   S is that text and S_LEN its length; S must be NUL-terminated and is
   modified in place (the closing parenthesis is overwritten with NUL).
   On success *FILE_NAME points at NAME, *HEX_DIGEST points at the first
   non-blank character after the '=', and true is returned.  The digest
   itself is not validated here.  Return false when S is empty, contains
   no ')', or the ')' is not followed by optional blanks and an '='.  */

static bool
bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
             char **file_name)
{
  size_t close_paren;
  char *rest;

  if (s_len == 0)
    return false;

  *file_name = s;

  /* The BSD tools do not escape file names, so a name may itself contain
     ')'.  The delimiter is therefore the last ')' on the line.  */
  for (close_paren = s_len - 1;
       close_paren != 0 && s[close_paren] != ')';
       close_paren--)
    continue;

  if (s[close_paren] != ')')
    return false;

  /* Terminate the file name where the parenthesis was.  */
  s[close_paren] = '\0';
  rest = s + close_paren + 1;

  /* What remains must be: blanks, '=', blanks, digest.  */
  while (ISWHITE (*rest))
    rest++;

  if (*rest != '=')
    return false;
  rest++;

  while (ISWHITE (*rest))
    rest++;

  *hex_digest = (unsigned char *) rest;
  return true;
}
259 /* Split the string S (of length S_LEN) into three parts:
260 a hexadecimal digest, binary flag, and the file name.
261 S is modified. Return true if successful. */
263 static bool
264 split_3 (char *s, size_t s_len,
265 unsigned char **hex_digest, int *binary, char **file_name)
267 bool escaped_filename = false;
268 size_t algo_name_len;
270 size_t i = 0;
271 while (ISWHITE (s[i]))
272 ++i;
274 /* Check for BSD-style checksum line. */
275 algo_name_len = strlen (DIGEST_TYPE_STRING);
276 if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
278 if (s[i + algo_name_len] == ' ')
279 ++i;
280 if (s[i + algo_name_len] == '(')
282 *binary = 0;
283 return bsd_split_3 (s + i + algo_name_len + 1,
284 s_len - (i + algo_name_len + 1),
285 hex_digest, file_name);
289 /* Ignore this line if it is too short.
290 Each line must have at least 'min_digest_line_length - 1' (or one more, if
291 the first is a backslash) more characters to contain correct message digest
292 information. */
293 if (s_len - i < min_digest_line_length + (s[i] == '\\'))
294 return false;
296 if (s[i] == '\\')
298 ++i;
299 escaped_filename = true;
301 *hex_digest = (unsigned char *) &s[i];
303 /* The first field has to be the n-character hexadecimal
304 representation of the message digest. If it is not followed
305 immediately by a white space it's an error. */
306 i += digest_hex_bytes;
307 if (!ISWHITE (s[i]))
308 return false;
310 s[i++] = '\0';
312 /* If "bsd reversed" format detected. */
313 if ((s_len - i == 1) || (s[i] != ' ' && s[i] != '*'))
315 /* Don't allow mixing bsd and standard formats,
316 to minimize security issues with attackers
317 renaming files with leading spaces.
318 This assumes that with bsd format checksums
319 that the first file name does not have
320 a leading ' ' or '*'. */
321 if (bsd_reversed == 0)
322 return false;
323 bsd_reversed = 1;
325 else if (bsd_reversed != 1)
327 bsd_reversed = 0;
328 *binary = (s[i++] == '*');
331 /* All characters between the type indicator and end of line are
332 significant -- that includes leading and trailing white space. */
333 *file_name = &s[i];
335 if (escaped_filename)
337 /* Translate each '\n' string in the file name to a NEWLINE,
338 and each '\\' string to a backslash. */
340 char *dst = &s[i];
342 while (i < s_len)
344 switch (s[i])
346 case '\\':
347 if (i == s_len - 1)
349 /* A valid line does not end with a backslash. */
350 return false;
352 ++i;
353 switch (s[i++])
355 case 'n':
356 *dst++ = '\n';
357 break;
358 case '\\':
359 *dst++ = '\\';
360 break;
361 default:
362 /* Only '\' or 'n' may follow a backslash. */
363 return false;
365 break;
367 case '\0':
368 /* The file name may not contain a NUL. */
369 return false;
370 break;
372 default:
373 *dst++ = s[i++];
374 break;
377 *dst = '\0';
379 return true;
382 /* Return true if S is a NUL-terminated string of DIGEST_HEX_BYTES hex digits.
383 Otherwise, return false. */
384 static bool _GL_ATTRIBUTE_PURE
385 hex_digits (unsigned char const *s)
387 unsigned int i;
388 for (i = 0; i < digest_hex_bytes; i++)
390 if (!isxdigit (*s))
391 return false;
392 ++s;
394 return *s == '\0';
397 /* An interface to the function, DIGEST_STREAM.
398 Operate on FILENAME (it may be "-").
400 *BINARY indicates whether the file is binary. BINARY < 0 means it
401 depends on whether binary mode makes any difference and the file is
402 a terminal; in that case, clear *BINARY if the file was treated as
403 text because it was a terminal.
405 Put the checksum in *BIN_RESULT, which must be properly aligned.
406 Return true if successful. */
408 static bool
409 digest_file (const char *filename, int *binary, unsigned char *bin_result)
411 FILE *fp;
412 int err;
413 bool is_stdin = STREQ (filename, "-");
415 if (is_stdin)
417 have_read_stdin = true;
418 fp = stdin;
419 if (O_BINARY && *binary)
421 if (*binary < 0)
422 *binary = ! isatty (STDIN_FILENO);
423 if (*binary)
424 xfreopen (NULL, "rb", stdin);
427 else
429 fp = fopen (filename, (O_BINARY && *binary ? "rb" : "r"));
430 if (fp == NULL)
432 error (0, errno, "%s", filename);
433 return false;
437 fadvise (fp, FADVISE_SEQUENTIAL);
439 err = DIGEST_STREAM (fp, bin_result);
440 if (err)
442 error (0, errno, "%s", filename);
443 if (fp != stdin)
444 fclose (fp);
445 return false;
448 if (!is_stdin && fclose (fp) != 0)
450 error (0, errno, "%s", filename);
451 return false;
454 return true;
457 static bool
458 digest_check (const char *checkfile_name)
460 FILE *checkfile_stream;
461 uintmax_t n_misformatted_lines = 0;
462 uintmax_t n_properly_formatted_lines = 0;
463 uintmax_t n_improperly_formatted_lines = 0;
464 uintmax_t n_mismatched_checksums = 0;
465 uintmax_t n_open_or_read_failures = 0;
466 unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
467 /* Make sure bin_buffer is properly aligned. */
468 unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
469 uintmax_t line_number;
470 char *line;
471 size_t line_chars_allocated;
472 bool is_stdin = STREQ (checkfile_name, "-");
474 if (is_stdin)
476 have_read_stdin = true;
477 checkfile_name = _("standard input");
478 checkfile_stream = stdin;
480 else
482 checkfile_stream = fopen (checkfile_name, "r");
483 if (checkfile_stream == NULL)
485 error (0, errno, "%s", checkfile_name);
486 return false;
490 line_number = 0;
491 line = NULL;
492 line_chars_allocated = 0;
495 char *filename IF_LINT ( = NULL);
496 int binary;
497 unsigned char *hex_digest IF_LINT ( = NULL);
498 ssize_t line_length;
500 ++line_number;
501 if (line_number == 0)
502 error (EXIT_FAILURE, 0, _("%s: too many checksum lines"),
503 checkfile_name);
505 line_length = getline (&line, &line_chars_allocated, checkfile_stream);
506 if (line_length <= 0)
507 break;
509 /* Ignore comment lines, which begin with a '#' character. */
510 if (line[0] == '#')
511 continue;
513 /* Remove any trailing newline. */
514 if (line[line_length - 1] == '\n')
515 line[--line_length] = '\0';
517 if (! (split_3 (line, line_length, &hex_digest, &binary, &filename)
518 && ! (is_stdin && STREQ (filename, "-"))
519 && hex_digits (hex_digest)))
521 ++n_misformatted_lines;
523 if (warn)
525 error (0, 0,
526 _("%s: %" PRIuMAX
527 ": improperly formatted %s checksum line"),
528 checkfile_name, line_number,
529 DIGEST_TYPE_STRING);
532 ++n_improperly_formatted_lines;
534 else
536 static const char bin2hex[] = { '0', '1', '2', '3',
537 '4', '5', '6', '7',
538 '8', '9', 'a', 'b',
539 'c', 'd', 'e', 'f' };
540 bool ok;
542 ++n_properly_formatted_lines;
544 ok = digest_file (filename, &binary, bin_buffer);
546 if (!ok)
548 ++n_open_or_read_failures;
549 if (!status_only)
551 printf (_("%s: FAILED open or read\n"), filename);
554 else
556 size_t digest_bin_bytes = digest_hex_bytes / 2;
557 size_t cnt;
558 /* Compare generated binary number with text representation
559 in check file. Ignore case of hex digits. */
560 for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
562 if (tolower (hex_digest[2 * cnt])
563 != bin2hex[bin_buffer[cnt] >> 4]
564 || (tolower (hex_digest[2 * cnt + 1])
565 != (bin2hex[bin_buffer[cnt] & 0xf])))
566 break;
568 if (cnt != digest_bin_bytes)
569 ++n_mismatched_checksums;
571 if (!status_only)
573 if (cnt != digest_bin_bytes)
574 printf ("%s: %s\n", filename, _("FAILED"));
575 else if (!quiet)
576 printf ("%s: %s\n", filename, _("OK"));
581 while (!feof (checkfile_stream) && !ferror (checkfile_stream));
583 free (line);
585 if (ferror (checkfile_stream))
587 error (0, 0, _("%s: read error"), checkfile_name);
588 return false;
591 if (!is_stdin && fclose (checkfile_stream) != 0)
593 error (0, errno, "%s", checkfile_name);
594 return false;
597 if (n_properly_formatted_lines == 0)
599 /* Warn if no tests are found. */
600 error (0, 0, _("%s: no properly formatted %s checksum lines found"),
601 checkfile_name, DIGEST_TYPE_STRING);
603 else
605 if (!status_only)
607 if (n_misformatted_lines != 0)
608 error (0, 0,
609 (ngettext
610 ("WARNING: %" PRIuMAX " line is improperly formatted",
611 "WARNING: %" PRIuMAX " lines are improperly formatted",
612 select_plural (n_misformatted_lines))),
613 n_misformatted_lines);
615 if (n_open_or_read_failures != 0)
616 error (0, 0,
617 (ngettext
618 ("WARNING: %" PRIuMAX " listed file could not be read",
619 "WARNING: %" PRIuMAX " listed files could not be read",
620 select_plural (n_open_or_read_failures))),
621 n_open_or_read_failures);
623 if (n_mismatched_checksums != 0)
624 error (0, 0,
625 (ngettext
626 ("WARNING: %" PRIuMAX " computed checksum did NOT match",
627 "WARNING: %" PRIuMAX " computed checksums did NOT match",
628 select_plural (n_mismatched_checksums))),
629 n_mismatched_checksums);
633 return (n_properly_formatted_lines != 0
634 && n_mismatched_checksums == 0
635 && n_open_or_read_failures == 0
636 && (!strict || n_improperly_formatted_lines == 0));
640 main (int argc, char **argv)
642 unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
643 /* Make sure bin_buffer is properly aligned. */
644 unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
645 bool do_check = false;
646 int opt;
647 bool ok = true;
648 int binary = -1;
650 /* Setting values of global variables. */
651 initialize_main (&argc, &argv);
652 set_program_name (argv[0]);
653 setlocale (LC_ALL, "");
654 bindtextdomain (PACKAGE, LOCALEDIR);
655 textdomain (PACKAGE);
657 atexit (close_stdout);
659 /* Line buffer stdout to ensure lines are written atomically and immediately
660 so that processes running in parallel do not intersperse their output. */
661 setvbuf (stdout, NULL, _IOLBF, 0);
663 while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
664 switch (opt)
666 case 'b':
667 binary = 1;
668 break;
669 case 'c':
670 do_check = true;
671 break;
672 case STATUS_OPTION:
673 status_only = true;
674 warn = false;
675 quiet = false;
676 break;
677 case 't':
678 binary = 0;
679 break;
680 case 'w':
681 status_only = false;
682 warn = true;
683 quiet = false;
684 break;
685 case QUIET_OPTION:
686 status_only = false;
687 warn = false;
688 quiet = true;
689 break;
690 case STRICT_OPTION:
691 strict = true;
692 break;
693 case_GETOPT_HELP_CHAR;
694 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
695 default:
696 usage (EXIT_FAILURE);
699 min_digest_line_length = MIN_DIGEST_LINE_LENGTH;
700 digest_hex_bytes = DIGEST_HEX_BYTES;
702 if (0 <= binary && do_check)
704 error (0, 0, _("the --binary and --text options are meaningless when "
705 "verifying checksums"));
706 usage (EXIT_FAILURE);
709 if (status_only && !do_check)
711 error (0, 0,
712 _("the --status option is meaningful only when verifying checksums"));
713 usage (EXIT_FAILURE);
716 if (warn && !do_check)
718 error (0, 0,
719 _("the --warn option is meaningful only when verifying checksums"));
720 usage (EXIT_FAILURE);
723 if (quiet && !do_check)
725 error (0, 0,
726 _("the --quiet option is meaningful only when verifying checksums"));
727 usage (EXIT_FAILURE);
730 if (strict & !do_check)
732 error (0, 0,
733 _("the --strict option is meaningful only when verifying checksums"));
734 usage (EXIT_FAILURE);
737 if (!O_BINARY && binary < 0)
738 binary = 0;
740 if (optind == argc)
741 argv[argc++] = bad_cast ("-");
743 for (; optind < argc; ++optind)
745 char *file = argv[optind];
747 if (do_check)
748 ok &= digest_check (file);
749 else
751 int file_is_binary = binary;
753 if (! digest_file (file, &file_is_binary, bin_buffer))
754 ok = false;
755 else
757 size_t i;
759 /* Output a leading backslash if the file name contains
760 a newline or backslash. */
761 if (strchr (file, '\n') || strchr (file, '\\'))
762 putchar ('\\');
764 for (i = 0; i < (digest_hex_bytes / 2); ++i)
765 printf ("%02x", bin_buffer[i]);
767 putchar (' ');
768 if (file_is_binary)
769 putchar ('*');
770 else
771 putchar (' ');
773 /* Translate each NEWLINE byte to the string, "\\n",
774 and each backslash to "\\\\". */
775 for (i = 0; i < strlen (file); ++i)
777 switch (file[i])
779 case '\n':
780 fputs ("\\n", stdout);
781 break;
783 case '\\':
784 fputs ("\\\\", stdout);
785 break;
787 default:
788 putchar (file[i]);
789 break;
792 putchar ('\n');
797 if (have_read_stdin && fclose (stdin) == EOF)
798 error (EXIT_FAILURE, errno, _("standard input"));
800 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);