Handle invalid sparse entries in pax header
[tar.git] / src / sparse.c
blob7587edb2bcec6f6e0c1c109af0a03759cd9922b4
1 /* Functions for dealing with sparse files
3 Copyright 2003-2021 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any later
8 version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program. If not, see <http://www.gnu.org/licenses/>. */
18 #include <system.h>
19 #include <inttostr.h>
20 #include <quotearg.h>
21 #include "common.h"
struct tar_sparse_file;
static bool sparse_select_optab (struct tar_sparse_file *file);

/* Phase handed to an optab scan_block hook: scan_begin before any data
   is read, scan_block while blocks are being examined, and scan_end once
   the scan is over.  */
enum sparse_scan_state
  {
    scan_begin,
    scan_block,
    scan_end
  };
/* Per-format operation table.  Any member may be NULL; the tar_sparse_*
   wrappers in this file substitute a fixed result when a hook is absent.  */
struct tar_sparse_optab
{
  bool (*init) (struct tar_sparse_file *file);
  bool (*done) (struct tar_sparse_file *file);
  bool (*sparse_member_p) (struct tar_sparse_file *file);
  bool (*dump_header) (struct tar_sparse_file *file);
  bool (*fixup_header) (struct tar_sparse_file *file);
  bool (*decode_header) (struct tar_sparse_file *file);
  bool (*scan_block) (struct tar_sparse_file *file,
                      enum sparse_scan_state state,
                      void *block);
  bool (*dump_region) (struct tar_sparse_file *file, size_t i);
  bool (*extract_region) (struct tar_sparse_file *file, size_t i);
};
/* Working state shared by all sparse-file operations.  */
struct tar_sparse_file
{
  /* File descriptor.  */
  int fd;

  /* Is fd seekable?  */
  bool seekable;

  /* Current offset in fd if seekable==false.  Otherwise unused.  */
  off_t offset;

  /* Number of bytes actually written to the archive.  */
  off_t dumped_size;

  /* Information about the file.  */
  struct tar_stat_info *stat_info;

  /* Operation table.  */
  struct tar_sparse_optab const *optab;

  /* Any additional data optab calls might require.  */
  void *closure;
};
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62 lseek if the output file is not seekable */
63 static bool
64 dump_zeros (struct tar_sparse_file *file, off_t offset)
66 static char const zero_buf[BLOCKSIZE];
68 if (offset < file->offset)
70 errno = EINVAL;
71 return false;
74 while (file->offset < offset)
76 size_t size = (BLOCKSIZE < offset - file->offset
77 ? BLOCKSIZE
78 : offset - file->offset);
79 ssize_t wrbytes;
81 wrbytes = write (file->fd, zero_buf, size);
82 if (wrbytes <= 0)
84 if (wrbytes == 0)
85 errno = EINVAL;
86 return false;
88 file->offset += wrbytes;
91 return true;
94 static bool
95 tar_sparse_member_p (struct tar_sparse_file *file)
97 if (file->optab->sparse_member_p)
98 return file->optab->sparse_member_p (file);
99 return false;
102 static bool
103 tar_sparse_init (struct tar_sparse_file *file)
105 memset (file, 0, sizeof *file);
107 if (!sparse_select_optab (file))
108 return false;
110 if (file->optab->init)
111 return file->optab->init (file);
113 return true;
116 static bool
117 tar_sparse_done (struct tar_sparse_file *file)
119 if (file->optab->done)
120 return file->optab->done (file);
121 return true;
124 static bool
125 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
126 void *block)
128 if (file->optab->scan_block)
129 return file->optab->scan_block (file, state, block);
130 return true;
133 static bool
134 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
136 if (file->optab->dump_region)
137 return file->optab->dump_region (file, i);
138 return false;
141 static bool
142 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
144 if (file->optab->extract_region)
145 return file->optab->extract_region (file, i);
146 return false;
149 static bool
150 tar_sparse_dump_header (struct tar_sparse_file *file)
152 if (file->optab->dump_header)
153 return file->optab->dump_header (file);
154 return false;
157 static bool
158 tar_sparse_decode_header (struct tar_sparse_file *file)
160 if (file->optab->decode_header)
161 return file->optab->decode_header (file);
162 return true;
165 static bool
166 tar_sparse_fixup_header (struct tar_sparse_file *file)
168 if (file->optab->fixup_header)
169 return file->optab->fixup_header (file);
170 return true;
174 static bool
175 lseek_or_error (struct tar_sparse_file *file, off_t offset)
177 if (file->seekable
178 ? lseek (file->fd, offset, SEEK_SET) < 0
179 : ! dump_zeros (file, offset))
181 seek_diag_details (file->stat_info->orig_file_name, offset);
182 return false;
184 return true;
/* Return true if the SIZE bytes at BUFFER are all zero (trivially so when
   SIZE is 0); return false as soon as a nonzero byte, i.e. useful data,
   is found.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  size_t pos;

  for (pos = 0; pos < size; pos++)
    if (buffer[pos] != 0)
      return false;
  return true;
}
199 static void
200 sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
202 struct sp_array *sparse_map = st->sparse_map;
203 size_t avail = st->sparse_map_avail;
204 if (avail == st->sparse_map_size)
205 st->sparse_map = sparse_map =
206 x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
207 sparse_map[avail] = *sp;
208 st->sparse_map_avail = avail + 1;
211 /* Scan the sparse file byte-by-byte and create its map. */
212 static bool
213 sparse_scan_file_raw (struct tar_sparse_file *file)
215 struct tar_stat_info *st = file->stat_info;
216 int fd = file->fd;
217 char buffer[BLOCKSIZE];
218 size_t count = 0;
219 off_t offset = 0;
220 struct sp_array sp = {0, 0};
222 st->archive_file_size = 0;
224 if (!tar_sparse_scan (file, scan_begin, NULL))
225 return false;
227 while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
228 && count != SAFE_READ_ERROR)
230 /* Analyze the block. */
231 if (zero_block_p (buffer, count))
233 if (sp.numbytes)
235 sparse_add_map (st, &sp);
236 sp.numbytes = 0;
237 if (!tar_sparse_scan (file, scan_block, NULL))
238 return false;
241 else
243 if (sp.numbytes == 0)
244 sp.offset = offset;
245 sp.numbytes += count;
246 st->archive_file_size += count;
247 if (!tar_sparse_scan (file, scan_block, buffer))
248 return false;
251 offset += count;
254 /* save one more sparse segment of length 0 to indicate that
255 the file ends with a hole */
256 if (sp.numbytes == 0)
257 sp.offset = offset;
259 sparse_add_map (st, &sp);
260 st->archive_file_size += count;
261 return tar_sparse_scan (file, scan_end, NULL);
264 static bool
265 sparse_scan_file_wholesparse (struct tar_sparse_file *file)
267 struct tar_stat_info *st = file->stat_info;
268 struct sp_array sp = {0, 0};
270 /* Note that this function is called only for truly sparse files of size >= 1
271 block size (checked via ST_IS_SPARSE before). See the thread
272 http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
273 if (ST_NBLOCKS (st->stat) == 0)
275 st->archive_file_size = 0;
276 sp.offset = st->stat.st_size;
277 sparse_add_map (st, &sp);
278 return true;
281 return false;
#ifdef SEEK_HOLE
/* Build FILE's sparse map using lseek with SEEK_DATA/SEEK_HOLE.  Return
   true once the map is complete.  Return false if lseek fails with
   anything other than ENXIO, or if the first probe reports the whole file
   as one data chunk, in which case the descriptor is rewound so another
   detection method can be tried.  */
static bool
sparse_scan_file_seek (struct tar_sparse_file *file)
{
  struct tar_stat_info *st = file->stat_info;
  int fd = file->fd;
  off_t pos = 0;

  st->archive_file_size = 0;

  while (true)
    {
      struct sp_array chunk = {0, 0};
      off_t data_end;

      /* Locate the next chunk of data.  */
      off_t data_start = lseek (fd, pos, SEEK_DATA);

      if (data_start == (off_t) -1)
        {
          /* ENXIO == EOF; error otherwise.  */
          if (errno != ENXIO)
            return false;

          /* File ends with hole, add one more empty chunk of data.  */
          chunk.offset = st->stat.st_size;
          sparse_add_map (st, &chunk);
          return true;
        }

      data_end = lseek (fd, data_start, SEEK_HOLE);

      /* According to specs, if FS does not fully support
         SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper" around
         classic lseek() call.  We must detect it here and try to use other
         hole-detection methods.  */
      if (pos == 0 /* first loop */
          && data_start == 0
          && data_end == st->stat.st_size)
        {
          lseek (fd, 0, SEEK_SET);
          return false;
        }

      chunk.offset = data_start;
      chunk.numbytes = data_end - data_start;
      sparse_add_map (st, &chunk);

      st->archive_file_size += chunk.numbytes;
      pos = data_end;
    }
}
#endif
341 static bool
342 sparse_scan_file (struct tar_sparse_file *file)
344 /* always check for completely sparse files */
345 if (sparse_scan_file_wholesparse (file))
346 return true;
348 switch (hole_detection)
350 case HOLE_DETECTION_DEFAULT:
351 case HOLE_DETECTION_SEEK:
352 #ifdef SEEK_HOLE
353 if (sparse_scan_file_seek (file))
354 return true;
355 #else
356 if (hole_detection == HOLE_DETECTION_SEEK)
357 WARN((0, 0,
358 _("\"seek\" hole detection is not supported, using \"raw\".")));
359 /* fall back to "raw" for this and all other files */
360 hole_detection = HOLE_DETECTION_RAW;
361 #endif
362 FALLTHROUGH;
363 case HOLE_DETECTION_RAW:
364 if (sparse_scan_file_raw (file))
365 return true;
368 return false;
371 static struct tar_sparse_optab const oldgnu_optab;
372 static struct tar_sparse_optab const star_optab;
373 static struct tar_sparse_optab const pax_optab;
375 static bool
376 sparse_select_optab (struct tar_sparse_file *file)
378 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
380 case V7_FORMAT:
381 case USTAR_FORMAT:
382 return false;
384 case OLDGNU_FORMAT:
385 case GNU_FORMAT: /*FIXME: This one should disappear? */
386 file->optab = &oldgnu_optab;
387 break;
389 case POSIX_FORMAT:
390 file->optab = &pax_optab;
391 break;
393 case STAR_FORMAT:
394 file->optab = &star_optab;
395 break;
397 default:
398 return false;
400 return true;
403 static bool
404 sparse_dump_region (struct tar_sparse_file *file, size_t i)
406 union block *blk;
407 off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
409 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
410 return false;
412 while (bytes_left > 0)
414 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
415 size_t bytes_read;
417 blk = find_next_block ();
418 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
419 if (bytes_read == SAFE_READ_ERROR)
421 read_diag_details (file->stat_info->orig_file_name,
422 (file->stat_info->sparse_map[i].offset
423 + file->stat_info->sparse_map[i].numbytes
424 - bytes_left),
425 bufsize);
426 return false;
428 else if (bytes_read == 0)
430 char buf[UINTMAX_STRSIZE_BOUND];
431 struct stat st;
432 size_t n;
433 if (fstat (file->fd, &st) == 0)
434 n = file->stat_info->stat.st_size - st.st_size;
435 else
436 n = file->stat_info->stat.st_size
437 - (file->stat_info->sparse_map[i].offset
438 + file->stat_info->sparse_map[i].numbytes
439 - bytes_left);
441 WARNOPT (WARN_FILE_SHRANK,
442 (0, 0,
443 ngettext ("%s: File shrank by %s byte; padding with zeros",
444 "%s: File shrank by %s bytes; padding with zeros",
446 quotearg_colon (file->stat_info->orig_file_name),
447 STRINGIFY_BIGINT (n, buf)));
448 if (! ignore_failed_read_option)
449 set_exit_status (TAREXIT_DIFFERS);
450 return false;
453 memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
454 bytes_left -= bytes_read;
455 file->dumped_size += bytes_read;
456 set_next_block_after (blk);
459 return true;
462 static bool
463 sparse_extract_region (struct tar_sparse_file *file, size_t i)
465 off_t write_size;
467 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
468 return false;
470 write_size = file->stat_info->sparse_map[i].numbytes;
472 if (write_size == 0)
474 /* Last block of the file is a hole */
475 if (file->seekable && sys_truncate (file->fd))
476 truncate_warn (file->stat_info->orig_file_name);
478 else while (write_size > 0)
480 size_t count;
481 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
482 union block *blk = find_next_block ();
483 if (!blk)
485 ERROR ((0, 0, _("Unexpected EOF in archive")));
486 return false;
488 set_next_block_after (blk);
489 file->dumped_size += BLOCKSIZE;
490 count = blocking_write (file->fd, blk->buffer, wrbytes);
491 write_size -= count;
492 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
493 file->offset += count;
494 if (count != wrbytes)
496 write_error_details (file->stat_info->orig_file_name,
497 count, wrbytes);
498 return false;
501 return true;
506 /* Interface functions */
507 enum dump_status
508 sparse_dump_file (int fd, struct tar_stat_info *st)
510 bool rc;
511 struct tar_sparse_file file;
513 if (!tar_sparse_init (&file))
514 return dump_status_not_implemented;
516 file.stat_info = st;
517 file.fd = fd;
518 file.seekable = true; /* File *must* be seekable for dump to work */
520 rc = sparse_scan_file (&file);
521 if (rc && file.optab->dump_region)
523 tar_sparse_dump_header (&file);
525 if (fd >= 0)
527 size_t i;
529 mv_begin_write (file.stat_info->file_name,
530 file.stat_info->stat.st_size,
531 file.stat_info->archive_file_size - file.dumped_size);
532 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
533 rc = tar_sparse_dump_region (&file, i);
537 pad_archive (file.stat_info->archive_file_size - file.dumped_size);
538 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
541 bool
542 sparse_member_p (struct tar_stat_info *st)
544 struct tar_sparse_file file;
546 if (!tar_sparse_init (&file))
547 return false;
548 file.stat_info = st;
549 return tar_sparse_member_p (&file);
552 bool
553 sparse_fixup_header (struct tar_stat_info *st)
555 struct tar_sparse_file file;
557 if (!tar_sparse_init (&file))
558 return false;
559 file.stat_info = st;
560 return tar_sparse_fixup_header (&file);
563 enum dump_status
564 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
566 bool rc = true;
567 struct tar_sparse_file file;
568 size_t i;
570 if (!tar_sparse_init (&file))
572 *size = st->stat.st_size;
573 return dump_status_not_implemented;
576 file.stat_info = st;
577 file.fd = fd;
578 file.seekable = lseek (fd, 0, SEEK_SET) == 0;
579 file.offset = 0;
581 rc = tar_sparse_decode_header (&file);
582 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
583 rc = tar_sparse_extract_region (&file, i);
584 *size = file.stat_info->archive_file_size - file.dumped_size;
585 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
588 enum dump_status
589 sparse_skip_file (struct tar_stat_info *st)
591 bool rc = true;
592 struct tar_sparse_file file;
594 if (!tar_sparse_init (&file))
595 return dump_status_not_implemented;
597 file.stat_info = st;
598 file.fd = -1;
600 rc = tar_sparse_decode_header (&file);
601 skip_file (file.stat_info->archive_file_size - file.dumped_size);
602 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
606 static bool
607 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
609 if (!lseek_or_error (file, beg))
610 return false;
612 while (beg < end)
614 size_t bytes_read;
615 size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
616 char diff_buffer[BLOCKSIZE];
618 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
619 if (bytes_read == SAFE_READ_ERROR)
621 read_diag_details (file->stat_info->orig_file_name,
622 beg,
623 rdsize);
624 return false;
626 else if (bytes_read == 0)
628 report_difference (file->stat_info, _("Size differs"));
629 return false;
632 if (!zero_block_p (diff_buffer, bytes_read))
634 char begbuf[INT_BUFSIZE_BOUND (off_t)];
635 report_difference (file->stat_info,
636 _("File fragment at %s is not a hole"),
637 offtostr (beg, begbuf));
638 return false;
641 beg += bytes_read;
644 return true;
647 static bool
648 check_data_region (struct tar_sparse_file *file, size_t i)
650 off_t size_left;
652 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
653 return false;
654 size_left = file->stat_info->sparse_map[i].numbytes;
655 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
657 while (size_left > 0)
659 size_t bytes_read;
660 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
661 char diff_buffer[BLOCKSIZE];
663 union block *blk = find_next_block ();
664 if (!blk)
666 ERROR ((0, 0, _("Unexpected EOF in archive")));
667 return false;
669 set_next_block_after (blk);
670 file->dumped_size += BLOCKSIZE;
671 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
672 if (bytes_read == SAFE_READ_ERROR)
674 read_diag_details (file->stat_info->orig_file_name,
675 (file->stat_info->sparse_map[i].offset
676 + file->stat_info->sparse_map[i].numbytes
677 - size_left),
678 rdsize);
679 return false;
681 else if (bytes_read == 0)
683 report_difference (&current_stat_info, _("Size differs"));
684 return false;
686 size_left -= bytes_read;
687 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
688 if (memcmp (blk->buffer, diff_buffer, bytes_read))
690 report_difference (file->stat_info, _("Contents differ"));
691 return false;
694 return true;
697 bool
698 sparse_diff_file (int fd, struct tar_stat_info *st)
700 bool rc = true;
701 struct tar_sparse_file file;
702 size_t i;
703 off_t offset = 0;
705 if (!tar_sparse_init (&file))
706 return false;
708 file.stat_info = st;
709 file.fd = fd;
710 file.seekable = true; /* File *must* be seekable for compare to work */
712 rc = tar_sparse_decode_header (&file);
713 mv_begin_read (st);
714 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
716 rc = check_sparse_region (&file,
717 offset, file.stat_info->sparse_map[i].offset)
718 && check_data_region (&file, i);
719 offset = file.stat_info->sparse_map[i].offset
720 + file.stat_info->sparse_map[i].numbytes;
723 if (!rc)
724 skip_file (file.stat_info->archive_file_size - file.dumped_size);
725 mv_end ();
727 tar_sparse_done (&file);
728 return rc;
732 /* Old GNU Format. The sparse file information is stored in the
733 oldgnu_header in the following manner:
735 The header is marked with type 'S'. Its 'size' field contains
736 the cumulative size of all non-empty blocks of the file. The
737 actual file size is stored in 'realsize' member of oldgnu_header.
739 The map of the file is stored in a list of 'struct sparse'.
740 Each struct contains offset to the block of data and its
741 size (both as octal numbers). The first file header contains
742 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
743 contains more structs, then the field 'isextended' of the main
744 header is set to 1 (binary) and the 'struct sparse_header'
745 header follows, containing at most 21 following structs
746 (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended'
747 field of the extended header is set and the next extension header
748 follows, etc... */
/* Outcome of adding one header sparse entry to the map: add_ok when the
   entry was stored, add_finish when the entry is empty, add_fail when
   the entry is invalid.  */
enum oldgnu_add_status
  {
    add_ok,
    add_finish,
    add_fail
  };
757 static bool
758 oldgnu_sparse_member_p (struct tar_sparse_file *file MAYBE_UNUSED)
760 return current_header->header.typeflag == GNUTYPE_SPARSE;
763 /* Add a sparse item to the sparse file and its obstack */
764 static enum oldgnu_add_status
765 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
767 struct sp_array sp;
769 if (s->numbytes[0] == '\0')
770 return add_finish;
771 sp.offset = OFF_FROM_HEADER (s->offset);
772 sp.numbytes = OFF_FROM_HEADER (s->numbytes);
773 if (sp.offset < 0 || sp.numbytes < 0
774 || INT_ADD_OVERFLOW (sp.offset, sp.numbytes)
775 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
776 || file->stat_info->archive_file_size < 0)
777 return add_fail;
779 sparse_add_map (file->stat_info, &sp);
780 return add_ok;
783 static bool
784 oldgnu_fixup_header (struct tar_sparse_file *file)
786 /* NOTE! st_size was initialized from the header
787 which actually contains archived size. The following fixes it */
788 off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
789 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
790 file->stat_info->stat.st_size = max (0, realsize);
791 return 0 <= realsize;
794 /* Convert old GNU format sparse data to internal representation */
795 static bool
796 oldgnu_get_sparse_info (struct tar_sparse_file *file)
798 size_t i;
799 union block *h = current_header;
800 int ext_p;
801 enum oldgnu_add_status rc;
803 file->stat_info->sparse_map_avail = 0;
804 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
806 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
807 if (rc != add_ok)
808 break;
811 for (ext_p = h->oldgnu_header.isextended;
812 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
814 h = find_next_block ();
815 if (!h)
817 ERROR ((0, 0, _("Unexpected EOF in archive")));
818 return false;
820 set_next_block_after (h);
821 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
822 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
825 if (rc == add_fail)
827 ERROR ((0, 0, _("%s: invalid sparse archive member"),
828 file->stat_info->orig_file_name));
829 return false;
831 return true;
834 static void
835 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
836 struct sparse *sp, size_t sparse_size)
838 for (; *pindex < file->stat_info->sparse_map_avail
839 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
841 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
842 sp->offset);
843 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
844 sp->numbytes);
848 static bool
849 oldgnu_dump_header (struct tar_sparse_file *file)
851 off_t block_ordinal = current_block_ordinal ();
852 union block *blk;
853 size_t i;
855 blk = start_header (file->stat_info);
856 blk->header.typeflag = GNUTYPE_SPARSE;
857 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
858 blk->oldgnu_header.isextended = 1;
860 /* Store the real file size */
861 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
862 /* Store the effective (shrunken) file size */
863 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
865 i = 0;
866 oldgnu_store_sparse_info (file, &i,
867 blk->oldgnu_header.sp,
868 SPARSES_IN_OLDGNU_HEADER);
869 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
870 finish_header (file->stat_info, blk, block_ordinal);
872 while (i < file->stat_info->sparse_map_avail)
874 blk = find_next_block ();
875 memset (blk->buffer, 0, BLOCKSIZE);
876 oldgnu_store_sparse_info (file, &i,
877 blk->sparse_header.sp,
878 SPARSES_IN_SPARSE_HEADER);
879 if (i < file->stat_info->sparse_map_avail)
880 blk->sparse_header.isextended = 1;
881 set_next_block_after (blk);
883 return true;
886 static struct tar_sparse_optab const oldgnu_optab = {
887 NULL, /* No init function */
888 NULL, /* No done function */
889 oldgnu_sparse_member_p,
890 oldgnu_dump_header,
891 oldgnu_fixup_header,
892 oldgnu_get_sparse_info,
893 NULL, /* No scan_block function */
894 sparse_dump_region,
895 sparse_extract_region,
899 /* Star */
901 static bool
902 star_sparse_member_p (struct tar_sparse_file *file MAYBE_UNUSED)
904 return current_header->header.typeflag == GNUTYPE_SPARSE;
907 static bool
908 star_fixup_header (struct tar_sparse_file *file)
910 /* NOTE! st_size was initialized from the header
911 which actually contains archived size. The following fixes it */
912 off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
913 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
914 file->stat_info->stat.st_size = max (0, realsize);
915 return 0 <= realsize;
918 /* Convert STAR format sparse data to internal representation */
919 static bool
920 star_get_sparse_info (struct tar_sparse_file *file)
922 size_t i;
923 union block *h = current_header;
924 int ext_p;
925 enum oldgnu_add_status rc = add_ok;
927 file->stat_info->sparse_map_avail = 0;
929 if (h->star_in_header.prefix[0] == '\0'
930 && h->star_in_header.sp[0].offset[10] != '\0')
932 /* Old star format */
933 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
935 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
936 if (rc != add_ok)
937 break;
939 ext_p = h->star_in_header.isextended;
941 else
942 ext_p = 1;
944 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
946 h = find_next_block ();
947 if (!h)
949 ERROR ((0, 0, _("Unexpected EOF in archive")));
950 return false;
952 set_next_block_after (h);
953 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
954 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
955 file->dumped_size += BLOCKSIZE;
958 if (rc == add_fail)
960 ERROR ((0, 0, _("%s: invalid sparse archive member"),
961 file->stat_info->orig_file_name));
962 return false;
964 return true;
968 static struct tar_sparse_optab const star_optab = {
969 NULL, /* No init function */
970 NULL, /* No done function */
971 star_sparse_member_p,
972 NULL,
973 star_fixup_header,
974 star_get_sparse_info,
975 NULL, /* No scan_block function */
976 NULL, /* No dump region function */
977 sparse_extract_region,
981 /* GNU PAX sparse file format. There are several versions:
983 * 0.0
985 The initial version of sparse format used by tar 1.14-1.15.1.
986 The sparse file map is stored in x header:
988 GNU.sparse.size Real size of the stored file
989 GNU.sparse.numblocks Number of blocks in the sparse map
990 repeat numblocks time
991 GNU.sparse.offset Offset of the next data block
992 GNU.sparse.numbytes Size of the next data block
993 end repeat
995 This has been reported as conflicting with the POSIX specs. The reason is
996 that offsets and sizes of non-zero data blocks were stored in multiple
997 instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
998 POSIX requires the latest occurrence of the variable to override all
999 previous occurrences.
1001 To avoid this incompatibility two following versions were introduced.
1003 * 0.1
1005 Used by tar 1.15.2 -- 1.15.91 (alpha releases).
1007 The sparse file map is stored in
1008 x header:
1010 GNU.sparse.size Real size of the stored file
1011 GNU.sparse.numblocks Number of blocks in the sparse map
1012 GNU.sparse.map Map of non-null data chunks. A string consisting
1013 of comma-separated values "offset,size[,offset,size]..."
1015 The resulting GNU.sparse.map string can be *very* long. While POSIX does not
1016 impose any limit on the length of a x header variable, this can confuse some
1017 tars.
1019 * 1.0
1021 Starting from this version, the exact sparse format version is specified
1022 explicitly in the header using the following variables:
1024 GNU.sparse.major Major version
1025 GNU.sparse.minor Minor version
1027 X header keeps the following variables:
1029 GNU.sparse.name Real file name of the sparse file
1030 GNU.sparse.realsize Real size of the stored file (corresponds to the old
1031 GNU.sparse.size variable)
1033 The name field of the ustar header is constructed using the pattern
1034 "%d/GNUSparseFile.%p/%f".
1036 The sparse map itself is stored in the file data block, preceding the actual
1037 file data. It consists of a series of octal numbers of arbitrary length,
1038 delimited by newlines. The map is padded with nulls to the nearest block
1039 boundary.
1041 The first number gives the number of entries in the map. Following are map
1042 entries, each one consisting of two numbers giving the offset and size of
1043 the data block it describes.
1045 The format is designed in such a way that non-posix aware tars and tars not
1046 supporting GNU.sparse.* keywords will extract each sparse file in its
1047 condensed form with the file map attached and will place it into a separate
1048 directory. Then, using a simple program it would be possible to expand the
1049 file to its original form even without GNU tar.
1051 By default, v.1.0 archives are created. To use other formats,
1052 --sparse-version option is provided. Additionally, v.0.0 can be obtained
1053 by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
1054 --pax-option delete=GNU.sparse.map
1057 static bool
1058 pax_sparse_member_p (struct tar_sparse_file *file)
1060 return file->stat_info->sparse_map_avail > 0
1061 || file->stat_info->sparse_major > 0;
1064 /* Start a header that uses the effective (shrunken) file size. */
1065 static union block *
1066 pax_start_header (struct tar_stat_info *st)
1068 off_t realsize = st->stat.st_size;
1069 union block *blk;
1070 st->stat.st_size = st->archive_file_size;
1071 blk = start_header (st);
1072 st->stat.st_size = realsize;
1073 return blk;
1076 static bool
1077 pax_dump_header_0 (struct tar_sparse_file *file)
1079 off_t block_ordinal = current_block_ordinal ();
1080 union block *blk;
1081 size_t i;
1082 char nbuf[UINTMAX_STRSIZE_BOUND];
1083 struct sp_array *map = file->stat_info->sparse_map;
1084 char *save_file_name = NULL;
1086 /* Store the real file size */
1087 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
1088 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
1090 if (xheader_keyword_deleted_p ("GNU.sparse.map")
1091 || tar_sparse_minor == 0)
1093 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1095 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
1096 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
1099 else
1101 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1102 save_file_name = file->stat_info->file_name;
1103 file->stat_info->file_name = xheader_format_name (file->stat_info,
1104 "%d/GNUSparseFile.%p/%f", 0);
1106 xheader_string_begin (&file->stat_info->xhdr);
1107 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1109 if (i)
1110 xheader_string_add (&file->stat_info->xhdr, ",");
1111 xheader_string_add (&file->stat_info->xhdr,
1112 umaxtostr (map[i].offset, nbuf));
1113 xheader_string_add (&file->stat_info->xhdr, ",");
1114 xheader_string_add (&file->stat_info->xhdr,
1115 umaxtostr (map[i].numbytes, nbuf));
1117 if (!xheader_string_end (&file->stat_info->xhdr,
1118 "GNU.sparse.map"))
1120 free (file->stat_info->file_name);
1121 file->stat_info->file_name = save_file_name;
1122 return false;
1125 blk = pax_start_header (file->stat_info);
1126 finish_header (file->stat_info, blk, block_ordinal);
1127 if (save_file_name)
1129 free (file->stat_info->file_name);
1130 file->stat_info->file_name = save_file_name;
1132 return true;
1135 static bool
1136 pax_dump_header_1 (struct tar_sparse_file *file)
1138 off_t block_ordinal = current_block_ordinal ();
1139 union block *blk;
1140 char *p, *q;
1141 size_t i;
1142 char nbuf[UINTMAX_STRSIZE_BOUND];
1143 off_t size = 0;
1144 struct sp_array *map = file->stat_info->sparse_map;
1145 char *save_file_name = file->stat_info->file_name;
1147 #define COPY_STRING(b,dst,src) do \
1149 char *endp = b->buffer + BLOCKSIZE; \
1150 char const *srcp = src; \
1151 while (*srcp) \
1153 if (dst == endp) \
1155 set_next_block_after (b); \
1156 b = find_next_block (); \
1157 dst = b->buffer; \
1158 endp = b->buffer + BLOCKSIZE; \
1160 *dst++ = *srcp++; \
1162 } while (0)
1164 /* Compute stored file size */
1165 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1166 size += strlen (p) + 1;
1167 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1169 p = umaxtostr (map[i].offset, nbuf);
1170 size += strlen (p) + 1;
1171 p = umaxtostr (map[i].numbytes, nbuf);
1172 size += strlen (p) + 1;
1174 size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1175 file->stat_info->archive_file_size += size * BLOCKSIZE;
1176 file->dumped_size += size * BLOCKSIZE;
1178 /* Store sparse file identification */
1179 xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1180 xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1181 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1182 xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1184 file->stat_info->file_name =
1185 xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0);
1186 /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */
1187 if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE)
1188 file->stat_info->file_name[NAME_FIELD_SIZE] = 0;
1190 blk = pax_start_header (file->stat_info);
1191 finish_header (file->stat_info, blk, block_ordinal);
1192 free (file->stat_info->file_name);
1193 file->stat_info->file_name = save_file_name;
1195 blk = find_next_block ();
1196 q = blk->buffer;
1197 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1198 COPY_STRING (blk, q, p);
1199 COPY_STRING (blk, q, "\n");
1200 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1202 p = umaxtostr (map[i].offset, nbuf);
1203 COPY_STRING (blk, q, p);
1204 COPY_STRING (blk, q, "\n");
1205 p = umaxtostr (map[i].numbytes, nbuf);
1206 COPY_STRING (blk, q, p);
1207 COPY_STRING (blk, q, "\n");
1209 memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1210 set_next_block_after (blk);
1211 return true;
1214 static bool
1215 pax_dump_header (struct tar_sparse_file *file)
1217 file->stat_info->sparse_major = tar_sparse_major;
1218 file->stat_info->sparse_minor = tar_sparse_minor;
1220 return (file->stat_info->sparse_major == 0) ?
1221 pax_dump_header_0 (file) : pax_dump_header_1 (file);
/* Convert the string ARG to an unsigned decimal number.

   ARG must start with a digit, consist of nothing but the number,
   and denote a value that is representable and does not exceed
   MAXVAL.  On success store the value in *NUM and return true.
   Otherwise return false and leave *NUM untouched.  errno is
   modified in the process.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  /* Checking the first character up front rejects the empty string
     as well as the leading white space and sign that strtoumax
     would otherwise accept.  */
  if (ISDIGIT (*arg))
    {
      char *end;
      uintmax_t value;

      errno = 0;
      value = strtoumax (arg, &end, 10);

      if (*end == '\0' && errno != ERANGE && value <= maxval)
        {
          *num = value;
          return true;
        }
    }

  return false;
}
1243 static bool
1244 pax_decode_header (struct tar_sparse_file *file)
1246 if (file->stat_info->sparse_major > 0)
1248 uintmax_t u;
1249 char nbuf[UINTMAX_STRSIZE_BOUND];
1250 union block *blk;
1251 char *p;
1252 size_t i;
1253 off_t start;
1255 #define COPY_BUF(b,buf,src) do \
1257 char *endp = b->buffer + BLOCKSIZE; \
1258 char *dst = buf; \
1259 do \
1261 if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \
1263 ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1264 file->stat_info->orig_file_name)); \
1265 return false; \
1267 if (src == endp) \
1269 set_next_block_after (b); \
1270 b = find_next_block (); \
1271 if (!b) \
1272 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive"))); \
1273 src = b->buffer; \
1274 endp = b->buffer + BLOCKSIZE; \
1276 *dst = *src++; \
1278 while (*dst++ != '\n'); \
1279 dst[-1] = 0; \
1280 } while (0)
1282 start = current_block_ordinal ();
1283 set_next_block_after (current_header);
1284 blk = find_next_block ();
1285 if (!blk)
1286 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive")));
1287 p = blk->buffer;
1288 COPY_BUF (blk,nbuf,p);
1289 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1291 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1292 file->stat_info->orig_file_name));
1293 return false;
1295 file->stat_info->sparse_map_size = u;
1296 file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1297 sizeof (*file->stat_info->sparse_map));
1298 file->stat_info->sparse_map_avail = 0;
1299 for (i = 0; i < file->stat_info->sparse_map_size; i++)
1301 struct sp_array sp;
1303 COPY_BUF (blk,nbuf,p);
1304 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1306 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1307 file->stat_info->orig_file_name));
1308 return false;
1310 sp.offset = u;
1311 COPY_BUF (blk,nbuf,p);
1312 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))
1313 || INT_ADD_OVERFLOW (sp.offset, u)
1314 || file->stat_info->stat.st_size < sp.offset + u)
1316 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1317 file->stat_info->orig_file_name));
1318 return false;
1320 sp.numbytes = u;
1321 sparse_add_map (file->stat_info, &sp);
1323 set_next_block_after (blk);
1325 file->dumped_size += BLOCKSIZE * (current_block_ordinal () - start);
1328 return true;
1331 static struct tar_sparse_optab const pax_optab = {
1332 NULL, /* No init function */
1333 NULL, /* No done function */
1334 pax_sparse_member_p,
1335 pax_dump_header,
1336 NULL,
1337 pax_decode_header,
1338 NULL, /* No scan_block function */
1339 sparse_dump_region,
1340 sparse_extract_region,