shift/BUGS

308 lines
9.9 KiB
Plaintext

KNOWN BUGS
==========
1. A bug exists in the Shift 4.0 and 5.0 tar creation function that
could leave some entries in tar files in a partially corrupted state.
The conditions under which this could occur are very specific, so the
overall percentage of affected tar files is expected to be very low.
To be affected, a directory name within the tar has to be either
(1) a multiple of 512 characters in length or (2) between 155-255
characters in length with a slash at the 100th character from the
end. In the first case, the 512-byte header of the entry immediately
following the directory entry would be corrupted. In the second
case, the 512-byte header as well as an additional 512 bytes (which
may be an additional header, a different header, or the initial 512
bytes of a file's data) would be corrupted. All other entries and
data in affected tar files will be intact and can be extracted
normally.
Because Shift validates tar entries at the end of tar creation,
most transfers in which these conditions were met would have
initially ended in an "error" state with one of the following
messages:
- Invalid tar header checksum
- Invalid tar long link/name data
Note, however, that any transfers that were restarted (via --restart)
after the issue occurred are likely to have completed without further
error, even though the corrupted file(s) would not have been
repaired by the restart operation.
To help determine whether a given tar file has been impacted by this
bug, the perl code following this description can be saved to a file
(e.g. tarcheck.pl) and run on any tar file. Note that the results
of this tool are only meaningful for tar files created with Shift
versions 4.0 or 5.0. Run the tool as follows:
tarcheck.pl data1.tar data2.tar ... dataN.tar
The tool will first attempt to check files using an associated table
of contents (.toc) file. If no such file is found (which normally
occurs only when Shift is invoked without --index-tar), it will then
analyze the actual tar entries to search for corrupted entries.
Files with affected entries found in .toc files will report a message
similar to the following:
ONE OF HEADER OF
/some/file/1
OR HEADER AND FIRST 512 BYTES OF
/some/file/2
IS AFFECTED
Files with affected entries that do not have a corresponding .toc
file will report one of the following messages:
- Invalid tar record at byte N
- Invalid tar header checksum
If neither type of message is displayed, the tar file is not
affected.
##############################
#### BEGIN TAR CHECK CODE ####
##############################
#!/usr/bin/perl
# this program checks one or more tar files given on the command line
# for the Shift tar corruption problem in versions 4.0 and 5.0
use strict;
my $force = shift @ARGV;
if ($force ne '-f' && $force ne '-t') {
unshift(@ARGV, $force);
$force = undef;
}
foreach my $tar (@ARGV) {
print "$tar:\n";
if (! -e $tar) {
print " ...does not exist\n";
} elsif (-e "$tar.toc" && $force ne '-f') {
print " ...reading toc file $tar.toc\n";
check_toc("$tar.toc");
} elsif ($force ne '-t') {
print " ...reading contents of $tar\n";
find_tar($tar);
}
}
sub check_toc {
my $toc = shift;
if (open(TOC, '<', $toc)) {
my $diff;
while (<TOC>) {
chomp;
my @cols = split(/\s+/);
my $name = join(" ", @cols[7 .. scalar(@cols) - 1]);
if ($diff) {
print " OF\n\n\t\t$name\n\n\tIS AFFECTED\n";
$diff = 0;
}
next if ($cols[0] !~ /^d/);
my %size;
foreach my $tar_name ($name, $name . "/") {
if (length($tar_name) > 100) {
my $pos = index($tar_name, "/", length($tar_name) - 100);
if ($pos == -1 || $pos > 155 || length($tar_name) > 255) {
# add size of long name plus extra record
my $asize = 512 + length($tar_name) + 512;
$asize += (512 - ($asize % 512)) if ($asize % 512 > 0);
$size{$tar_name} = $asize;
next;
}
}
$size{$tar_name} = 512;
}
$diff = $size{$name . "/"} - $size{$name};
if ($diff) {
print "\n\tONE OF HEADER OF\n\n\t\t$name\n\n\tOR HEADER";
print " AND FIRST ", $diff - 512, " BYTES" if ($diff > 512);
}
}
close TOC;
} else {
print " ERROR: unable to open toc file $toc\n";
}
}
# output list of files/dirs within given files with stat info
# based on Tar/Archive::Tar 0.07 by Calle Dybedahl (no license specified)
sub find_tar {
my $spath = shift;
my $fh;
$fh = undef if (!open($fh, '<', $spath));
my $tell = 0;
if (!$fh) {
print "Unable to open tar file $spath\n";
return;
}
binmode $fh;
my %real;
my ($eof, $head);
read($fh, $head, 512);
while (length($head) == 512) {
# end of archive is two blocks of 512 but GNU tar uses one sometimes
if ($head eq "\0" x 512) {
$eof = 1;
last;
}
# uid, gid, and size must be 'a' instead of 'A' for base-256 encoding
# name, lnk, mgc, unam, gnam, and pfx are 'Z' for trailing whitespace
my @attrs = unpack('Z100A8a8a8a12A12A8A1Z100Z6A2Z32Z32A8A8Z155', $head);
# name mode uid gid size time sum type lnk mgc ver unam gnam dmj dmn pfx
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# prepend prefix to name
if ($attrs[15]) {
$attrs[0] = $attrs[15] . "/" . $attrs[0];
$attrs[15] = "";
}
# remove last non-standalone slash
$attrs[0] =~ s/(?!^)\/$//;
if (!$attrs[0]) {
print "Empty file name in tar file $spath\n";
# read next header
read($fh, $head, 512);
next;
}
# old GNU tar may have space after ustar
if ($attrs[9] ne 'ustar' && $attrs[9] ne 'ustar ') {
if ($tell == 0) {
print "Not in supported ustar format\n";
return;
}
print "Invalid tar record at byte ", tell($fh) - 512, "\n";
# read next header
read($fh, $head, 512);
next;
}
# convert octal numeric fields
$attrs[$_] = oct($attrs[$_]) foreach (1, 5, 6, 13, 14);
# handle GNU large uid/gid/size extension (two's-complement base-256)
foreach my $i (2 .. 4) {
if (substr($attrs[$i], 0, 1) eq "\x80") {
my $val = ord(substr($attrs[$i], 1, 1)) & 0xff;
for (2 .. ($i == 4 ? 11 : 7)) {
$val <<= 8;
$val |= (ord(substr($attrs[$i], $_, 1)) & 0xff);
}
$attrs[$i] = $val;
} else {
$attrs[$i] = oct $attrs[$i];
}
}
# validate checksum
substr($head, 148, 8) = " ";
if (unpack("%16C*", $head) != $attrs[6]) {
print "Invalid tar header checksum for $attrs[0]\n";
# read next header
read($fh, $head, 512);
next;
}
# handle GNU long names
if ($attrs[7] =~ /^[LK]$/) {
do {
# read next header
read($fh, $head, 512);
$head = substr($head, 0, $attrs[4]) if ($attrs[4] < 512);
# remove the extra byte used for \0
$head =~ s/\0$//;
$real{$attrs[7]} .= $head;
$attrs[4] -= 512;
} while ($attrs[4] > 0);
# read next header
read($fh, $head, 512);
next;
}
# find next header
my $offset = tell($fh);
if (!seek($fh, $attrs[4], 1)) {
print "Unable to seek in tar file $spath\n";
last;
}
my $diff = $attrs[4] % 512;
# ignore padding
if ($diff != 0 && !seek($fh, 512 - $diff, 1)) {
print "Unable to ignore padding in tar file $spath\n";
last;
}
$tell = $offset + $attrs[4] + ($diff ? 512 - $diff : 0);
if ($real{L}) {
$attrs[0] = $real{L};
$real{L} = undef;
}
if ($real{K}) {
$attrs[8] = $real{K};
$real{K} = undef;
}
# read next header
read($fh, $head, 512);
if ($attrs[0] eq '././@LongLink') {
print "Dangling long link/name record\n";
next;
}
my $udst = tar_canonpath($attrs[0]);
substr($udst, 0, 0) = "/" if ($udst !~ /^\//);
}
if (length($head) < 512) {
print "Unable to read header at offset $tell in tar file $spath\n";
}
close $fh;
}
# return given path logically cleaned of . and .. and stripped of leading ..
sub tar_canonpath {
my $path = shift;
my $abs = $path =~ /^\//;
my @dirs = File::Spec->splitdir($path);
for (my $i = 0; $i < scalar(@dirs); $i++) {
if ($dirs[$i] eq '.' || $dirs[$i] eq '') {
# ./foo becomes foo, foo//bar becomes foo/bar
splice(@dirs, $i--, 1);
} elsif ($dirs[$i] ne '..' && $dirs[$i + 1] eq '..') {
# foo/../bar becomes bar
splice(@dirs, $i, 2);
$i -= 2;
}
}
# remove leading ..
shift @dirs while ($dirs[0] eq '..');
# make path absolute if it was originally
unshift(@dirs, "/") if ($abs);
return File::Spec->catdir(@dirs);
}
# return uri-unescaped version of given string
sub unescape {
my $text = shift;
$text =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if (defined $text);
return $text;
}
############################
#### END TAR CHECK CODE ####
############################