mirror of https://github.com/pkolano/shift.git
308 lines
9.9 KiB
Plaintext
308 lines
9.9 KiB
Plaintext
KNOWN BUGS
|
|
==========
|
|
|
|
1. A bug exists in the Shift 4.0 and 5.0 tar creation function that
|
|
could leave some entries in tar files in a partially corrupted state.
|
|
The conditions under which this could occur are very specific, so the
|
|
overall percentage of affected tar files is expected to be very low.
|
|
|
|
To be affected, a directory name within the tar has to be either
|
|
(1) a multiple of 512 characters in length or (2) between 155-255
|
|
characters in length with a slash at the 100th character from the
|
|
end. In the first case, the 512-byte header of the entry immediately
|
|
following the directory entry would be corrupted. In the second
|
|
case, the 512-byte header as well as an additional 512 bytes (which
|
|
may be an additional header, a different header, or the initial 512
|
|
bytes of a file's data) would be corrupted. All other entries and
|
|
data in affected tar files will be intact and can be extracted
|
|
normally.
|
|
|
|
Because Shift validates tar entries at the end of tar creation,
|
|
most transfers in which these conditions were met would have
|
|
initially ended in an "error" state with one of the following
|
|
messages:
|
|
|
|
- Invalid tar header checksum
|
|
- Invalid tar long link/name data
|
|
|
|
Note, however, that any transfers that were restarted (via --restart)
|
|
after the issue occurred are likely to have completed without further
|
|
error, even though the corrupted file(s) would not have been
|
|
repaired by the restart operation.
|
|
|
|
To help determine whether a given tar file has been impacted by this
|
|
bug, the perl code following this description can be saved to a file
|
|
(e.g. tarcheck.pl) and run on any tar file. Note that the results
|
|
of this tool are only meaningful for tar files created with Shift
|
|
versions 4.0 or 5.0. Run the tool as follows:
|
|
|
|
tarcheck.pl data1.tar data2.tar ... dataN.tar
|
|
|
|
The tool will first attempt to check files using an associated table
|
|
of contents (.toc) file. If no such file is found (which normally
|
|
occurs only when Shift is invoked without --index-tar), it will then
|
|
analyze the actual tar entries to search for corrupted entries.
|
|
|
|
Files with affected entries found in .toc files will report a message
|
|
similar to the following:
|
|
|
|
ONE OF HEADER OF
|
|
|
|
/some/file/1
|
|
|
|
OR HEADER AND FIRST 512 BYTES OF
|
|
|
|
/some/file/2
|
|
|
|
IS AFFECTED
|
|
|
|
Files with affected entries that do not have a corresponding .toc
|
|
file will report one of the following messages:
|
|
|
|
- Invalid tar record at byte N
|
|
- Invalid tar header checksum
|
|
|
|
If neither type of message is displayed, the tar file is not
|
|
affected.
|
|
|
|
|
|
##############################
|
|
#### BEGIN TAR CHECK CODE ####
|
|
##############################
|
|
|
|
#!/usr/bin/perl
|
|
# this program checks one or more tar files given on the command line
|
|
# for the Shift tar corruption problem in versions 4.0 and 5.0
|
|
|
|
use strict;
|
|
|
|
my $force = shift @ARGV;
|
|
if ($force ne '-f' && $force ne '-t') {
|
|
unshift(@ARGV, $force);
|
|
$force = undef;
|
|
}
|
|
|
|
foreach my $tar (@ARGV) {
|
|
print "$tar:\n";
|
|
if (! -e $tar) {
|
|
print " ...does not exist\n";
|
|
} elsif (-e "$tar.toc" && $force ne '-f') {
|
|
print " ...reading toc file $tar.toc\n";
|
|
check_toc("$tar.toc");
|
|
} elsif ($force ne '-t') {
|
|
print " ...reading contents of $tar\n";
|
|
find_tar($tar);
|
|
}
|
|
}
|
|
|
|
sub check_toc {
|
|
my $toc = shift;
|
|
if (open(TOC, '<', $toc)) {
|
|
my $diff;
|
|
while (<TOC>) {
|
|
chomp;
|
|
my @cols = split(/\s+/);
|
|
my $name = join(" ", @cols[7 .. scalar(@cols) - 1]);
|
|
if ($diff) {
|
|
print " OF\n\n\t\t$name\n\n\tIS AFFECTED\n";
|
|
$diff = 0;
|
|
}
|
|
next if ($cols[0] !~ /^d/);
|
|
|
|
my %size;
|
|
foreach my $tar_name ($name, $name . "/") {
|
|
if (length($tar_name) > 100) {
|
|
my $pos = index($tar_name, "/", length($tar_name) - 100);
|
|
if ($pos == -1 || $pos > 155 || length($tar_name) > 255) {
|
|
# add size of long name plus extra record
|
|
my $asize = 512 + length($tar_name) + 512;
|
|
$asize += (512 - ($asize % 512)) if ($asize % 512 > 0);
|
|
$size{$tar_name} = $asize;
|
|
next;
|
|
}
|
|
}
|
|
$size{$tar_name} = 512;
|
|
}
|
|
$diff = $size{$name . "/"} - $size{$name};
|
|
if ($diff) {
|
|
print "\n\tONE OF HEADER OF\n\n\t\t$name\n\n\tOR HEADER";
|
|
print " AND FIRST ", $diff - 512, " BYTES" if ($diff > 512);
|
|
}
|
|
}
|
|
close TOC;
|
|
} else {
|
|
print " ERROR: unable to open toc file $toc\n";
|
|
}
|
|
}
|
|
|
|
# output list of files/dirs within given files with stat info
|
|
# based on Tar/Archive::Tar 0.07 by Calle Dybedahl (no license specified)
|
|
sub find_tar {
|
|
my $spath = shift;
|
|
|
|
my $fh;
|
|
$fh = undef if (!open($fh, '<', $spath));
|
|
|
|
my $tell = 0;
|
|
if (!$fh) {
|
|
print "Unable to open tar file $spath\n";
|
|
return;
|
|
}
|
|
binmode $fh;
|
|
|
|
my %real;
|
|
my ($eof, $head);
|
|
read($fh, $head, 512);
|
|
while (length($head) == 512) {
|
|
# end of archive is two blocks of 512 but GNU tar uses one sometimes
|
|
if ($head eq "\0" x 512) {
|
|
$eof = 1;
|
|
last;
|
|
}
|
|
|
|
# uid, gid, and size must be 'a' instead of 'A' for base-256 encoding
|
|
# name, lnk, mgc, unam, gnam, and pfx are 'Z' for trailing whitespace
|
|
my @attrs = unpack('Z100A8a8a8a12A12A8A1Z100Z6A2Z32Z32A8A8Z155', $head);
|
|
# name mode uid gid size time sum type lnk mgc ver unam gnam dmj dmn pfx
|
|
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
|
|
|
# prepend prefix to name
|
|
if ($attrs[15]) {
|
|
$attrs[0] = $attrs[15] . "/" . $attrs[0];
|
|
$attrs[15] = "";
|
|
}
|
|
# remove last non-standalone slash
|
|
$attrs[0] =~ s/(?!^)\/$//;
|
|
|
|
if (!$attrs[0]) {
|
|
print "Empty file name in tar file $spath\n";
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
next;
|
|
}
|
|
# old GNU tar may have space after ustar
|
|
if ($attrs[9] ne 'ustar' && $attrs[9] ne 'ustar ') {
|
|
if ($tell == 0) {
|
|
print "Not in supported ustar format\n";
|
|
return;
|
|
}
|
|
print "Invalid tar record at byte ", tell($fh) - 512, "\n";
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
next;
|
|
}
|
|
|
|
# convert octal numeric fields
|
|
$attrs[$_] = oct($attrs[$_]) foreach (1, 5, 6, 13, 14);
|
|
|
|
# handle GNU large uid/gid/size extension (two's-complement base-256)
|
|
foreach my $i (2 .. 4) {
|
|
if (substr($attrs[$i], 0, 1) eq "\x80") {
|
|
my $val = ord(substr($attrs[$i], 1, 1)) & 0xff;
|
|
for (2 .. ($i == 4 ? 11 : 7)) {
|
|
$val <<= 8;
|
|
$val |= (ord(substr($attrs[$i], $_, 1)) & 0xff);
|
|
}
|
|
$attrs[$i] = $val;
|
|
} else {
|
|
$attrs[$i] = oct $attrs[$i];
|
|
}
|
|
}
|
|
|
|
# validate checksum
|
|
substr($head, 148, 8) = " ";
|
|
if (unpack("%16C*", $head) != $attrs[6]) {
|
|
print "Invalid tar header checksum for $attrs[0]\n";
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
next;
|
|
}
|
|
|
|
# handle GNU long names
|
|
if ($attrs[7] =~ /^[LK]$/) {
|
|
do {
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
$head = substr($head, 0, $attrs[4]) if ($attrs[4] < 512);
|
|
# remove the extra byte used for \0
|
|
$head =~ s/\0$//;
|
|
$real{$attrs[7]} .= $head;
|
|
$attrs[4] -= 512;
|
|
} while ($attrs[4] > 0);
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
next;
|
|
}
|
|
|
|
# find next header
|
|
my $offset = tell($fh);
|
|
if (!seek($fh, $attrs[4], 1)) {
|
|
print "Unable to seek in tar file $spath\n";
|
|
last;
|
|
}
|
|
my $diff = $attrs[4] % 512;
|
|
# ignore padding
|
|
if ($diff != 0 && !seek($fh, 512 - $diff, 1)) {
|
|
print "Unable to ignore padding in tar file $spath\n";
|
|
last;
|
|
}
|
|
$tell = $offset + $attrs[4] + ($diff ? 512 - $diff : 0);
|
|
|
|
if ($real{L}) {
|
|
$attrs[0] = $real{L};
|
|
$real{L} = undef;
|
|
}
|
|
if ($real{K}) {
|
|
$attrs[8] = $real{K};
|
|
$real{K} = undef;
|
|
}
|
|
|
|
# read next header
|
|
read($fh, $head, 512);
|
|
|
|
if ($attrs[0] eq '././@LongLink') {
|
|
print "Dangling long link/name record\n";
|
|
next;
|
|
}
|
|
my $udst = tar_canonpath($attrs[0]);
|
|
substr($udst, 0, 0) = "/" if ($udst !~ /^\//);
|
|
}
|
|
if (length($head) < 512) {
|
|
print "Unable to read header at offset $tell in tar file $spath\n";
|
|
}
|
|
close $fh;
|
|
}
|
|
|
|
# return given path logically cleaned of . and .. and stripped of leading ..
|
|
sub tar_canonpath {
|
|
my $path = shift;
|
|
my $abs = $path =~ /^\//;
|
|
my @dirs = File::Spec->splitdir($path);
|
|
for (my $i = 0; $i < scalar(@dirs); $i++) {
|
|
if ($dirs[$i] eq '.' || $dirs[$i] eq '') {
|
|
# ./foo becomes foo, foo//bar becomes foo/bar
|
|
splice(@dirs, $i--, 1);
|
|
} elsif ($dirs[$i] ne '..' && $dirs[$i + 1] eq '..') {
|
|
# foo/../bar becomes bar
|
|
splice(@dirs, $i, 2);
|
|
$i -= 2;
|
|
}
|
|
}
|
|
# remove leading ..
|
|
shift @dirs while ($dirs[0] eq '..');
|
|
# make path absolute if it was originally
|
|
unshift(@dirs, "/") if ($abs);
|
|
return File::Spec->catdir(@dirs);
|
|
}
|
|
|
|
# return uri-unescaped version of given string
|
|
sub unescape {
|
|
my $text = shift;
|
|
$text =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if (defined $text);
|
|
return $text;
|
|
}
|
|
|
|
############################
|
|
#### END TAR CHECK CODE ####
|
|
############################
|