Shift 8.1

master
Paul Kolano 2024-01-08 17:38:55 -08:00
parent 1fbeb53e3b
commit 576ec1db4b
9 changed files with 668 additions and 755 deletions

306
BUGS
View File

@ -1,307 +1,7 @@
KNOWN BUGS
==========
1. A bug exists in the Shift 4.0 and 5.0 tar creation function that
could leave some entries in tar files in a partially corrupted state.
The conditions under which this could occur are very specific, so the
overall percentage of affected tar files is expected to be very low.
1. Shift is currently not capable of handling paths over the operating
system limit. This limit may be viewed using:
To be affected, a directory name within the tar has to be either
(1) a multiple of 512 characters in length or (2) between 155-255
characters in length with a slash at the 100th character from the
end. In the first case, the 512-byte header of the entry immediately
following the directory entry would be corrupted. In the second
case, the 512-byte header as well as an additional 512 bytes (which
may be an additional header, a different header, or the initial 512
bytes of a file's data) would be corrupted. All other entries and
data in affected tar files will be intact and can be extracted
normally.
Because Shift validates tar entries at the end of tar creation,
most transfers in which these conditions were met would have
initially ended in an "error" state with one of the following
messages:
- Invalid tar header checksum
- Invalid tar long link/name data
Note, however, that any transfers that were restarted (via --restart)
after the issue occurred are likely to have completed without further
error, even though the corrupted file(s) would not have been
repaired by the restart operation.
To help determine whether a given tar file has been impacted by this
bug, the perl code following this description can be saved to a file
(e.g. tarcheck.pl) and run on any tar file. Note that the results
of this tool are only meaningful for tar files created with Shift
versions 4.0 or 5.0. Run the tool as follows:
tarcheck.pl data1.tar data2.tar ... dataN.tar
The tool will first attempt to check files using an associated table
of contents (.toc) file. If no such file is found (which normally
occurs only when Shift is invoked without --index-tar), it will then
analyze the actual tar entries to search for corrupted entries.
Files with affected entries found in .toc files will report a message
similar to the following:
ONE OF HEADER OF
/some/file/1
OR HEADER AND FIRST 512 BYTES OF
/some/file/2
IS AFFECTED
Files with affected entries that do not have a corresponding .toc
file will report one of the following messages:
- Invalid tar record at byte N
- Invalid tar header checksum
If neither type of message is displayed, the tar file is not
affected.
##############################
#### BEGIN TAR CHECK CODE ####
##############################
#!/usr/bin/perl
# this program checks one or more tar files given on the command line
# for the Shift tar corruption problem in versions 4.0 and 5.0
use strict;
# optional first argument selects how each tar file is checked:
#   -f  always scan the tar file itself, even when a .toc file exists
#   -t  only check .toc files and never scan tar contents
my $force = shift @ARGV;
if (!defined $force || ($force ne '-f' && $force ne '-t')) {
    # first argument was a tar file rather than a flag so put it back
    # (but never push undef back when no arguments were given at all)
    unshift(@ARGV, $force) if (defined $force);
    $force = '';
}
print "usage: $0 [-f|-t] data1.tar data2.tar ... dataN.tar\n" if (!@ARGV);
foreach my $tar (@ARGV) {
    print "$tar:\n";
    if (! -e $tar) {
        print " ...does not exist\n";
    } elsif (-e "$tar.toc" && $force ne '-f') {
        # prefer the table of contents when one is available
        print " ...reading toc file $tar.toc\n";
        check_toc("$tar.toc");
    } elsif ($force ne '-t') {
        # no usable toc so walk the tar headers directly
        print " ...reading contents of $tar\n";
        find_tar($tar);
    }
}
# report directory entries in given toc file whose tar header size differs
# depending on whether the name carries a trailing slash, together with
# the name of the entry that follows each such directory
sub check_toc {
    my ($toc_path) = @_;
    my $toc_fh;
    if (!open($toc_fh, '<', $toc_path)) {
        print " ERROR: unable to open toc file $toc_path\n";
        return;
    }
    # nonzero while the previous line was an affected directory
    my $pending = 0;
    while (my $line = <$toc_fh>) {
        chomp($line);
        my @fields = split(/\s+/, $line);
        # name is everything from the eighth column on
        my $entry = join(" ", @fields[7 .. $#fields]);
        if ($pending) {
            # finish the message started by the preceding directory
            print " OF\n\n\t\t$entry\n\n\tIS AFFECTED\n";
            $pending = 0;
        }
        # only directories are candidates
        next if ($fields[0] !~ /^d/);
        $pending =
            _toc_entry_size($entry . "/") - _toc_entry_size($entry);
        next if (!$pending);
        print "\n\tONE OF HEADER OF\n\n\t\t$entry\n\n\tOR HEADER";
        if ($pending > 512) {
            print " AND FIRST ", $pending - 512, " BYTES";
        }
    }
    close $toc_fh;
}

# return number of header bytes needed to store given name in a tar file
sub _toc_entry_size {
    my ($tar_name) = @_;
    my $len = length($tar_name);
    # short names fit in the plain 512-byte header
    return 512 if ($len <= 100);
    my $slash = index($tar_name, "/", $len - 100);
    # names that split cleanly into prefix/name also fit in one header
    return 512 if ($slash != -1 && $slash <= 155 && $len <= 255);
    # add size of long name plus extra record, rounded up to full blocks
    my $bytes = 512 + $len + 512;
    my $extra = $bytes % 512;
    $bytes += 512 - $extra if ($extra > 0);
    return $bytes;
}
# scan given tar file header by header and print a message for every
# record that is not a valid ustar header or fails checksum validation
# (prints nothing for a tar file in which no problems are detected)
# based on Tar/Archive::Tar 0.07 by Calle Dybedahl (no license specified)
sub find_tar {
    my $spath = shift;
    my $fh;
    $fh = undef if (!open($fh, '<', $spath));
    # offset of the next expected header after the last valid entry
    my $tell = 0;
    if (!$fh) {
        print "Unable to open tar file $spath\n";
        return;
    }
    binmode $fh;
    # pending GNU long name (L) and long link (K) data
    my %real;
    my $head;
    read($fh, $head, 512);
    while (length($head) == 512) {
        # end of archive is two blocks of 512 but GNU tar uses one sometimes
        last if ($head eq "\0" x 512);

        # uid, gid, and size must be 'a' instead of 'A' for base-256 encoding
        # name, lnk, mgc, unam, gnam, and pfx are 'Z' for trailing whitespace
        my @attrs = unpack('Z100A8a8a8a12A12A8A1Z100Z6A2Z32Z32A8A8Z155', $head);
        # name mode uid gid size time sum type lnk mgc ver unam gnam dmj dmn pfx
        # 0    1    2   3   4    5    6   7    8   9   10  11   12   13  14  15

        # prepend prefix to name
        # (test length and not truth so a prefix of "0" is not dropped)
        if (length($attrs[15])) {
            $attrs[0] = $attrs[15] . "/" . $attrs[0];
            $attrs[15] = "";
        }
        # remove last non-standalone slash
        $attrs[0] =~ s/(?!^)\/$//;
        # (test length and not truth so a file named "0" is not rejected)
        if (!length($attrs[0])) {
            print "Empty file name in tar file $spath\n";
            # read next header
            read($fh, $head, 512);
            next;
        }

        # old GNU tar may have space after ustar
        if ($attrs[9] ne 'ustar' && $attrs[9] ne 'ustar ') {
            if ($tell == 0) {
                # no valid entry has been seen yet so give up on this file
                print "Not in supported ustar format\n";
                close $fh;
                return;
            }
            print "Invalid tar record at byte ", tell($fh) - 512, "\n";
            # read next header
            read($fh, $head, 512);
            next;
        }

        # convert octal numeric fields
        $attrs[$_] = oct($attrs[$_]) foreach (1, 5, 6, 13, 14);

        # handle GNU large uid/gid/size extension (two's-complement base-256)
        foreach my $i (2 .. 4) {
            if (substr($attrs[$i], 0, 1) eq "\x80") {
                my $val = ord(substr($attrs[$i], 1, 1)) & 0xff;
                # size field is 12 bytes while uid/gid fields are 8 bytes
                for (2 .. ($i == 4 ? 11 : 7)) {
                    $val <<= 8;
                    $val |= (ord(substr($attrs[$i], $_, 1)) & 0xff);
                }
                $attrs[$i] = $val;
            } else {
                $attrs[$i] = oct $attrs[$i];
            }
        }

        # validate checksum, which is defined as the byte sum of the header
        # with the 8-byte checksum field itself treated as all spaces
        # (the field must stay 8 bytes wide or the sum is always wrong)
        substr($head, 148, 8) = " " x 8;
        if (unpack("%16C*", $head) != $attrs[6]) {
            print "Invalid tar header checksum for $attrs[0]\n";
            # read next header
            read($fh, $head, 512);
            next;
        }

        # handle GNU long names
        if ($attrs[7] =~ /^[LK]$/) {
            do {
                # read next block of long name/link data
                read($fh, $head, 512);
                $head = substr($head, 0, $attrs[4]) if ($attrs[4] < 512);
                # remove the extra byte used for \0
                $head =~ s/\0$//;
                $real{$attrs[7]} .= $head;
                $attrs[4] -= 512;
            } while ($attrs[4] > 0);
            # read next header
            read($fh, $head, 512);
            next;
        }

        # find next header
        my $offset = tell($fh);
        if (!seek($fh, $attrs[4], 1)) {
            print "Unable to seek in tar file $spath\n";
            last;
        }
        my $diff = $attrs[4] % 512;
        # ignore padding
        if ($diff != 0 && !seek($fh, 512 - $diff, 1)) {
            print "Unable to ignore padding in tar file $spath\n";
            last;
        }
        $tell = $offset + $attrs[4] + ($diff ? 512 - $diff : 0);

        # apply any long name/link collected from preceding records
        if ($real{L}) {
            $attrs[0] = $real{L};
            $real{L} = undef;
        }
        if ($real{K}) {
            $attrs[8] = $real{K};
            $real{K} = undef;
        }

        # read next header
        read($fh, $head, 512);

        if ($attrs[0] eq '././@LongLink') {
            print "Dangling long link/name record\n";
            next;
        }
    }
    # a short read means the file ended without an end of archive block
    if (length($head) < 512) {
        print "Unable to read header at offset $tell in tar file $spath\n";
    }
    close $fh;
}
# return given path logically cleaned of . and .. and stripped of leading ..
# (purely textual: the file system is never consulted)
sub tar_canonpath {
    my $path = shift;
    # load here since nothing else in this script imports File::Spec
    require File::Spec;
    my $abs = $path =~ /^\//;
    my @dirs = File::Spec->splitdir($path);
    for (my $i = 0; $i < scalar(@dirs); $i++) {
        if ($dirs[$i] eq '.' || $dirs[$i] eq '') {
            # ./foo becomes foo, foo//bar becomes foo/bar
            splice(@dirs, $i--, 1);
        } elsif ($dirs[$i] ne '..' && $i + 1 < scalar(@dirs) &&
                $dirs[$i + 1] eq '..') {
            # foo/../bar becomes bar
            splice(@dirs, $i, 2);
            # step back so the previous component is rechecked against
            # what now follows it, but never below -1 or the next pass
            # would index from the end of the array
            $i -= 2;
            $i = -1 if ($i < -1);
        }
    }
    # remove leading ..
    shift @dirs while (@dirs && $dirs[0] eq '..');
    # make path absolute if it was originally
    unshift(@dirs, "/") if ($abs);
    return File::Spec->catdir(@dirs);
}
# return copy of given string with each %XX hex escape replaced by the
# character it encodes (undefined input is returned unchanged)
sub unescape {
    my ($encoded) = @_;
    return $encoded if (!defined $encoded);
    $encoded =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
    return $encoded;
}
############################
#### END TAR CHECK CODE ####
############################
perl -MPOSIX -le 'print PATH_MAX'

19
CHANGES
View File

@ -360,3 +360,22 @@ CHANGES
- Fixed unnecessary metadata traversal when showing detailed status
- Fixed application of lustre striping expression to directories
- Removed bbcp and gridftp support since reported fatal bugs never fixed
* Shift 8.1 (01/08/24)
- Added silent corruption database search using --search with --last-sum
- Added custom lustre striping of directories using DR in expressions
- Added alternative metadata locking for broken NFSv3 locks
- Added debugging output when manager unable to map local/remote files
- Changed embedded mesh handling to latest mesh version
- Fixed fish-tcp --secure SSL handling with more recent IO::Socket::SSL
- Fixed mount collection when file server is an alias to many IPs
- Fixed handling of @kfi forms in lustre mount collection
- Fixed remote mount point in GPFS mount collection
- Fixed mcp default stripe handling overriding --stripe=0
- Fixed handling of --mgr so no longer always overrides with default
- Fixed inherited retry count of operations in run state during restart
- Fixed clean of files associated with old monitoring processes
- Fixed exception during silent corruption processing of some tar operations
- Fixed escape of commas in email command line
- Fixed synchronization of monitor files between managers
- Fixed error handling of manager invocations

View File

@ -85,7 +85,6 @@ Shift Installation and Configuration
o {get,set}fattr - get and set file extended attributes
o lfs - get and set Lustre striping
o lspci - find 10GE adapters
o mmlsmgr - get GPFS server information
o mount - get file system information
o ping - determine network latency
o ps - find clients and PBS processes, and determine client CPU load

View File

@ -66,8 +66,8 @@ Shift includes the following features, among others:
Shift is in active production at the NASA Advanced Supercomputing Facility
(https://www.nas.nasa.gov/hecc/support/kb/entry/300) and has facilitated
approximately 2.0M transfers over 3.0B files totalling 170 PB (as of Dec.
2020) since deployment in March 2012.
approximately 3.4M transfers over 11B files totalling 420 PB (as of Jan.
2024) since deployment in March 2012.
For full details of the Shift architecture, see
https://pkolano.github.io/papers/resilience12.pdf and

View File

@ -123,7 +123,7 @@ given in following sections.
\fBMonitoring and management options:\fP
\-\-history[=csv] show command line/origin of transfers [in CSV form]
\-\-id=NUM use transfer identifier NUM for other commands
\-\-last-sum show last stored sum for SOURCE(s)
\-\-last-sum show last stored sum for SOURCE(s) or matching \-\-search
\-\-mgr=HOST set host of shift manager to HOST
\-\-mgr\-identity=FILE access manager host with ssh identity in FILE
\-\-mgr\-user=USER access manager host as USER
@ -135,7 +135,7 @@ given in following sections.
io,ln,mcp,meta, mkdir,msum,rsync,shift-cp,
shift-sum,sum,tool})
\-\-restart[=ignore] restart transfer with given \-\-id [ignoring errors]
\-\-search=REGEX show only status/history matching REGEX
\-\-search=REGEX show only status/history/last-sum matching REGEX
\-\-state=STATE show status of only those operations in STATE
(STATE one of {done,error,none,queue,run,warn})
\-\-stats[=csv] show stats across all transfers [in CSV form]
@ -167,7 +167,7 @@ given in following sections.
\-\-streams=NUM use NUM streams in remote transports [4]
\-\-stripe=[CEXP] choose stripe {count,size,pool} via expr {C,S,P}EXP
[::[SEXP][::PEXP]] (EXP may be NUM, SIZE, or full perl expression w/
const {NM,SZ,SC,SS} for src {name,size,scnt,ssz})
const {DR,NM,SZ,SC,SS} for src {is_dir,name,size,scnt,ssz})
(use suffix {k,m,g,t} for {KiB,MiB,GiB,TiB})
\-\-threads=NUM use NUM threads in local transports [4]
\-\-verify\-fast verify faster but less safely by reusing src buffer
@ -471,13 +471,15 @@ the origin host/directory and the original command. When
Specify the transfer identifier to be used with management and status
commands.
.IP "\fB\-\-last\-sum\fP"
Queries the silent corruption database for all files given on the
command line and prints (one file per line) the last known checksum, the
file modification time associated with this checksum, and the file name.
When \fB\-\-index\-tar\fP is given, the first file argument is assumed
to be a tar file and the remaining arguments names of files within the
tar for which checksum information will be printed. A checksum of "-"
means that no information is stored for the file.
When \fB\-\-search\fP is given, queries the silent corruption database for all
files whose name or file system matches the given regular expression and prints
(one file per line) the last known checksum, the file modification time
associated with this checksum, and the file name. Otherwise, queries the silent
corruption database for all files given on the command line. When
\fB\-\-index\-tar\fP is given, the first file argument is assumed to be a tar
file and the remaining arguments names of files within the tar for which
checksum information will be printed. A checksum of "-" means that no
information is stored for the file.
.IP "\fB\-\-mgr=HOST\fP"
Set the host that will be used to manage transfers. By default, this
host will be accessed as the current user with hostbased authentication
@ -579,6 +581,10 @@ When \fB\-\-history\fP is specified, this option will show a brief
history of the transfers whose origin host or original command match the
given regular expression.
.IP
When \fB\-\-last\-sum\fP is specified, this option will query the silent
corruption database for files whose name or file system matches the given
regular expression.
.IP
Note that regular expressions must be given in Perl syntax (see
perlre(1) for details).
.IP "\fB\-\-state=STATE\fP"
@ -770,15 +776,15 @@ directories. A greater number or size defined with the suffixes k, m,
g, and t for KiB, MiB, GiB, and TiB, respectively, specifies that files
will be allocated one stripe per given size while directories will be
striped according to the default policy. Finally, an arbitrary Perl
expression (see perlsyn(1) for details) involving the constants NM,
SZ, SC, and SS for source name, size, stripe count, and stripe size,
respectively, may be specified to dynamically define the stripe count
differently for every file and directory in the transfer. For example,
the expression "NM =~ /foo/ ? 4 : (SZ < 10g ? 2g : 10g)" would set the
stripe count of files whose name contains "foo" to 4, and the stripe
count of files whose name does not contain "foo" to either one stripe
per 2 GiB when the file size is less than 10 GiB or one stripe per 10
GiB otherwise.
expression (see perlsyn(1) for details) involving the constants DR, NM,
SZ, SC, and SS for whether the source is a directory, source name, size, stripe count, and
stripe size, respectively, may be specified to dynamically define the
stripe count differently for every file and directory in the transfer.
For example, the expression "NM =~ /foo/ ? 4 : (SZ < 10g ? 2g : 10g)"
would set the stripe count of files whose name contains "foo" to 4, and
the stripe count of files whose name does not contain "foo" to either one
stripe per 2 GiB when the file size is less than 10 GiB or one stripe per
10 GiB otherwise.
.IP
Striping behavior may be further refined by specifying a stripe size
expression and/or Lustre pool name expression with similar conventions.

View File

@ -22,31 +22,33 @@
# (parent dir must be world writable with sticky bit for multi-user installs)
# (multi-user example: user_dir /var/lib/shift/%u)
# (single-user example: user_dir /home/%u/.shift)
#user_dir nodefault
user_dir /home/%u/.shift
# time (seconds) to store transfer metadata after last activity
#data_expire 604800
# location of file system information database
# (must be world readable for multi-user installs)
# (example: mount_db /var/lib/shift/mounts)
#mount_db nodefault
# use cron to automatically restart transfers after host or process failures
#default_cron 1
# log debugging information for user X in user_dir/X.debug
# (may be specified multiple times for different users)
# (example: debug_alice 1)
#debug_X 1
# use cron to automatically restart transfers after host or process failures
#default_cron 1
# domain to which user accounts belong for email notifications
# (assumes user X can receive email at X@email_domain)
# (assumes localhost:25 SMTP server running on manager host)
# (example: email_domain example.com)
#email_domain nodefault
# location of file system information database
# (must be world readable for multi-user installs)
# (example: mount_db /var/lib/shift/mounts)
#mount_db nodefault
# use hardlink-based locking (mainly for NFSv3 and below)
#nfs_lock 0
# command to invoke to make host selection decisions
# (must be world readable/executable for multi-user installs)
# (example: select_hook /usr/local/bin/shift-select.hook)
@ -173,7 +175,7 @@ user_dir /home/%u/.shift
# expressions by which to select lustre stripe count/size/pool
# (format is same as --stripe: [CEXP][::[SEXP][::PEXP]])
# (EXP may be NUM, SIZE, or full perl expression)
# (EXP may use const {NM,SZ,SC,SS} for src {name,size,scnt,ssz})
# (EXP may use const {DR,NM,SZ,SC,SS} for src {is_dir,name,size,scnt,ssz})
# (set to 0 to use system default striping)
# (note that transports using temporary files are not supported)
# (use suffix {k,m,g,t} for {KB,MB,GB,TB})
@ -393,4 +395,3 @@ user_dir /home/%u/.shift
# global avg. network writes (MB/s) at which to throttle transfers on host X
# (example: throttle_netw_host_host1.example.com 10000)
#throttle_netw_host_X nodefault

View File

@ -66,7 +66,7 @@ use Symbol qw(gensym);
use Sys::Hostname;
use Text::ParseWords;
our $VERSION = 8.0;
our $VERSION = 8.1;
# do not die when receiving sigpipe
$SIG{PIPE} = 'IGNORE';
@ -991,9 +991,11 @@ sub fish {
require IO::Socket::SSL::Utils;
($scert, my $skey) = IO::Socket::SSL::Utils::CERT_create(
CA => 1,
purpose => 'server,client',
subject => {CN => $key},
);
$scert = PEM_cert2string($scert) . PEM_key2string($skey);
$scert = IO::Socket::SSL::Utils::PEM_cert2string($scert) .
IO::Socket::SSL::Utils::PEM_key2string($skey);
(my $fh, $cert) = tempfile();
print $fh $scert;
close $fh;
@ -1221,12 +1223,25 @@ sub fqdn {
my $host = shift;
if ($host =~ /^\d+\.\d+\.\d+\.\d+$/) {
my $name = gethostbyaddr(inet_aton($host), AF_INET);
return $name if ($name);
$host = $name if ($name);
} elsif (wantarray) {
# resolve from name to ip back to name to normalize multiple aliases
my %names;
my ($n, $a, $t, $l, @addrs) = gethostbyname($host);
foreach my $addr (@addrs) {
my $name = gethostbyaddr($addr, AF_INET);
$names{$name} = 1 if ($name);
}
return keys(%names);
} else {
my @cols = gethostbyname($host);
return $cols[0] if ($cols[0]);
# resolve from name to ip back to name to normalize multiple aliases
my $ip = gethostbyname($host);
if ($ip) {
my $name = gethostbyaddr($ip, AF_INET);
$host = $name if ($name);
}
}
return $host;
return wantarray ? ($host) : $host;
}
###############
@ -1280,7 +1295,7 @@ sub mount {
}
if (/server_list=\(([^\)]+)\)/) {
# cxfs appears as xfs but with server_list set
$mnt{servers} = join("|", map {$_ = fqdn($_)} split(/,/, $1));
$mnt{servers} = join("|", map {fqdn($_)} split(/,/, $1));
$mnt{remote} = $mnt{local};
$type = "cxfs";
} elsif ($dev =~ /^(\S+):([^:]+)$/) {
@ -1289,7 +1304,8 @@ sub mount {
$mnt{servers} = $1;
# lustre may have extra @id and multiple colon-separated servers
$mnt{servers} =~ s/@\w*//g;
$mnt{servers} = join("|", map {$_ = fqdn($_)} split(/:/, $mnt{servers}));
# lustre may have both commas and colons in @kfi forms
$mnt{servers} = join("|", map {fqdn($_)} split(/[:,]/, $mnt{servers}));
} elsif ($type eq 'gpfs') {
# gpfs servers do not appear in mount output so read config
if (open(FILE, "/var/mmfs/gen/mmfs.cfg")) {
@ -1297,7 +1313,7 @@ sub mount {
s/^\s+|\s+$//g;
if (/^clustername\s+(\S+)/i) {
$mnt{servers} = $1;
$mnt{remote} = "/" . $mnt{servers};
$mnt{remote} = "/$dev";
last;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long