Shift 5.2

master
Paul Kolano 2018-04-23 15:35:59 -07:00
parent eb3817e492
commit 4c08b149e2
4 changed files with 93 additions and 28 deletions

View File

@ -29,10 +29,49 @@ ability to provide advanced reliability and automatic single and multi-file
parallelization to any stock command-line transfer application while being
easily deployed by both individual users as well as entire organizations.
Shift includes the following features, among others:
- support for local, LAN, and WAN transfers
- drop-in replacement for both cp and scp (basic options only)
- tracking of individual file operations with on-demand status
- transfer stop and restart
- email notification of completion, errors, and warnings
- local and remote tar creation/extraction
- rsync-like synchronization based on modification times and checksums
- integrity verification of transfers with partial retransfer/resume to
rectify corruption
- throttling based on local and remote resource utilization
- automatic retrieval/release of files residing on DMF-managed file systems
- automatic striping of files transferred to Lustre file systems
- fully self-contained besides perl core and ssh
- automatic detection and selection of higher performance transports and
hash utilities when available including bbcp, bbftp, gridftp, mcp, msum,
and rsync
- automatic many-to-many parallelization of single and multi-file
transfers with file system equivalence detection and rewriting
Shift is in active production at the NASA Advanced Supercomputing
Facility (https://www.nas.nasa.gov/hecc/support/kb/entry/300) and has
facilitated transfers of over 85 PB (as of April 2018) since deployment.
For full details of the Shift architecture, see
https://pkolano.github.io/papers/resilience12.pdf. For installation
details, see "INSTALL". For usage details, see "doc/shiftc.1" (in man
page format, viewable with "nroff -man").
https://pkolano.github.io/papers/resilience12.pdf and
https://pkolano.github.io/papers/hust15.pdf. For installation details, see
"INSTALL". For usage details, see "doc/shiftc.1" (in man page format, viewable
with "nroff -man").
Questions, comments, fixes, and/or enhancements welcome.

View File

@ -66,7 +66,7 @@ use Symbol qw(gensym);
use Sys::Hostname;
use Text::ParseWords;
our $VERSION = 0.94;
our $VERSION = 0.95;
# need threads and version of Thread::Queue from perl >= 5.10.1
my $have_threads = eval 'require 5.010_001; use threads; use Thread::Queue; 1';
@ -169,7 +169,7 @@ sub chattr {
$size = 0 if (!$size);
# stripes > 160 may fail due to max of lustre < 2.4
$count = 160 if ($count > 160);
@copts = ("setstripe", "-c", $count, "-s", $size, $ufile);
@copts = ("setstripe", "-c", $count, "-S", $size, $ufile);
splice(@copts, -1, 0, "-p", $pool) if ($pool);
} elsif ($cmd eq 'setfacl') {
$uattrs =~ s/,/\n/g;
@ -318,6 +318,14 @@ sub find1 {
$dmf = 0 if (scalar(@files) == 1 && !S_ISDIR($mode));
}
# exclude files (must be before dir processing)
if (defined $opts{exclude}) {
foreach my $re (@{$opts{exclude}}) {
my $ure = unescape($re);
next FILE if (eval {$file =~ /$ure/});
}
}
if (scalar(@stat) == 0 || S_ISDIR($mode)) {
my $err = "";
if (scalar(@stat) > 0 && (!$opts{dereference} || scalar(@files) == 1) &&
@ -362,7 +370,7 @@ sub find1 {
}
}
# include files
# include files (must be after dir processing)
if (defined $opts{include}) {
my $found;
foreach my $re (@{$opts{include}}) {
@ -373,14 +381,7 @@ sub find1 {
}
next if (!$found);
}
# exclude files
if (defined $opts{exclude}) {
foreach my $re (@{$opts{exclude}}) {
my $ure = unescape($re);
next FILE if (eval {$file =~ /$ure/});
}
}
# newer/older files
# newer/older files (must be after dir processing)
next if (defined $opts{newer} && $stat[9] < $opts{newer});
next if (defined $opts{older} && $stat[9] >= $opts{older});

View File

@ -65,7 +65,7 @@ use Symbol qw(gensym);
use Term::ANSIColor;
use Text::ParseWords;
our $VERSION = 0.94;
our $VERSION = 0.95;
$Term::ANSIColor::EACHLINE = "\n";
@ -176,7 +176,7 @@ my %opts;
my $rc = GetOptions(\%opts,
"alive", "get", "history", "host=s", "id=s", "lock", "meta:1", "mounts",
"pid=i", "plot:s", "put", "restart", "search=s", "shift-mgr", "state=s",
"stats", "status:s", "stop", "sync", "user=s",
"stats:s", "status:s", "stop", "sync", "user=s",
);
die "Invalid options\n" if (!$rc || scalar(@ARGV) != 0);
@ -203,7 +203,7 @@ die "The user_dir setting must be configured\n" if (!$conf{user_dir});
$conf{opts_bbftp} =~ s/\\n/\n/g;
# process --stats immediately before setuid or $conf{user_dir} changes
if ($opts{stats}) {
if (defined $opts{stats}) {
stats();
exit;
}
@ -1028,6 +1028,8 @@ sub get {
next if ($i == 1 && $cmd eq 'find' && $meta{'create-tar'});
$args[$i] = map_remote($opts{host}, $args[$i], $ref);
last LOG if (!defined $args[$i]);
# record if cksum arg has flipped from remote to local
$op{"map$i"} = 1 if ($cmd eq 'cksum' && $args[$i] =~ /^\//);
my $host = $args[$i] =~ /^([^\/:]+)%3A/ ? $1 : "localhost";
if ($host ne 'localhost' && !defined $rtthost{$host}) {
# determine if already have round-trip time for domain
@ -3345,6 +3347,25 @@ sub stats {
my $date1 = strftime('%m/%d/%y', localtime($time - $conf{data_expire}));
my $date2 = strftime('%m/%d/%y', localtime);
if ($opts{stats} eq 'csv') {
my @heads = map {@{$heads{$_}}} @order;
print join(",", "user", @heads), "\n";
# add row for each user
foreach my $user (sort keys(%users)) {
my @row = map {$users{$user}->{$_} || ""} @heads;
# only print row if there is an actual non-empty value
next if (!first {$_} @row);
print join(",", $user, @row), "\n";
}
# add row for each transfer type
foreach my $type (qw(local lan wan)) {
print join(",", $type, map {$types{$type}->{$_} || ""} @heads), "\n";
}
# add overall totals
print join(",", "all", map {$all->{$_} || ""} @heads), "\n";
return;
}
# print tables
require Text::FormatTable;
foreach my $head (@order) {

View File

@ -1,6 +1,6 @@
#!/usr/bin/perl
#
# Copyright (C) 2012-2017 United States Government as represented by the
# Copyright (C) 2012-2018 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration
# (NASA). All Rights Reserved.
#
@ -80,7 +80,7 @@ use constant SFTP_TRUNC => 0x10;
use constant SFTP_WRITE => 0x02;
use constant SFTP_EXCL => 0x20;
our $VERSION = 0.94;
our $VERSION = 0.95;
$Data::Dumper::Indent = 0;
$Data::Dumper::Purity = 1;
@ -1839,6 +1839,13 @@ sub shift_find {
$dmf = 0 if (scalar(@files) == 1 && !S_ISDIR($mode));
}
# exclude files (must be before dir processing)
if (defined $opts{exclude}) {
foreach my $re (@{$opts{exclude}}) {
next FILE if (eval {$file =~ /$re/});
}
}
if (scalar(@stat) == 0 || S_ISDIR($mode)) {
my $err = "";
if (scalar(@stat) > 0 && (!$opts{dereference} || scalar(@files) == 1) &&
@ -1890,7 +1897,7 @@ sub shift_find {
}
}
# include files
# include files (must be after dir processing)
if (defined $opts{include}) {
my $found;
foreach my $re (@{$opts{include}}) {
@ -1900,13 +1907,8 @@ sub shift_find {
}
next if (!$found);
}
# exclude files
if (defined $opts{exclude}) {
foreach my $re (@{$opts{exclude}}) {
next FILE if (eval {$file =~ /$re/});
}
}
# newer/older files
# newer/older files (must be after dir processing)
next if (defined $opts{newer} && $stat[9] < $opts{newer});
next if (defined $opts{older} && $stat[9] >= $opts{older});
@ -2357,6 +2359,8 @@ sub shift_loop {
}
# record if local or not for i/o throttling
$op{local} = $local;
# must cksum src if src was mapped from remote to local
$rindex = 0 if ($op{map0});
# record remote host index to determine offset in tar case
$op{rindex} = $rindex;
transport($cmd, $rhost[$rindex], undef, $rpath[$rindex], \%op);
@ -3558,7 +3562,7 @@ sub transport_chattr {
# stripe local files immediately
my $dir = $op eq 'mkdir' ? $dst : dirname($dst);
eval {mkpath($dir)};
my @args = ("lfs", "setstripe", "-c", $scount, "-s", $ssize, $dst);
my @args = ("lfs", "setstripe", "-c", $scount, "-S", $ssize, $dst);
splice(@args, -1, 0, "-p", $opts{'stripe-pool'})
if ($opts{'stripe-pool'});
system(@args);