#!/usr/bin/perl
#=============================================================== ARCHIVE

#----------------------------------------------------------- Use modules
use constant PSTDIR => "/usr/local/pst";
use lib PSTDIR;
use TUSC::local;
use TUSC::archive;

#------------------------------------------------------ Define variables
#
# NOTE(review): %valid is not declared with "my" (the original declared
# only an unused @valid), so it is a package global.  It is left that way
# because it is not visible from this file whether the TUSC modules refer
# to it by name -- confirm before making it lexical.
#
my (%errs,%opts,$opts,$command,%args,@args,$args);
my ($plural,$out,%out,$host,$status);

#------------------------------------------------- Define error messages
%errs = define_error_table();

#-------------------------------------------- Define valid command names
%valid = define_valid_commands_and_options();

#--------------------------------------------------- Set default options
%opts = set_default_opts(\%errs);

#------------------------------------------------- Parse input variables
#
# parse_input returns a reference to the (possibly updated) options hash
# and a reference to the list of remaining arguments; both are copied
# back into the working %opts and @args.
#
($opts,$args) = parse_input(\@ARGV,\%opts,\%errs,\%valid);
%opts = %$opts;
@args = @$args;

#---------------------------------------------------------- Define %args
%args = define_args_hash(\%opts,\@args,\%errs);

#------------------------------------------------ Define dispatch tables
#
# The site-specific routines come in three calling conventions:
#
#   (\%opts,$host,\%args,\%errs) -> (\%opts,\%out)     get, ls
#   (\%opts,\%args,\%errs)       -> (\%opts,\%out)     put, mv, rm, ...
#   (\%opts,$host,\%errs)        -> %out               stat
#
# Taking a code reference does not require the routine to be defined, so
# a missing routine still fails only when its command is actually run,
# just as a direct call would.
#
my %host_routine = (
  get => \&archive_get,
  ls  => \&archive_ls,
);

my %plain_routine = (
  put   => \&archive_put,
  mv    => \&archive_mv,
  rm    => \&archive_rm,
  mkdir => \&archive_mkdir,
  rmdir => \&archive_rmdir,
  chmod => \&archive_chmod,
  chgrp => \&archive_chgrp,
  chown => \&archive_chown,
);

#---------------------------------------- Call site-specific subroutines
#
# Proceed only if no fatal errors were counted while parsing the input.
# A command name found in neither table and not equal to "stat" is
# silently ignored, exactly as the original if/elsif chain did.
#
if ($opts{fatal}==0) {
  $command = $opts{command};

  if ($command eq "stat") {
    # stat hands back only the output hash; %opts is not reassigned here.
    $host = $opts{DEFAULT_HOST};
    %out  = archive_stat(\%opts,$host,\%errs);
  }
  elsif (exists $host_routine{$command}) {
    $host = $opts{DEFAULT_HOST};
    ($opts,$out) = $host_routine{$command}->(\%opts,$host,\%args,\%errs);
    %opts = %$opts;
    %out  = %$out;
  }
  elsif (exists $plain_routine{$command}) {
    ($opts,$out) = $plain_routine{$command}->(\%opts,\%args,\%errs);
    %opts = %$opts;
    %out  = %$out;
  }
}

#------------------------------------------------- Warn if fatal nonzero
#
# The message template comes from the error table and is filled in with
# the program name, the error count, the plural suffix, and the manual
# page to consult.
#
if ($opts{fatal} > 0) {
  $plural = ($opts{fatal} == 1) ? "" : "s";
  warn sprintf($errs{FATAL_ERRS}{MSG},
    $0,$opts{fatal},$plural,"archive(1)");
}

#------------------------------------------------------------------ Exit
#
# Only the low 8 bits of an exit status reach the parent process, so an
# error count of 256 (or any multiple of it) would be reported as 0,
# i.e. success.  Clamp the status so that any failure stays non-zero.
#
$status = $opts{fatal};
$status = 255 if $status > 255;

exit $status;

__END__

=pod

=head1 NAME

B<archive> - perform basic file-handling operations on the local 
archival storage system.

=head1 SYNOPSIS

S<B<archive> B<put> [putopts] F<file1> [F<file2 ...>]             >E<10>
S<B<archive> B<get> [getopts] F<file1> [F<file2 ...>]             >E<10>
S<B<archive> B<mv>* [mvopts] F<file1> [F<file2 ...>] F<target>     >E<10>
S<B<archive> B<rm>* [rmopts] F<file1> [F<file2 ...>]               >E<10>
S<B<archive> B<ls> [lsopts] [F<file1 ...>]                        >E<10>
S<B<archive> B<stat> [statopts] [I<name>]                         >E<10>
S<B<archive> B<mkdir> [mkdiropts] F<dir1> [F<dir2 ...>]           >E<10>
S<B<archive> B<rmdir>* [rmdiropts] F<dir1> [F<dir2 ...>]           >E<10>
S<B<archive> B<chmod>* [chmodopts] I<mode> F<file1> [F<file2 ...>] >E<10>
S<B<archive> B<chgrp>* [chgrpopts] group F<file1> [F<file2 ...>]   >E<10>
S<B<archive> B<chown>* [chownopts] owner[:group] F<file1> [F<file2 ...>] >E<10>

=head1 DESCRIPTION
 

B<archive> provides users with a uniform, site-independent command 
interface to the local archival storage system, eliminating the need 
to learn the myriad syntactical variations that appear at different 
institutions to perform essentially identical archival file management 
tasks.

=head1 COMMANDS

Each instance of B<archive> performs the file-management command
identified by B<archive>'s first argument: B<put>, B<get>, B<mv>*, 
B<rm>*, B<ls>, B<stat>, B<mkdir>, B<rmdir>*, B<chmod>*, B<chgrp>*, or 
B<chown>*.  Each command accepts a set of arguments as well as a 
set of command-line options described in the B<OPTIONS> section 
below.  If multiple archival storage facilities are available at
a particular site, all of the commands act on the default 
system only, unless another system is specified with the 
S<B<-asf> I<host>> option.  The detailed behavior of the different 
commands is as follows:E<10>

* The current version of B<archive> does not support the commands 
indicated with asterisks (*).  Later versions will incorporate 
these additional commands.

=over 4

=item S<B<archive put> [putopts] F<file1> [F<file2 ...>]>

E<10>Copy files F<file1> [F<file2 ...>] from the current working 
directory to the archival storage system.  Options I<putopts> 
include B<-retry N>, S<B<-asf> I<host>>, S<B<-C> F<path>>, B<-s>,
B<-f>, B<-i>, B<-p>, B<-M>, B<-D>, B<-n>, S<B<-t> F<tarfile>>, B<-S>, 
and B<-z>.  F<file1> S<[F<file2 ...>]> may contain wildcards * and ?.  

The destination 
directory on the archival storage system may be specified using the 
S<B<-C> F<path>> option.  If S<B<-C> F<path>> is not present, the 
user's archival home directory will be used.  

Multiple files are combined with tar(1) before copying to the 
archival storage system if the S<B<-t> F<tarfile>> option is 
specified.  Files are tarred by first executing

tar cf - run10 run10.001, ..., run10.025 E<62> F<tarfile>

then F<tarfile> is transferred to the archival storage system.E<10>

=item S<B<archive get> [getopts] F<file1> [F<file2 ...>]  >

E<10>Copy files F<file1> [F<file2 ...>] in the archival storage 
system to the current working directory.  Options I<getopts> 
include B<-retry N>, S<B<-asf> I<host>>, S<B<-C> F<path>>, B<-s>, 
B<-f>, B<-i>, B<-p>, B<-M>, B<-n>, B<-S>, B<-x>, and B<-z>.  
If wildcards * and ? are used in specifying F<file1> [F<file2 ...>], 
then double quotes must be used:

S<archive get "F<file1> [F<file2 ...>]">

A source directory on the archival storage system may be specified 
using the S<B<-C> F<path>> option.  If S<B<-C> F<path>> is not 
specified, the user's archival home directory will be used.  

Archival storage files with a .tar, .tar.gz, or .tgz extension will 
be extracted with the B<gzip> and B<tar> utilities if the B<-x> 
option is present.  For example, if F<filea> is named run10.tgz, 
then B<archive get -x> will copy run10.tgz from the archival storage 
system, then execute 

gunzip -f < run10.tgz | tar xf -

Specifying B<-x> has no effect on files with extensions different 
from .tar, .tar.gz, or .tgz.E<10>

=item S<B<archive mv>* [mvopts] F<file1> [F<file2 ...>] F<target>>

E<10>Move F<file1> [F<file2 ...>] to the target location F<target>;
F<file1> [F<file2 ...>] and F<target> are all located on the archival 
storage system.  
Options I<mvopts> include B<-retry N>, S<B<-asf> I<host>>, 
S<B<-C> F<path>>, B<-s>, B<-f>, B<-i>, B<-M>, and B<-n>.
If F<target> is an existing directory, then F<file1> 
[F<file2 ...>] are moved into F<target>.  If F<target> does not exist,
S<B<archive mv>> will attempt to rename F<file1> as F<target>.  If
in this case [F<file2 ...>] is present or the names F<file1> and 
F<target> are identical, B<archive mv> will fail and write an error 
message to standard error.  If wildcards * and ? are used to specify 
F<file1> [F<file2 ...>] and F<target>, then double quotes must be used:

S<archive mv "F<file1> [F<file2 ...>]" "F<target>">

Note: do not include both F<file1> [F<file2 ...>] and F<target> in the
same set of quotes.  If wildcards are used to specify F<file1> 
[F<file2 ...>] or F<target>, but not both, then quotes need only
delineate the argument containing wildcards.E<10>

=item S<B<archive rm>* [rmopts] F<file1> [F<file2 ...>]       >

E<10>Remove F<file1> [F<file2 ...>] on the archival storage system.
Options I<rmopts> include B<-retry N>, S<B<-asf> I<host>>, S<B<-C> F<path>>, 
B<-s>, B<-f>, B<-i>, B<-r>, and B<-n>.  If wildcards * and ? are used 
to specify F<file1> [F<file2 ...>], then double quotes must be used:

S<archive rm "F<file1> [F<file2 ...>]">

When B<-r> is specified, directories are emptied of files and removed.
The user is prompted for removal of any write-protected files, except
when the B<-f> option is used.E<10>

=item S<B<archive ls> [lsopts] [F<file1 ...>]                >

E<10>List [F<file1 ...>] on the archival storage system.  Options 
I<lsopts> include B<-retry N>, S<B<-asf> I<host>>,
and other options identified by the manual page for B<ls> on the
archival storage system.  (See B<man archive_ls(3)> for a full list of 
I<lsopts>.)  If wildcards * and ? are used to specify [F<file1 ...>], 
then double quotes must be used:

S<archive ls "[F<file1 ...>]">

If [F<file1 ...>] is not specified, then the contents of the user's home 
directory on the archival storage system are listed.E<10>

=item S<B<archive stat> [statopts]                           >

E<10>Check and report status of archival storage system.
Options I<statopts> include B<-retry N>, S<B<-asf> I<host>>, and B<-s>.
An exit status of 0 is returned when the system is operational; a 
non-zero exit status is returned otherwise.  A message of the form 

hh:mm:ss mm/dd/yyyy F<name> I<status> [I<note>]

is printed to standard out, where S<hh:mm:ss mm/dd/yyyy> is the current
date, F<name> is the name of the archival storage system, I<status> is
either "on-line" or "unavailable", and I<note> is an optional text
message that may or may not be returned.E<10>

=item S<B<archive mkdir> [mkdiropts] F<dir1> [F<dir2 ...>]   >

E<10>Create directories F<dir1> [F<dir2 ...>] on the archival storage 
system.  Options I<mkdiropts> include B<-retry N>, S<B<-asf> I<host>>, 
S<B<-C> F<path>>, B<-s>, B<-p>, S<B<-m> I<mode>>, and B<-n>.E<10>

=item S<B<archive rmdir>* [rmdiropts] F<dir1> [F<dir2 ...>]     >

E<10>Remove empty directories F<dir1> [F<dir2 ...>] from the archival 
storage system.  Options I<rmdiropts> include B<-retry N>, 
S<B<-asf> I<host>>, S<B<-C> F<path>>, B<-s>, B<-p>, and B<-n>.  If 
F<dir1> [F<dir2 ...>] are not empty, B<archive rmdir> will fail and 
print an error message to standard error.  If wildcards * and ? are 
used to specify F<dir1> [F<dir2 ...>], then double quotes must be used:

S<archive rmdir "F<dir1> [F<dir2 ...>]">E<10>

=item S<B<archive chmod>* [chmodopts] I<mode> F<file1> [F<file2 ...>]>

E<10>Change permissions for F<file1> [F<file2 ...>] according to I<mode>. 
Options I<chmodopts> include B<-retry N>, S<B<-asf> I<host>>, 
B<-C> F<path>, B<-s>, B<-R>, and B<-n>.  I<mode> may be either symbolic 
or absolute.  See discussion of I<mode> syntax in the B<OPTIONS> section
 under S<B<-m> I<mode>>.E<10>

=item S<B<archive chgrp>* [chgrpopts] group F<file1> [F<file2 ...>]>

E<10>Change the group ID of F<file1> [F<file2 ...>] to I<group>, which
may be a decimal group ID or a group name found in the group file.
Options I<chgrpopts> include B<-retry N>, S<B<-asf> I<host>>, B<-C> F<path>,
B<-s>, B<-f>, B<-h>, B<-R>, and B<-n>.  Only an appropriately authorized
user may change the group of a file that is owned by another user.
Unless users are appropriately authorized, they must be a member of the
specified group to change the group of a file.  Unless the user is
appropriately authorized, B<archive chgrp> clears the set-user-ID and
set-group-ID file mode bits.E<10>

=item S<B<archive chown>* [chownopts] owner[:group] F<file1> [F<file2 ...>]>

E<10>Change the owner of F<file1> [F<file2 ...>] to I<owner>.  The 
optional I<group> argument changes the group.  Options I<chownopts> 
include B<-retry N>, S<B<-asf> I<host>>, B<-C> F<path>, B<-s>, B<-f>, 
B<-h>, B<-R>, and B<-n>.  Only an appropriately authorized user may 
change the owner of a file.  Unless the user is appropriately authorized,
B<archive chown> clears the set-user-ID and set-group-ID file mode 
bits.E<10> 

=back

=head1 OPTIONS
 

By default, B<archive> assumes all arguments beginning with a dash (-)
are command-line options.  Therefore, when B<archive> encounters an input
item of the form -?, where ? does not match any of B<archive>'s valid
options, B<archive> will print a message to standard error and return a
non-zero exit status.  Because this default behavior does not allow the
user to specify file or directory names that begin with a dash, you can
override this default behavior by including a dash-dash (--) in the
argument list.  When B<archive> encounters a dash-dash, it stops
parsing command options and assumes all following input items are 
command arguments.

* B<archive> currently does not support the options identified below with 
asterisks (*).  There are two exceptions: B<-f> is assumed, but sufficient
error checking to enforce B<-f> for all possible errors is not currently
enabled.  Also, S<B<-m> I<mode>> currently only handles absolute (as
opposed to symbolic) mode specification.  Later versions of B<archive> will
include all of the options and behavior listed below.

=over 5

=item B<-C> F<path>

Manage files in the archival storage directory F<path>.  F<path> is 
the user's archival storage home directory by default.  

=item B<-M>

Once a file has been written or accessed on the archival storage 
system, migrate the file to offline media.  Some centers do not support
this operation.  This option may prove useful for users with numerous 
large data files to move, which, if moved all at once, would threaten 
to fill the first tier of the archival storage system.  By specifying 
B<-M> the user immediately makes room for subsequent archival data 
transfers.

=item B<-R>*

Execute command recursively in all directories and subdirectories in
the argument list.  Same as B<-r>.  B<archive chmod> only takes B<-R>,
since B<-r> can have a different meaning for this command. 

=item B<-asf> I<host>

Archival Storage Facility: execute command on the archival storage
facility I<host>.  This option overrides the default specified by
the $ARCHIVE_HOST environment variable.

=item B<-D>

Delete the local copy of source files involved in data transfer.  If
not set, all local source files are retained.  Note, "source files"
refers to files before they have been tarred with the B<-t> option or
local files that are not tarred before transfer.  "Source files" are 
not the intermediate tarfiles generated when B<-t> is invoked.  
Intermediate tarfiles generated with B<-t> are deleted by default 
upon successful transfer.  To override this default behavior and 
retain intermediate tarfiles, use B<-S>.

=item B<-S>

Save local copies of transferred tarfiles. 

=item B<-f>*

Force creation, deletion, or modification of files/directories, 
possibly overriding permissions set on existing files/directories 
owned by the user.  When B<-f> is not present, the user is prompted 
interactively to override permission settings on files owned by the 
user.  Regardless of the presence of B<-f>, B<archive> will not attempt 
to change permissions on files or directories not owned by the user.
When used in conjunction with B<archive chgrp> or B<archive chown>, 
the B<-f> option is identical to B<-s>.

=item B<-h>*

If the file is a symbolic link, change attributes of the symbolic
link.  If B<-h> is not used, attributes of the file referenced by
the symbolic link are changed.

=item B<-i>*

Force interactive confirmation by the user before overwriting or
removing an existing file or directory, both on the archival storage
system and local disks.  If B<-i> is not set and permission settings 
permit, B<archive> will overwrite or remove existing files/directories.

=item B<-m> I<mode>*

Specify the permissions for the directory or file according to 
I<mode>.  The syntax for I<mode> described here is also used by the 
B<archive chmod> command.  

File I<mode>s may be either symbolic or absolute.  Absolute changes 
to modes are specified with octal numbers, with I<mode> of the form 
I<nnn>.  Here I<n> is a number from 0 to 7.

An absolute mode is given as an octal number constructed from the
"OR" of the following modes:

=over 8

=item S<Code>

Description

=item S< 400>

Sets read permission for owner.

=item S< 200>

Sets write permission for owner.

=item S< 100>

Sets execute (search in directory) permission for owner.

=item S< 070>

Sets read, write, and execute (search in directory)
permission for group.

=item S< 007>

Sets read, write, and execute (search in directory)
permission for others. 

=back

Symbolic changes to modes are specified with mnemonic characters, with
I<mode> of the form 

[I<who>] I<operator> [I<permissions>]

Note: if I<who> is specified, there should be no space between I<who>
and I<operator>; similarly, if I<permissions> is specified, there should
be no space between I<operator> and I<permissions>.
Here I<who>
is one or more characters that corresponds to user, group, other, or all
(B<u>, B<g>, B<o>, or B<a>); I<operator> is +, -, or =,
signifying the addition, removal, or absolute assignment of permissions;
and I<permissions> is one or more characters that correspond to the type
of permission, e.g., read (B<r>), write (B<w>), execution (B<x>)
for a given file.  For example, to add read and write permission for 
group, I<mode> should be g+rw.

Multiple symbolic modes separated by commas can be specified, although 
no spaces may intervene between these modes.  Operations are performed
in the order given.  Multiple symbolic letters following a single
operator cause the corresponding operations to be performed
simultaneously.

By default I<mode> is 777 (a+rwx) minus the bits set in the umask
on the archival storage system.

=item B<-n>*

Execute B<archive> in background mode and return zero exit status.
If B<-i> is set, B<-n> is ignored.

=item B<-p>

Create all non-existent parent directories in a specified archival 
storage system path.  By default, if the parent directory of a target 
pathname does not exist, B<archive> will fail and write an output error 
message to standard error.

=item B<-r>

Execute command recursively in all directories and subdirectories in
the argument list.  Same as B<-R>.  B<archive chmod> only takes B<-R>,
since B<-r> has a different meaning for this command. 

=item B<-retry N>

If B<archive> fails, retry B<N> times, with increasing wait time 
between repeated attempts.  Currently B<archive> sleeps for 5 seconds
plus two seconds for every iteration attempt.  Default is B<-retry 1000>.

=item B<-s>

Suppress printing of any output messages to standard error or 
standard out, except for usage errors.

=item B<-t> F<tarfile>

This option handles archival storage files created with tar(1).
See the B<archive put> entry in the B<COMMANDS> section above for 
discussion of the B<-t> option.

=item B<-x>

Extract (possibly gzipped) tarfiles after transfer from archival 
storage with B<archive_get>.

=item B<-z>

Use gzip(1) to compress/uncompress files transferred to/from
archival storage.  The compressed file will have a .gz extension.
If B<-t> is also set, then the output from tar(1) will be piped to
gzip(1), and the resulting compressed tar file will have a .tgz
extension.  See B<EXAMPLES> section below.

=back

=head1 DIAGNOSTICS

=head1 EXAMPLES

=head1 ENVIRONMENT

The following environment variables are checked by B<archive>:
$ARCHIVE_HOST, $ARCHIVE_HOME, and $ARCHIVE_PROBE.  If set,
B<archive> overrides default settings established during 
installation.

Each environment variable is a comma-separated list (with no white
space), representing multiple archival storage facilities.  Currently
all B<archive> commands operate on only one system and use the first
element of each list, which is interpreted by B<archive> as the 
default.  A different system may be specified with the B<-asf> option.
In the future, B<archive get>, B<stat>, and B<ls> will be modified to
operate on multiple systems, sequencing through the entries in the 
lists for these environment variables.

=over 4

=item B<$ARCHIVE_HOST>

List of archival storage systems available at this site.  
S<B<-asf> I<host>> overrides this environment variable.

=item B<$ARCHIVE_HOME>

List of archival storage system home directories associated with 
$ARCHIVE_HOST.  $ARCHIVE_HOME must contain either only one element,
which is used for all elements of $ARCHIVE_HOST, or a separate 
element for each system specified by $ARCHIVE_HOST.

=item B<$ARCHIVE_PROBE>

List of flags ("yes" or "no") that indicate whether or not B<archive> 
will attempt to check the existence, type, and size of files on the 
archival storage system before and after transfer so that error 
checking is possible.  A reason for setting $ARCHIVE_PROBE="no" could 
be failure in accurate or complete reporting of file information by 
the archival storage system.

=back

=head1 FILES

=head1 NOTES

B<archive> is a command-line translation tool in the TUSC layer of the 
Practical Supercomputing Toolkit (PST).  By using B<archive> (and other
elements of PST), users develop scripts and techniques that work, 
without modification, at all institutions supported by PST.

Centers currently supported include ERDC, NAVO, ASC, AHPCRC.

=head1 SEE ALSO

archive_put(3), 
archive_get(3), 
archive_mv(3)*,
archive_rm(3)*,
archive_ls(3),
archive_stat(3),
archive_mkdir(3),
archive_rmdir(3)*,
archive_chmod(3)*,
archive_chgrp(3)*,
archive_chown(3)*.


=head1 AUTHOR

Written and Copyright (C) 2001 by Joseph Werne and Michael Gourlay.
www.pstoolkit.org.

=head1 HISTORY

Documentation v1.0 assembled Friday, July 13, 2001.


