################################################################################
#
#  kPerl Core Library Group
#  Library for Physical File Handling
#
#  copyright (c)
#    UPenn Philadelphia, Center for Bioinformatics, 2004
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 1998-2001,2003-2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - purpose:
#   ...
#   functions in this module are in close neighborhood to functions in
#   module MainLib::Path. See there for details.
#
# - dependencies:
#   - Some aspects of the implementation are specific for UNIX/Linux systems.
#
# - individual description of functions at the beginning of the code blocks
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#   %LibGlob
#
# - file properties
#   &owner
#   &ftime
#   &atime
#   &wc_l
#
# - physical handling of files
#   see also MainLib::Path.pm
#   &ReadDir
#   &ReadFile
#   &WriteFile
#   &FileLink
#   &FileCopy
#   &mv
#   &touch
#
# - I/O handles, logging concept
#   &Unbuffer
#   $LibGlob{Log*}
#   &LogOpen
#   &LogClose
#   &TrueStdout
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################

package MainLib::File;

# includes
use strict; #use warnings;  # OK 20090220
use FileHandle;
use MainLib::StrRegexp qw(&packWX &TimeStr);
use MainLib::Path;
use MainLib::Misc qw(&MySub);
use Math::kCalc qw(&Min &Max);

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &owner &ftime &atime &wc_l
  &ReadDir &ReadFile &WriteFile &FileLink &FileCopy &mv &touch
  &Unbuffer &LogOpen &LogClose &TrueStdout
  );

# package-wide constants and variables
our (%LibGlob);


################################################################################
# file properties
################################################################################


# owner of a file
#
# INTERFACE
# - argument 1: file path
# - return val: - owner of the file
#               - undef if an error occurred
#
sub owner {
  my ($path) = @_;

  # read info of the file
  return getpwuid ((stat $path)[4]);
}


# file modification time since the epoch
#
# INTERFACE
# - argument 1: pathname to evaluate
# - return val: - ftime
#               - undef if an error occurs
#
sub ftime {
  my ($path) = @_;
  unless (-e $path) { return undef }

  # evaluate file modification time and return
  return (stat $path)[9];
}


# most recent access / modify / inode change time since the epoch
#
# INTERFACE
# - argument 1: pathname to evaluate
# - return val: - atime
#               - undef if an error occurs
#
# DESCRIPTION
# - atime may be different from but always >= ftime.
#
sub atime {
  my ($path) = @_;
  unless (-e $path) { return undef }

  # evaluate file access and return
  return &Max ( (stat $path)[8,9,10] );
}


# perform wc -l on file
#
# INTERFACE
# - argument 1: pathname or piped call
# - return val: - number of lines in file
#               - undef if an error occurred
#
sub wc_l {
  my ($path) = @_;
  unless (-e $path) { return undef }

  # process call
  my $call = ($path =~ m/\|\s*$/) ? "$path wc -l" : "wc -l $path";

  # start process
  my $count = do{
    my $hIn = FileHandle->new("$call |");
    unless ($hIn) { return undef }
    else {
      my $line = <$hIn>;
      close $hIn;
      unless ($line) { return undef }
      unless ($line=~m/\d+/) { return undef }
      $&;
    }
  };

  # exit SUB
  return $count;
}


################################################################################
# physical handling of files
################################################################################


# read all entries of a directory
#
# INTERFACE
# - argument 1: directory path
#
# - options:
#   -debug      [STD]
#   -expand     expand filenames to pathnames (incl. directory as specified)
#   -select     regexp for directory entry filtering
#
# - return:     - array of directory entries
#               - undef if an error occurs
#
# DESCRIPTION
# - directory entries '.' and '..' are not returned
# ? are the returned directory entries sorted? [20030517]
#
sub ReadDir {
  my ($PathDir, %opt) = @_;
  my $debug = $opt{-debug} || 0;
  $PathDir =~ s|/+$||;
  my $slc = $opt{-select};

  # open directory for reading
  unless (opendir (DIRECTORY,$PathDir)) {
    $debug and printf STDERR "%s. ERROR: unable to read directory %s\n", &MySub, $PathDir||"''";
    return undef;
  }

  # select entries
  my ($CtEntry,@ReturnEntry);
  while (my $ItEntry = readdir(DIRECTORY)) {
    ++ $CtEntry;
    if ($slc) {
      if ($ItEntry =~ m/${slc}/) {
        push @ReturnEntry, $opt{-expand} ? ($PathDir.'/'.$ItEntry):$ItEntry;
      }
    } elsif ($ItEntry !~ m/^\.{1,2}$/) {
      push @ReturnEntry, $opt{-expand} ? ($PathDir.'/'.$ItEntry):$ItEntry;
    }
  }
  closedir DIRECTORY;
  $debug and printf STDERR "%s. directory %s read\n  primary entries: %d\n  returned entries: %d\n", &MySub,
    $PathDir||"''", $CtEntry, int(@ReturnEntry);
  $debug>1 and printf STDERR "  list of entries:\n%s", join ('',map{ "    $_\n" }@ReturnEntry);

  # return entries
  return @ReturnEntry;
}


# read file to scalar
#
# INTERFACE
# - argument 1: - file path / input pipe call
#               - filehandle reference (closed after the procedure)
#
# - options:
#   -debug       [STD]
#   -LineLimit   read up to specified number of lines
#
# - return val: - wantscalar: document string
#               - wantarray: array of lines
#               - undef if an error occurred
#
# DESCRIPTION
# - in case of a file handle typeglob reference given, the handle is left open
#   by this procedure
#
sub ReadFile {
  my ($ArgIn,%opt) = @_;
  my $bMe = ((getlogin()||getpwuid($<)) eq 'szafrans') ? 1 : 0;
  my $debug = $opt{-debug};

  # open source file
  my $hIn;
  if (ref($ArgIn) =~ m/\b(FileHandle|GLOB)/) {
    $hIn = $ArgIn;
  } elsif ($ArgIn eq '-') {
    $hIn = \*STDIN;
  } else {
    # FileHandle->new() without 2nd-arg 'r' also passes for piped input expressions
    unless ($hIn = FileHandle->new($ArgIn)) {
      printf STDERR "%s. ERROR: unable to open %s%s\n", &MySub,
        $ArgIn||"''", $debug?', callers:':'';
      if ($debug||$bMe) {
        my ($CtI, $CurrCaller);
        while ($CurrCaller=(caller(++$CtI))[3] and $CtI<=3) {
          printf STDERR "  %s\n", $CurrCaller;
        }
      }
      return undef;
    }
  }
  binmode $hIn;

  # read source
  my @buffer;
  if ($opt{-LineLimit}) {
    while (defined($_=<$hIn>) and $.<=$opt{-LineLimit}) {
      push @buffer, $_;
    }
  } else {
    while (<$hIn>) { push @buffer, $_; }
  }

  # return file content
#  $debug and printf STDERR "%s. returning %d lines, last one:\n%s", &MySub, int(@buffer), $buffer[-1];
  return wantarray ? @buffer : join('',@buffer);
}


# print scalar to file
#
# INTERFACE
# - argument 1: - file path / output pipe
#               - filehandle reference
#               - string "-" interpreted as STDOUT
# - argument 2: string to be written
#
# - options:
#   -append     append output in existing file
#               Append mode automatically applies for filehandle output
#               arguments
#
# - return val: - success status (boolean)
#
# DESCRIPTION
# - in case of a file handle reference given, the file is closed
#
sub WriteFile {
  my ($ArgOut,$data,%opt) = @_;

  # take over supplied handle
  my $hOut;
  if (ref($ArgOut) =~ m/\b(FileHandle|GLOB)/) {
    $hOut = $ArgOut;
  } elsif ($ArgOut eq '-') {
    $hOut = \*STDOUT;
  }
  # ... or open target file
  else {
    $hOut = FileHandle->new($ArgOut,$opt{-append}?'a':'w');
    $hOut or return undef; 
  }

  # output
  print  $hOut $data;

  # return successfully
  return 1;
}


# create symbolic link to target file
#
# INTERFACE
# - argument 1: path of target file (to be linked)
# - argument 2: name/path of symbolic link
#
# - return val: success status (boolean)
#
# DESCRIPTION
# - linking will be enforced that way that an existing file/link will be
#   deleted in order to create the link as wanted
#
sub FileLink {
  my ($FileTgt,$FileLink,%opt) = @_;

  # argument validation
  unless ($FileLink and $FileTgt) { return 0 }
  if ($FileLink eq $FileTgt) { return 0 }

  # remove existing link
  if (-e $FileLink or -l $FileLink) {
    unlink ($FileLink);
  }

  # link file, exit SUB
  return symlink ($FileTgt, $FileLink);
}


# copy file
#
# INTERFACE
# - argument 1: source path, source file handle
# - argument 2: target path (may be directory path only), target file handle
#
# - options:
#   -debug      [STD]
#
# - return val: success status (boolean)
#
# DESCRIPTION
# - as an enhancement to &File::Copy::copy, check for existing links for the
#   target filename. Remove it if it's a linked file (like option -f to
#   system's cp).
#
# DEVELOPERS' NOTES
# - this is just framework/wrapping around &File::Copy::copy. Core functionality
#   resides in that function.
#
sub FileCopy {
  require File::Copy; File::Copy->import('&copy');
  my ($src,$tgt,%opt) = @_;
  my $debug = $opt{-debug};
  $debug and printf STDERR "%s. copying %s to %s\n", &MySub, $src||"''", $tgt||"''";

  # unlink for existing links at target location
  if (!ref($tgt) and -l $tgt) {
    $debug and printf STDERR "%s. target %s already exists as a link\n", &MySub, $tgt||"''";
    if (-d &PathLinkResol($tgt)) {
      $debug and printf STDERR "  is a linked directory path, keeping it\n";
    } else {
      $debug and printf STDERR "  removing it\n";
      unlink ($tgt);
    }
  }

  return &copy ($src, $tgt);
}


# simulate system's mv -f
#
# INTERFACE
# - argument 1: source path
# - argument 2: target path, may be directory path only
#
# - options:
#   -debug      [STD]
#
# - return val: success status (boolean)
#
# DESCRIPTION
# - this &mv works across physical file system borders
# - this &mv moves symbolic links as if they were files
#
sub mv {
  my ($src,$tgt,%opt) = @_;
  my $debug = $opt{-debug};
  $debug and printf STDERR "%s. moving %s to %s\n", &MySub, $src||"''", $tgt||"''";

  unless (-e $src) {
    $debug and printf STDERR "%s. ERROR: source file %s does not exist\n", &MySub, $src||"''";
    return 0;
  }

  # target is specified only by directory path
  if (-d $tgt) {
    $debug and printf STDERR "%s. target is an existing directory\n", &MySub;
    $tgt = $tgt .'/'. &PathSplit($src)->{name};
  }

  # source is a link
  # - unlink source, set new link at target location
  if (-l $src) {
    $debug and printf STDERR "%s. source is a link\n", &MySub;
    my $SrcLtgt = &PathLinkResol ($src);
    $debug and printf STDERR "%s. link target: %s\n", &MySub, $SrcLtgt;
    unlink ($tgt);
    return (symlink($SrcLtgt,$tgt) and unlink($src));
  }

  # copy / unlink for files
  if (-f $src) {
    $debug and printf STDERR "%s. source is a regular file\n", &MySub;
    if (-w $src) {
      unlink ($tgt);
      return (&FileCopy($src,$tgt) and unlink($src));
    } else {
      return 0;
    }
  }

  # unsuccessful return
  printf STDERR "%s. ERROR: no category for source or source doesn't exist\n", &MySub;
  return 0;
}


# touch
#
# INTERFACE
# - argument 1: pathname to touch
# - return val: success status (boolean)
#
sub touch {
  my ($path) = @_;
  return FileHandle->new($path,'a') ? 1 : 0;
}


################################################################################
# I/O handles, logging concept
################################################################################


# unbuffer file handle
#
# INTERFACE
# - argument 1*: typeglob reference to file handle, default: STDOUT
# - return val:  typeglob reference to affected file handle
#
sub Unbuffer {
  my $hdl = shift() || \*STDOUT;

  # select handle, save old selection
  my $OldHdl = select $hdl;

  # change to unbuffered mode
  $| = 1;

  select $OldHdl;
  $hdl;
}


$LibGlob{LogFile} = undef;


# open LOG file, redirect STDOUT and STDERR
#
# INTERFACE
# - options:
#   -file       specify a file path for logging
#   -prog       identity of the program, printed at the header of the
#               LOG file.
#   -stamp      phrase in creating the filename of the LOG file
#
sub LogOpen {
  require MainLib::FileTmp; MainLib::FileTmp->import('&PathUnique');
  my (%opt) = @_;

  # i can handle only one LOG file at a time
  if ($LibGlob{LogFile}) {
    printf STDERR "%s. ERROR: 2nd trial to initiate logging\n", &MySub;
    return;
  }
  my $LogDir = &PathLogdir() or exit 1;

  # function parameters
  my $prog  = $opt{-prog}  || $0;
  my $stamp = $opt{-stamp};
  if (!defined($stamp)) { (undef,$stamp) = ($prog=~m|^(.*/)?(\w+)|) }
  $LibGlob{LogFile} = $opt{-file} || &PathUnique (
    -dir     => $LogDir,
    -name    => sprintf ("%s%s.log",
                &TimeStr (-format=>'CompactComp'),
                $stamp ? "_${stamp}" : '',
                ),
    -touch => 1,
    );

  # open LOG file, redirect STDOUT and STDERR
  open (SAVESTDOUT, ">&STDOUT");
  open (SAVESTDERR, ">&STDERR");
  unless (open (STDOUT, ">>$LibGlob{LogFile}")) {
    printf STDERR "%s. ERROR: unable to open log file %s for writing\n", &MySub,
      $LibGlob{LogFile}||"''";
    exit 1;
  }
  open (STDERR, ">&STDOUT");

  # unbuffer output
  &Unbuffer (\*STDOUT);
  &Unbuffer (\*STDERR);

  # output LOG file header
  print  "LOG output - $prog\n";
  printf "  process ID: %d\n", $$;
  printf "  time: %s\n", &TimeStr();
  printf "  KPERLPATH: %s\n", $ENV{KPERLPATH}||$ENV{PERLPATH}||"''";
  print  "\n";
}


# close LOG file, redirect STDOUT
#
sub LogClose {

  # restore STDOUT and STDERR
  close STDOUT;
  close STDERR;
  open (STDOUT, ">&SAVESTDOUT");
  open (STDERR, ">&SAVESTDERR");

  # restore environment
  undef $LibGlob{LogFile};
}


# return reference for original STDOUT
#
# INTERFACE
# - return val: GLOB reference for original STDOUT
#
# DESCRIPTION
# - STDOUT may have been redirected in &LogOpen.
#
sub TrueStdout {

  # restore STDOUT and STDERR
  if (defined (fileno (SAVESTDOUT))) {
    return \*SAVESTDOUT;
  } else {
    return \*STDOUT;
  }
}


1;
# $Id: File.pm,v 1.27 2018/06/05 18:02:56 szafrans Exp $
