################################################################################
#
#  kPerl File I/O Library Group
#  Perl Object Library for Input-Buffered File Handle
#
#  copyright (c)
#    UPenn Philadelphia, Center for Bioinformatics, 2004 
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 2003-2004          
#  author
#     Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - purpose:
#   a non-physical file handle (e.g. an input stream from a pipe) cannot
#   perform the seek command. But, this may be needed if first a file format
#   is determined in one separate function and then the data itself is
#   read in another separate function.
#   This object provides a solution that way that the object organizes a
#   string buffer along with the represented file handle. A line that's once
#   read can be unshifted to the object (method ungets()) and will be returned
#   again by a call of $pObj->getline() or <$pObj>.
#
# - the container architecture of the string buffer
#   is of a stack type. That means, the most recent string that was buffered
#   by ungets() will be the first one to be read again from the handle object.
#
#     h := initially leading character in the original input handle
#     x := any character that has been read from the original input handle
#     f := a character that lies ahead of read position of the original
#          input handle
#     1,2 := a character that lies ahead of read position of the input buffer,
#          first and second reading command, respectively
#     ^ := character that lies immediatly ahead of read position of the
#          input handle object
#
#     a) read from input handle, e.g. Perl code `$str1 = <$h>;'
#        hxxxxxxxxxxxxffffffffffffffffffffffffffffffffffffffffffffffff...
#                     ^
#        again, read from input handle, Perl code `$str2 = <$h>;'
#        hxxxxxxxxxxxxxxxxxxxxxxxxffffffffffffffffffffffffffffffffffff...
#                                 ^
#
#     b) buffer SECONDLY read string to handle object, Perl code
#        $h->ungets($str2);'
#        hxxxxxxxxxxxxxxxxxxxxxxxxffffffffffffffffffffffffffffffffffff...
#                     222222222222
#                     ^
#        then, buffer FIRSTLY read string to handle object, Perl code
#        $h->ungets($str1);'
#        hxxxxxxxxxxxxxxxxxxxxxxxxffffffffffffffffffffffffffffffffffff...
#        1111111111111222222222222
#        ^
#
#   It is recommended to use ungets() in the outlined way such that the
#   order of characters is always restored to the original order. Keep
#   attention if you use chop() or chomp() to process the input.  E.g.,
#   Perl code
#
#     $str = <$h>;
#     chomp($str);
#     #...
#     $h->ungets($str);
#
#   will not restore the handle to the original situation.
#
# - recorder function
#   A convenient way to restore the previous read position of an input handle
#   is to use the recorder mode. The method record() will invoke recording
#   of all subsequent read actions. After some reading, the previous read
#   position can be restored by calling rewind().
#   The current limitation is that you cannot do buffered reading while
#   recording at the same time.
#
# - individual description of functions can be found at the beginning of the
#   code blocks
#
################################################################################
#
#  OBJECT OPERATIONS
#
#   <> (scalar)    alias to method getline()
#   <> (array)     *** not implemented ***
#   bool           filehandle definition status
#
#
#  OBJECT METHODS  for external access
#
# - specific
#   new            create filehandle object
#                  - from existing FileHandle or typeglob reference
#                  - from scratch calling FileHandle->new()
#   new_from_fd    create filehandle object
#   ungets         unshift line string(s) to buffer
#
# - FileHandle behaviour
#   open           *** not implemented ***
#   tell           return current position in stream
#   eof            like standard perl function
#   seek           *** not implemented ***
#   getline        read line from object
#
#
#  OBJECT DATA STRUCTURE
#  (hash)
#
#   buffer         buffer of unread strings; array reference
#   FH             reference to original filehandle
#   bRecord        recording is active
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   overload
#   $DEBUG
#
# - housekeeping
#   &new
#   &_FH_use
#   &_buff_is_head
#
# - buffer interface
#   &ungets
#   &_push
#
# - recording mode
#   &buffrecord
#   &buffrewind
#
# - mediate FileHandle behaviour
#   &tell
#   &eof
#   &seek  *** not implemented ***
#   &getline
#   &AUTOLOAD
#     close   works via &AUTOLOAD
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - currently, unshifted or pushed scalars that get stored in $this->{buffer}
#   are not checked for being real lines rather than non-line substrings or
#   multi-line strings.
#
# - missing methods that are defined for FileHandle
#   - first, see section "OBJECT METHODS" for methods having stated
#     "*** not implemented ***"
#   - read() is missing because of a non-conventional calling syntax of read():
#     The function is expected to write to a scalar argument.
#     [check this again for FileHandle object]
#
# - BUG
#   overload of tell() does not work in a non-OO calling fashion, like
#     $pos = tell $pFileHandleInBuffer;
#   It did neither work in any previous version (of MainLib::FileHandleInbuff).
#   The problem is that we are not able to overload perl's built-in function.
#
# - look also for notes in the header of each function block
#
################################################################################

package FileHandle::InBuffer;

# includes
#use strict; use warnings;  # OK 20041106
use FileHandle;  # we'll propagate to methods of that module!
use MainLib::StrRegexp qw($reEndl);
use Math::kCalc qw(&Sum);

# overload
use overload (
'bool'      => sub { $_[0]->{FH} },
'tell'      => "tell",
'*{}'       => sub { $_[0]->_FH_use() },
            # this is needed for expected functioning of FileHandle in non-OO
            # fashion expressions, like `$pos = tell $pFileHandleInBuffer;'
#'<=>'       => sub { $_[2] ? fcmp ($_[1],${$_[0]}) : fcmp (${$_[0]},$_[1]) },
'<>'        => sub { wantarray ? $_[0]->getlines() : $_[0]->getline() },
            # the explicit function rather depends on wantscalar/wantarray
);

# package-wide constants and variables
our $DEBUG;


################################################################################
# housekeeping
################################################################################


# constructor
#
# INTERFACE
# - argument 1+: arguments
#                - typically file or filehandle argument
#                - but may be a list of arguments working with the FileHandle
#                  constructor new()
#
# - return val:  - object reference
#                - undef if an error occurs
#
sub new {

  # initialize object
  bless (my $this={});
  my (undef,@arg) = @_;

  # construct filehandle from argument(s)
  if (my $ArgRef = ref($arg[0])) {
    if ($ArgRef =~ m/\bFileHandle/) {
      $this->{FH} = $arg[0];
    } elsif ($ArgRef eq 'GLOB') {
      $this->{FH} = FileHandle->new_from_fd($arg[0],'r');
    }
  }
  $this->{FH} ||= do {
    my $h = eval "FileHandle->new(\@arg)";
    if (!defined($h)) {
      die "ERROR: failed to construct FileHandle\n",
        $@ ? sprintf ("  error message: %s",$@):'',
        sprintf ("  called from %s in %s, line %s\n",
          caller(1) ? map{s/^-e$/cmd line script/;$_}(caller(1))[3,1,2] : ("*unknown*, probably main",'?','?') );
    }
    $h;
  };
  $DEBUG and printf STDERR "%s. generated filehandle %s\n", (caller(0))[3], $this->{FH};

  # initialize line buffer
  $this->{buffer} = [];

  # misc. attributes
  $this->{bRecord} = 0;

  # return object
  return $this;
}


# another constructor
#
# INTERFACE
# - argument 1: arguments for FileHandle->new_from_fd()
# - return val: - object reference
#               - undef if an error occurs
#
sub new_from_fd {
  my $class = shift;
  my $h = FileHandle->new_from_fd(@_);
  return $class->new($h);
}


# access wrapped FileHandle
#
sub _FH_use {
  my ($this) = @_;
  my $FH = $this->{FH};
  if (! $FH) {
    die sprintf ("ERROR: undefined FileHandle in %s, called from %s",
      (caller(0))[3], (caller(1))[3]);
  }
  return $FH;
}


# determine if the buffer is head of reading queue
#
# DESCRIPTION
# - this is mostly useful to routinely check if the buffer is not in recording
#   mode AND there is anything to read from the buffer.
#
sub _buff_is_head {
  my ($this) = shift;
  return (!$this->{bRecord} and int(@{$this->{buffer}}));
}


################################################################################
# buffer interface
################################################################################


# unshift string(s) to buffer of filehandle object
#
# INTERFACE
# - argument 1*: string(s)
#
# DESCRIPTION
# - see the remarks on buffer architecture
#
sub ungets {
  my $this = shift;
  # do not buffer empty strings!
  my @line = grep{length($_)} @_;
  # add lines to buffer
  unshift @{$this->{buffer}}, @line;
}


# push string(s) to buffer of filehandle object
#
# INTERFACE
# - argument 1*: string(s)
#
# DESCRIPTION
# - see the remarks on buffer architecture
#
sub _push {
  my $this = shift;
  # do not buffer empty strings!
  my @line = grep{length($_)} @_;
  # add lines to buffer
  push @{$this->{buffer}}, @line;
}


################################################################################
# recording mode
################################################################################


# remind $this->{bRecord} reflecting recorder activity


# start recording, activate reading from buffer
#
# INTERFACE
# - argument 1*: string(s)
#
sub buffrecord {
  my ($this) = @_;
  # start recording
  # reading from head of original FileHandle becomes active automatically
  $this->{bRecord} = 1;
}


# stop recording, activate reading from buffer
#
# INTERFACE
# - argument 1*: string(s)
#
sub buffrewind {
  my ($this) = @_;
  # warn for inactive recording mode
  if (!$this->{bRecord}) {
    printf STDERR "%s. WARNING: called in non-recording mode\n", (caller(0))[3];
  }
  # stop recording (if active)
  # reading from head of buffer becomes active automatically
  $this->{bRecord} = 0;
}


################################################################################
# mediate FileHandle behaviour
################################################################################


# current position in "virtual" filehandle
#
# INTERFACE
# - return val: current read position
#
sub tell {
  my ($this) = @_;

  my $pos = $this->_FH_use()->tell();
  if ($this->_buff_is_head()) {
    $pos -= &Sum ( map{length($_)} @{$this->{buffer}} );
  }
  return $pos;
}


# flag end of "virtual" filehandle
#
# INTERFACE
# - return val: end flag
#
sub eof {
  my ($this) = @_;

  # anything in buffer?
  $this->_buff_is_head() and return 0;

  # anything in original filehandle?
  return $this->_FH_use()->eof();
}


# jump to position in filehandle stream - not possible
#
sub seek {
  my ($this) = shift;
  # buffer is in use -> Hm...
  if ($this->_buff_is_head()) {
    $DEBUG and printf STDERR "%s. ERROR: seek method not implemented, sorry\n", (caller(0))[3];
    return 0;
  }
  # original FileHandle is at head of reding -> no need to hussle
  else {
    return $this->_FH_use()->seek(@_);
  }
}


# read line from "virtual" filehandle
#
# INTERFACE
# - return val: line string
#
sub getline {
  my $this = shift;
  my $line;

  # if active, read from buffer
  my ($bGot, $bCplete);
  if ($this->_buff_is_head()) {
    while (my $sBuf = scalar shift @{$this->{buffer}}) {
      $bGot = 1;
      if ($sBuf =~ m/$reEndl/o) {
        ($line||='') .= $` . $&;
        if (length($')) {
          unshift @{$this->{buffer}}, $';
        }
        $bCplete = 1;
        last;
      } else {
        ($line||='') .= $sBuf;
      }
    }
  }

  # read remaining line from original FileHandle
  if (! $bCplete) {
    my $FH = $this->_FH_use();
    my $lineplus = $FH->getline();
    if (defined ($lineplus)) {
      ($line||='') .= $lineplus;
    }
  }

  # return line
  return $line;
}


# read lines from "virtual" filehandle
#
# INTERFACE
# - return val: line string
#
sub getlines {
  my ($this) = shift;
  my (@line);

  # line in buffer?
  while ($this->_buff_is_head()) {
    push @line, $this->getline();
  }

  # read remaining input from real filehandle
  return @line, $this->_FH_use()->getlines();
}


# missing methods that may be just propagated
#
sub AUTOLOAD {

  # what was called?
  our $AUTOLOAD;
  my $AUTOLOAD_sub = (split('::',$AUTOLOAD))[-1];
  my ($this) = shift @_;
  my $FH=$this->{FH} or return undef;
  if (! $FH->can($AUTOLOAD_sub)) {
    die sprintf ("ERROR: method %s undefined, even in %s", $AUTOLOAD, ref($FH)),
      sprintf ("called from %s, line %s\n",
        caller(1) ? (caller(1))[3,2] : ("*unknown*, probably main",'?') );
  }

  # propagate call in context of wantarray/wantcalar
  my @ret;
  $DEBUG and printf STDERR "%s. evaluating:\n  %s\n", (caller(0))[3],
    "\$FH->$AUTOLOAD_sub(\@_)";
  if (wantarray) {
    @ret = eval "\$FH->$AUTOLOAD_sub(\@_)";
  } else {
    @ret = scalar eval "\$FH->$AUTOLOAD_sub(\@_)";
  }
  if (!@ret and $@ ne '') {
    die sprintf ("ERROR calling %s, error message:\n%s", $AUTOLOAD,
        $@||'__empty__'),
      sprintf ("called from %s, line %s\n",
        caller(1) ? (caller(1))[3,2] : ("*unknown*, probably main",'?') );
  }

  # transmit return value
  return wantarray ? @ret : $ret[0];
}


1;
# $Id: InBuffer.pm,v 1.10 2018/06/05 18:02:56 szafrans Exp $
