################################################################################
#
#  kPerl Sequence Laboratory
#  Object Library for HMM-Coded Sequence Motif
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 2002-2004,
#    szafrans@imb-jena.de
#  Karol Szafranski on behalf of FLI Jena, Genome Analysis Group, 2006,
#    szafrans@fli-leibniz.de
#
################################################################################
#
#  DESCRIPTION
#
# - purpose:
#   this class works as an interface for the hidden Markov model suite
#   "HMMer package".
#
# - dependencies:
#   - The search engine, a binary compiled from C, has to be installed in
#     $CorePath{call}{HmmSearch}
#   - The motif definition format is described in the documentation of the
#     HMMer suite.
#   - The inherited class "DataFileSync" manages the synchronisation definition
#     file and internal data structure representation of the motif definition.
#
# - core features:
#   - I/O between ASCII file format and data structure representation of
#     motif definition
#   - parsing of hmmsearch output
#
# - individual description of functions can be found at the beginning of the
#   code blocks
#
################################################################################
#
#  OBJECT OPERATORS
#
#   *** NONE ***
#
#
#  OBJECT METHODS  for external access
#
# - housekeeping
#   new           create object, initialise via ini(@args)
#   ini           initialise object with motif definition from file
#   AddSwitch     modify object options
#   Load          load (overwrite) motif definition from file
#                 Better use method Sync() than this function to trigger
#                 economic synchronisation to file.
#   Save          save (overwrite) motif definition to file
#                 Better use method Sync() than this function to trigger
#                 economic synchronisation to file.
#   Sync          test for need of synchronisation action
#                 Possibly, flush data between poles of synchronisation. See
#                 class "MainLib::DataFileSync" for details.
#   SyncConnect   --"--  auto-set on successful initialisation
#   SyncMaster    --"--  auto-set according to the data type of the
#                        initialisation argument
#   SyncValid     --"--
#   Clone         return copy of object
#
# - data management and access
#   Def           read/write motif definition
#                 *** planned (really needed?) ***
#   DefFile       read/write name of definition file
#                 alias to MainLib::DataFileSync::SyncFile()
#   DefType       return type of motif definition (last name of package)
#   DefSeqType    read sequence type of motif definition, kPerl's syntax
#   ID            read/write motif identifier
#   Thresh        read scoring threshold for motif match search
#                 This value is always "0" in HMMer motif definitions, but
#                 it's provided for compatibility with other motif definition
#                 objects.
#   Valid         check validity of motif definition
#                 *** fake implementation ***
#   Width         read/write matrix width
#   RevCompl      return object for reverse-complement motif
#
# - functionality
#   Search        return hits for search on sequence using motif
#
#
#  OBJECT DATA STRUCTURE
#  (hash)
#
#   def           reference on motif definition data
#                 On initialisation, a hash data structure has to be provided,
#                 which contains at least the fields:
#                   id, matrix
#                 substructure of motif definition data:
#     alph          sequence alphabet covered by the matrix definition
#     AlphType      sequence type: nucleotide/protein
#     descr         matrix description
#     id            motif identifier
#     matrix        reference on matrix array, access hierarchy:
#                     1st dimension (array): sequence position
#                     2nd dimension (hash): symbols & transition cases
#     ...           more motif definition fields, like GA, HMM-B->D,
#                   HMM-transit, NULE, NULT, XT etc.
#     orient        motif orientation: +1 / -1
#                   always implicitly +1 in file format
#   RevCompl      reference to object of reverse-complement motif
#   SeqHits       count of hits in scanned sequences
#   SeqScanned    count of scanned sequences
#   switch        hash reference for object switches, cf. method AddSwitch()
#     -debug        print debug protocol to STDERR
#     -SyncOnce     lazy synchronisation
#     -SyncOnDestr  on destruction, automatically save definition changes back
#                   to file.
#     -TmpPreserve  do not unlink temporary files, propagate option -preserve
#                   to global manager of temporary files
#   Sync*         see class "MainLib::DataFileSync" for details
#   width         matrix width
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @ISA
#   @EXPORT_OK
#   %_LibParam
#
# - housekeeping
#   $_LibParam{TmpManag}
#   &new  see MainLib::DefaultObjHash.pm
#   &ini
#   &AddSwitch
#   &_LocalSwitch  see MainLib::DefaultObjHash.pm
#   $_LibParam{*} - file format resources
#   &Load
#   &Save
#   &Clone
#   &DESTROY
#
# - data management and access
#   &_DefTouched
#   &DefFile
#   &DefSeqType
#   &DefType
#   &ID
#   &Thresh
#   &Width
#   &Valid
#   &RevCompl
#
# - search
#   motif search result data struture  see MotifLib.pm
#   &Search
#   &SearchStrand
#   &HmmsearchParse (non-object function)
#
#
#  STD OPTIONS
#
#  -debug      print debug protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - look also for notes in the header of each function block
#
# - hmmsearch needs enormous memory resources (several Gbyte) when searching
#   on hundreds of Mbp (e.g. human chromosomes). This object-based interface
#   might circumvent these restrictions by splitting the template sequence
#   into several pieces, and joining the search results together afterwards.
#
################################################################################

package SeqLab::MotifHMMer;

# includes
use strict; #use warnings;  # OK 20060803
use FileHandle;
use MainLib::DefaultObjHash;
use MainLib::Data;
use MainLib::Path qw (%CorePath &PathExpand);
use MainLib::File qw(&ReadFile &WriteFile);
use MainLib::FileTmp;
use MainLib::DataFileSync;
use MainLib::Misc qw(&MySub);
use Math::Calc qw(&Min &Max);
use SeqLab::SeqBench qw(&SeqType &SeqStrPure);
use SeqLab::SeqFormat qw(&SeqidWord &SeqentryToFasta);

# inheritance
our @ISA;
push @ISA, qw(MainLib::DefaultObjHash MainLib::DataFileSync);

# symbol export
# - only for non-method functions
# - no default export
use Exporter;
push @ISA, qw(Exporter);
our @EXPORT_OK = qw (&HmmsearchParse);

# package-wide constants and variables
my %_LibParam;


################################################################################
# housekeeping
################################################################################


# handle temporary files globally
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $_LibParam{TmpManag} ||= MainLib::FileTmp->new(); };
&$pcFT;


# parametric initialisation
#
# INTERFACE
# - argument 1: source argument:
#               - path of sequence input file
#               - reference to data structure
#                 see explanation on top passage about $this->{def}
#               - reference to an object of __PACKAGE__
#                 This will work like $this->Clone()
#
# - options:    handled by $this->AddSwitch()
#
# - return val: - object reference
#               - undef if an error occured
#
sub ini {
  my ($this, $arg, %opt) = @_;
  my $bMe = ((getlogin()||getpwuid($<)) eq 'szafrans') ? 1 : 0;

  # initialise object, keep it blessed
  %$this = ();
  # enter object switches
  $this->AddSwitch(%opt);
  my $debug = $this->{switch}{-debug};

  if (0) { }

  # initialise object with definition file
  elsif (! ref($arg)) {
    if (! -r $arg) {
      printf STDERR "%s. ERROR: unable to read motif source file %s\n", &MySub, $arg||"''";
      return undef;
    }
    $this->{SyncFile} = $arg;
    $this->{SyncMaster} = 'file';
    my $bConnect = $this->SyncConnect (my$bValid=$this->SyncValid());
    unless ($bValid and $bConnect) {
      printf STDERR "%s. ERROR: unable to sync to file %s, valid %s, connected %s\n", &MySub,
        $arg||"''", $bValid?'yes':'no', $bConnect?'yes':'no';
      return undef;
    }
  }

  # initialise object with definition data structure
  # *** not implemented ***
  elsif (ref($arg) eq 'HASH') {
    printf STDERR "%s. ERROR: initialisation with data structure not implemented\n", &MySub;
    return undef;
    $this->Def($arg);
    $this->{SyncMaster} = 'memory';
    $this->SyncMemTouch();
  }

  # initialise object from existing object
  # WARNING: there are two conflicting sync bindings now!
  elsif (ref($arg) eq __PACKAGE__) {
    $debug||$bMe and printf STDERR "%s. initialising with existing object (ref: %s, ID: %s)\n", &MySub,
      ref($arg)||"''", $arg->ID()||"''";
    return $arg->Clone();
  }

  # initialisation error
  else {
    printf STDERR "%s. ERROR: unknown initialisation argument\n", &MySub;
    return undef;
  }

  # return
  return $this;
}


# enter object switches
#
# INTERFACE
# - argument 1*: hash of switches
# - return val:  success status (boolean)
#
# DESCRIPTION
# - for description of object switches see top
#
sub AddSwitch {
  my ($this, %oopt) = @_;
  my ($bErr);

  # loop over switches
  while (my($key,$val) = each(%oopt)) {
    if (0) {}

    # delegate -debug to "MainLib::FileTmp"
    elsif ($key eq '-debug' and ($val||0)>1) {
      $bErr ||= ! $_LibParam{TmpManag}->AddSwitch($key=>$val-1,-preserve=>1);
    }
    # delegate -TmpPreserve to "MainLib::FileTmp"
    elsif ($key eq '-TmpPreserve') {
      $bErr ||= ! $_LibParam{TmpManag}->AddSwitch(-preserve=>$val);
    }

   # from here on, each case block exclusively defines the action of object
   # re-shaping associated with that switch
    if (0) {}

    # delegate these to "MainLib::DataFileSync"
    elsif ($key =~ m/^-Sync/) {
      $bErr ||= ! &MainLib::DataFileSync::AddSwitch($this,$key,$val);
    }

    # options that we just have to enter
    else {
      if (defined $val) { $this->{switch}{$key} = $val; }
      else       { delete $this->{switch}{$key}; }
    }
  }

  # return success status
  return !$bErr;
}


# translation of HMM file header fields to object fields
# - the "default" field specifies a value that is written by method
#   $this->Save() if no certain value was specified.
#
$_LibParam{HeadLabel} = [
  { file=>'ALPH',  code=>'AlphType', default=>undef },
  { file=>'CKSUM', code=>undef,      default=>undef },
  { file=>'COM',   code=>undef,      default=>undef },
  { file=>'CS',    code=>undef,      default=>'no' },
  { file=>'DATE',  code=>undef,      default=>undef },
  { file=>'DESC',  code=>'descr',    default=>undef },
  { file=>'EVD',   code=>'EVD',      default=>undef },
  { file=>'GA',    code=>'GA',       default=>undef },
  { file=>'LENG',  code=>'width',    default=>undef },
  { file=>'MAP',   code=>undef,      default=>'no' },
  { file=>'NAME',  code=>'id',       default=>undef },
  { file=>'NSEQ',  code=>undef,      default=>1 },
  { file=>'NULE',  code=>'NULE',     default=>undef },
  { file=>'NULT',  code=>'NULT',     default=>undef },
  { file=>'RF',    code=>undef,      default=>'no' },
  { file=>'XT',    code=>'XT',       default=>undef },
  ];

# HMM layers per definition line (physically: column)
#
$_LibParam{HmmLabel} = [ qw(weight insert transit) ];

# HMMer's syntax of sequence type
#
$_LibParam{SeqType2hmmer} = {
  nucleotide => 'Nucleic',
  protein    => 'Amino',
  };
$_LibParam{SeqTypeFromHmmer} = {
  Amino   => 'protein',
  Nucleic => 'nucleotide',
  };


# read motif definition file
#
# INTERFACE
# - options:
#   -debug      [STD]
#   -file       source file path, default $this->{SyncFile}
#
# - return val: success status (boolean)
#
# DESCRIPTION
# - Note that this method is free of arguments!
# - Loading the motif file is necessary for: validity check, preparation of
#   reverse-complement matrix (essential for reverse-strand search), etc.
#
# DEVELOPER'S NOTES
# - see HMMer documentation for specification of the matrix definition format.
# - Beware of using $this->AnyAttribute() methods for assignment of attributes!
#   They will probably result in an endless recursion of this function via
#   $this->Sync().
#
sub Load {
  my ($this, %opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug} || 0;
  $debug and printf STDERR "%s. entered\n", &MySub;

  # open source file
  my $ArgIn = $lopt{-file} || $this->{SyncFile};
  my $hIn;
  if (ref($ArgIn) =~ m/\b(FileHandle|GLOB)/) {
    $hIn = $ArgIn;
  } else {
    $hIn = FileHandle->new($ArgIn) or return 0;
  }

  # read definition header
  # - translate file format field descriptors to internal keys
  $this->{def} = {};
  my ($line, @smb);
  while (defined ($line=<$hIn>)) {
    chomp ($line);
    if ($line=~m/^(\w+)\s+(.+)/ and $1 ne 'HMM') {
      my $pLbl = (grep { $_->{file} eq $1 } @{$_LibParam{HeadLabel}})[0] or next;
      $pLbl->{code} or next;
      if ($pLbl->{file} eq 'ALPH') {
        $this->{def}{$pLbl->{code}} = $_LibParam{SeqTypeFromHmmer}{$2};
      } else {
        $this->{def}{$pLbl->{code}} = $2;
      }
    }
    elsif ($line =~ m/^HMM\s+(.+)/) {
      $line = $1;
      @smb = grep { length($_) } split(/\s+/,$line);
      $this->{def}{alph} = [ @smb ];
      defined($line=<$hIn>) or return 0;
      $this->{def}{'HMM-transit'} = [ grep{ length($_) } split(/\s+/,$line) ];
      defined($line=<$hIn>) or return 0;
      $this->{def}{'HMM-B->D'} = [ grep{ length($_) } split(/\s+/,$line) ];
      last;
    }
  }
  if ($debug) {
    printf STDERR "%s. motif definition header parsed:\n", &MySub;
    print  STDERR map { "  $_->{file} -> $_->{code}: $this->{def}{$_->{code}}\n" }
      @{$_LibParam{HeadLabel}};
    printf STDERR "  seq symbols: %s\n", join(' ',@{$this->{def}{alph}});
  }
  $this->{def}{id} ||= ($ArgIn=~m/^(.+\/)?([^ \t.]+)/)[1];
  $this->{def}{orient} ||= +1;
  unless ($this->{def}{width}) {
    printf STDERR "%s %s. ERROR: NULL-length motif definition in file %s\n", &MySub,
      $this->{def}{id}||"''", $ArgIn||"''";
    return 0;
  }

  # data lines
  # - field "width" is ensured to be defined
  my $pMtfMat = $this->{def}{matrix} = [];
  for (my $CtI=0; $CtI<$this->{def}{width}; $CtI++) {
    $$pMtfMat[$CtI] = {};
    foreach my $HmmLabel (@{$_LibParam{HmmLabel}}) {

      # read next line
      unless (defined ($line=<$hIn>)) {
        printf STDERR "%s %s. ERROR: premature end of motif definition file\n", &MySub, $this->{def}{id}||"''";
        return 0;
      }
      chomp ($line);

      # split line into array of positional weights / insertions / transitions
      # definition: line = matrix column = single seq position
      if ($HmmLabel eq 'transit') {
        (undef, @{$$pMtfMat[$CtI]{$HmmLabel}}) =
          grep{ length($_) } split(/\s+/,$line);
      } else {
        (undef, @{$$pMtfMat[$CtI]{$HmmLabel}}{@smb}) =
          grep{ length($_) } split(/\s+/,$line);
      }
    }
  }
  if ($debug>1) {
    printf STDERR "%s. read motif definition, re-output:\n", &MySub;
    &DataPrint ($this->{def}, -handle=>\*STDERR, -space=>2);
  }

  # update sync, return success status
  if ($opt{-file} and $opt{-file} ne $this->SyncFile()) { $this->Sync() }
  return $this->Valid();
}


# save motif definition to file
#
# INTERFACE
# - local options:
#   -append     append rather than overwrite
#               append will always be true if -file specifies an output file
#               handle
#   -debug      [STD]
#   -file       target file path or file handle, default $this->{SyncFile}
#
# - return val: success status (boolean)
#
# DEVELOPER'S NOTES
# - Avoid an endless recursion of $this->Save() + $this->Sync() calls.
# - Avoid using $this->AnyAttribute() methods to determine attribute values!
#   They may result in an endless recursion of this function via $this->Sync().
#
sub Save {
  my ($this, %opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  my $ArgOut = $lopt{-file} || $this->{SyncFile};
  my $bAppend = $lopt{-append};
  $debug and printf STDERR "%s. entered\n", &MySub;

  # confirm synchronisation if this is not a sync call
  if ($ArgOut ne $this->{SyncFile} and (caller(1))[3]!~m/\bSync$/) { $this->Sync() }

  # anything to do?
  unless ($ArgOut) { return undef }
  unless ($this->Valid()) {
    printf STDERR "%s. ERROR: cannot save invalid motif definition %s\n", &MySub,
      $this->{def}{id}||"''";
    return 0;
  }

  # open file, write definition header
  my ($hOutMtf);
  if (ref($ArgOut) =~ m/^(FileHandle|GLOB)$/) {
    $hOutMtf = $ArgOut;
  } else {
    $hOutMtf = FileHandle->new($ArgOut,$bAppend?'a':'w') or return 0;
  }
  $debug and printf STDERR "%s. saving motif definition %s to %s, caller %s->%s\n", &MySub,
    $this->{def}{id}||"''", $ArgOut, (caller(2))[3], (caller(1))[3];
  printf $hOutMtf "HMMER2.0\n";
  # write header fields which have a value defined or which have a default value
  foreach my $pLbl (grep { (defined($_->{code}) and length($this->{def}{$_->{code}}||'')) or $_->{default} }
    @{$_LibParam{HeadLabel}}
  ) {
    if ($pLbl->{file} eq 'ALPH') {
      printf $hOutMtf "%-6s%s\n", $$pLbl{file}, $_LibParam{SeqType2hmmer}{$this->{def}{$$pLbl{code}}};
    } else {
      printf $hOutMtf "%-6s%s\n", $$pLbl{file},
        $$pLbl{code} ? $this->{def}{$$pLbl{code}} : $$pLbl{default};
    }
  }
  printf $hOutMtf "HMM   %s\n", join (' ', map { sprintf('%6s',$_) } @{$this->{def}{alph}});
  printf $hOutMtf "       %s\n", join (' ', map { sprintf('%6s',$_) } @{$this->{def}{'HMM-transit'}});
  printf $hOutMtf "       %s\n", join (' ', map { sprintf('%6s',$_) } @{$this->{def}{'HMM-B->D'}});

  # data lines
  for (my $CtI=0; $CtI<$this->{def}{width}; $CtI++) {
    printf $hOutMtf "    %2s %s\n", $CtI+1,
      join (' ', map { sprintf('%6s',$_) } @{$this->{def}{matrix}[$CtI]{weight}}{@{$this->{def}{alph}}});
    printf $hOutMtf "     - %s \n",
      join (' ', map { sprintf('%6s',$_) } @{$this->{def}{matrix}[$CtI]{insert}}{@{$this->{def}{alph}}});
    printf $hOutMtf "     - %s \n",
      join (' ', map { sprintf('%6s',$_) } @{$this->{def}{matrix}[$CtI]{transit}});
  }

  # finish matrix definition
  print  $hOutMtf "//\n";
  # return successfully
  return 1;
}


# return copy of object
#
# INTERFACE
# - return val: new object's reference
#
# DEBUG, CHANGES, ADDITIONS
# - this function possibly does not make sense as long as synchronisation is
#   still done to the same file as was done by the template object.
#
sub Clone {
  my ($this) = @_;

  # prepare copy
  my $pCopy = &DataClone({ %$this }) or return undef;
  bless $pCopy;

  return $pCopy;
}


# destroy object
#
sub DESTROY {
  my ($this) = @_;
  &MainLib::DataFileSync::DESTROY ($this);
}


################################################################################
# data management and access
################################################################################


# update object data after motif defintion was modified
#
sub _DefTouched {
  my ($this) = @_;
  delete $this->{RevCompl};
    # we have to be careful with that previous call. What if we would touch the
    # reverse-complement of a cross-linked pair of matrix definitions?
  $this->SyncMemTouch(-warn=>1);
  return;
}


# read/write name of motif defintion file
#
sub DefFile { return SyncFile(@_) }


# read motif defintion type
#
sub DefType { return (split('::',__PACKAGE__))[-1] }


# sequence type of motif definition (read only)
#
# INTERFACE
# - return val: sequence type
#
sub DefSeqType {
  my ($this) = @_;

  # synchronise object with file
  # synchronisation does not need to be active
  $this->SyncValid() and $this->Sync();

  # return value
  return $this->{def}{AlphType};
}


# defintion identifier
#
# INTERFACE
# - argument 1*: motif identifier (write mode)
# - return val:  motif identifier
#
sub ID {
  my ($this, $arg) = @_;

  # synchronise object with file
  # synchronisation does not need to be active
  $this->SyncValid() and $this->Sync();

  # set motif identifier
  if (defined $arg) {
    $this->{def}{id} = $arg;
    $this->_DefTouched();
  }

  # determine motif identifier from motif definition file
  else {
    # synchronise object with file if file is master instance
    # synchronisation does not need to be active
    $this->SyncValid() and $this->SyncMaster() eq 'file' and $this->Sync();
  }

  # return value
  return $this->{def}{id};
}


# score threshold, always "0" in HMMer motif definitions
#
sub Thresh { return 0 }


# check validity of motif definition
#
# INTERFACE
# - return val:  validity (boolean)
#
# DESCRIPTION
# - do not confuse this function with SyncValid()
#
# DEBUG, CHANGES, ADDITIONS
# - currently, this is a fake implementation
#
sub Valid {
  my ($this) = @_;
  return 1;
}


# width of motif definition
#
# INTERFACE
# - argument 1*: motif width (write mode)
# - return val:  motif width
#
sub Width {
  my ($this, $arg) = @_;

  # synchronise object with file
  # synchronisation does not need to be active
  $this->SyncValid() and $this->Sync();

  # set matrix width
  if (defined $arg) {
    $this->{def}{width} = $arg;
    $#{$this->{def}{matrix}} = $arg - 1;
    $this->_DefTouched();
  }

  # return value
  return $this->{def}{width};
}


# reverse-complement HMM
#
# INTERFACE
# - options:
#   ...         all of methods new/ini
#   -SyncFile   file for synchronisation, default: new *.tmp file
#   -SyncMaster master side of synchronisation, forced to "memory". That's the
#               only way it makes sense
#
# - return val: - object reference
#               - undef if an error occurred
#
sub RevCompl {
  my ($this, %opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  $debug and printf STDERR "%s. entered\n", &MySub;
  if ($this->DefSeqType() ne 'nucleotide') {
    printf STDERR "%s %s. WARNING: don't try to reverse-complement non-nucleotide HMM (type %s)\n", &MySub,
      $this->ID(), $this->DefSeqType()||"''";
    return undef;
  }
  # synchronisation for template object done
  $debug and printf STDERR "%s. passed synchronisation\n", &MySub;
  my ($pMtfRev, $CtI, %tmp);

  # copy backbone data structure
  $pMtfRev = &DataClone( {%$this} ) or return undef;
  foreach (qw(SyncConnect SyncFile SeqHits SeqScanned)) {
    delete $pMtfRev->{$_};
  }
  $this->{def}{orient} ||= +1;
  $pMtfRev->{def}{orient} = $this->{def}{orient} * -1;

  # reverse-complement motif definition
  # - leave unchanged: Plan7 framework transitions, B->D, transition labels, ...
  $pMtfRev->{def}{XT} = $this->{def}{XT};
  $pMtfRev->{def}{'HMM-transit'} = [ @{$this->{def}{'HMM-transit'}} ];
  $pMtfRev->{def}{'HMM-B->D'} = [ @{$this->{def}{'HMM-B->D'}} ];

  # complement motif definition data lines
  # - complement symbols (match states and insert states)
  my $pMtfRevMat = $pMtfRev->{def}{matrix};
  for ($CtI=0; $CtI<$this->{def}{width}; $CtI++) {
    @{$$pMtfRevMat[$CtI]{weight}}{qw(A C G T)} = @{$this->{def}{matrix}[$CtI]{weight}}{qw(T G C A)};
    @{$$pMtfRevMat[$CtI]{insert}}{qw(A C G T)} = @{$this->{def}{matrix}[$CtI]{insert}}{qw(T G C A)};
  }
  # - change order of insert states and transitions, associate with the next
  #   following data line to provoke correct order after reversal
  foreach (qw(insert transit)) {
    $tmp{$_} = $$pMtfRevMat[-1]{$_};
  }
  for ($CtI=$this->{def}{width}-1; $CtI>0; --$CtI) {
    foreach (qw(insert transit)) {
      $$pMtfRevMat[$CtI]{$_} = $$pMtfRevMat[$CtI-1]{$_};
    }
  }
  foreach (qw(insert transit)) {
    $$pMtfRevMat[0]{$_} = $tmp{$_};
  }
  # - interchange begin/end transitions:
  $tmp{transit} = &DataClone($$pMtfRevMat[1]{transit});
  $$pMtfRevMat[1]{transit}[7] = $$pMtfRevMat[-1]{transit}[7];
  $$pMtfRevMat[-1]{transit}[7] = $tmp{transit}[7];

  # reverse-complement motif definition
  # - reverse matrix positions
  @$pMtfRevMat = reverse (@$pMtfRevMat);

  # borne object, initialise synchronisation
  # this bless call will also work with derived classes
  bless ($pMtfRev,ref($this));
  unless ($lopt{-SyncFile}) {
    $lopt{-SyncFile} = $_LibParam{TmpManag}->Create(-touch=>1);
  }
  $debug and printf STDERR "%s. \%\%lopt:\n", &MySub;
  $debug and &DataPrint (\%lopt, -space=>2, -handle=>\*STDERR);
  $pMtfRev->AddSwitch (%lopt, -SyncConnect=>0, -SyncMaster=>'memory', -SyncMemTouch=>1);

  # return
  return $pMtfRev;
}



################################################################################
# search
################################################################################


# identify and locate instances of search pattern
#
# INTERFACE
# - argument 1: query sequence string
#
# - options:
#   ...         all of &HmmsearchParse
#   -best       return n best scoring hit sites only
#   -debug      [STD]
#   -HitSurrd   lengths of stored surrounding sequences, default: 0
#               This takes effect in &SearchStrand
#   -isPure     no need to purify sequence string
#   -strands    which strand to search
#               0   both strands (default)
#               -1  minus strand only
#               1   plus strand only
#
# - return val: - reference to result data structure
#               - undef if an error occurred
#
# DESCRIPTION
# - the query sequence string will be purified prior to search. Use switch
#   -isPure to suppress redundant work.
# - field definitions for the returned result table (array of hashes) can be
#   found in SeqLab::MotifLib.pm
#
sub Search {
  my ($this, $sSeq, %opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  $debug and printf STDERR "%s. entered\n", &MySub;

  # process / test search sequence
  unless ($lopt{isPure}) {
    $sSeq = &SeqStrPure ($sSeq, -upper=>1);
  }
  unless ($sSeq) { return undef }

  # need sync connection, ensure sync with motif defintion file
  unless ($this->SyncConnect() and $this->Sync()) {
    printf STDERR "%s %s. ERROR: missing sync connection\n", &MySub, $this->ID();
    return undef;
  }
  $debug and printf STDERR "%s. passed synchronisation\n", &MySub;

  # test for fitting sequence alphabets
  if (&SeqType($sSeq,-basic=>1) ne $this->DefSeqType()) {
    printf STDERR "%s %s. ERROR: seq type %s doesn't fit to motif seq type\n",
      &MySub, $this->ID(), &SeqType($sSeq,-type=>'basic');
    return undef;
  }

  my (@MtfHit);

  # locate positively oriented instances
  if (($opt{-strands}||0) >= 0) {
    push @MtfHit, map{@{$_||[]}} $this->SearchStrand($sSeq,%opt);
    $debug and printf STDERR "%s. motif %s, seq #%d, strand +1: %d hits\n", &MySub,
      $this->ID(), $this->{SeqScanned}, int(@MtfHit);
  }
  # locate negatively oriented instances
  if (($opt{-strands}||0) <= 0) {
    push @MtfHit, map{@{$_||[]}} $this->SearchStrand($sSeq,-strand=>-1,%opt);
    $debug and printf STDERR "%s. motif %s, seq #%d, both strands: %d hits\n", &MySub,
      $this->ID(), $this->{SeqScanned}, int(@MtfHit);
  }

  # select best scoring hits
  if ($lopt{-best}) {
    # *** implement me ***
  }

  # sort complete list of matches, return result
  $this->{SeqScanned} ++;
  $this->{SeqHits} += int @MtfHit;
  return [ sort { $a->{offset}<=>$b->{offset} } @MtfHit ];
}


# identify and locate HMM matches in one strand
#
# INTERFACE
# - argument 1: sequence
#
# - options:
#   ...         all of &HmmsearchParse
#   -debug      [STD]
#   -HitSurrd   lengths of stored surrounding sequences, default: 0
#   -strand     which sequence strand to search, default: +1
#
# - return val: - reference to instance data structure
#               - undef if an error occurred
#
# DESCRIPTION
# - field definitions for the returned result table (array of hashes) can be
#   found in SeqLab::MotifLib.pm
# - note that the sequence is supplied as single entry to the hmmsearch binary,
#   out of context of multi-sequence input.  Therefore, the expectancy score
#   calculation of hmmsearch won't make any sense.
#   A default cutoff score of 0.0 will be set while running hmmsearch in order
#   to circumvent hits reported due to default expect thresholding behaviour of
#   hmmsearch.
#
sub SearchStrand {
  my ($this, $sSeq, %opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $strand = $lopt{-strand} || 1;
  my $HitSurrd = $lopt{-HitSurrd};
  $debug and printf STDERR "%s. entered, -strand %d\n", &MySub, $lopt{-strand}||1;

  # delegate reverse-complement search to recursive instance
  if ($strand == -1) {
    $this->{RevCompl} ||= do {
      my $pMtfRevcompl = $this->RevCompl();
      unless ($pMtfRevcompl->SyncConnect(1) and $pMtfRevcompl->Sync()) {
        printf STDERR "%s %s. ERROR: synchronisation failed on temporary reverse-complement motif object\n", &MySub, $this->ID();
        return undef;
      }
      $pMtfRevcompl;
    };
    delete $opt{-strand};
    return $this->{RevCompl}->SearchStrand($sSeq,%opt);
  }

  # debug synchronisation
  # - synchronisation has been performed in calling function (method Search()
  #   or method Search(-strand=>-1))
  if ($debug) {
    printf STDERR "%s. sync check:\n", &MySub;
    $this->SyncDebug(-indent=>2);
  }

  # prepare sequence file, reserve error log, prepare call string
  my $PathSeq = $_LibParam{TmpManag}->Create();
  &WriteFile ($PathSeq, &SeqentryToFasta({id=>($this->DefType().'-arg'),sequence=>$sSeq}));
  $debug and printf STDERR "%s. seq saved: length %d, file %s\n", &MySub,
    length($sSeq), $PathSeq;
  my $PathErr = $_LibParam{TmpManag}->Create(-touch=>1);
  if (! -x $CorePath{call}{HmmSearch}) {
    printf STDERR "%s. missing executable \"%s\" at %s\n", &MySub,
      'HmmSearch', $CorePath{call}{HmmSearch};
    exit 1;
  }
  my $CallThresh = defined($this->{def}{GA}) ? '--cut_ga' : '-T 0.0';
  my $call = join (' ', $CorePath{call}{HmmSearch}, $CallThresh,
    $this->{SyncFile}, $PathSeq);

  # call and parse input
  # - it would be nice to get the return code in case an error occurs
  #   But, how to get it?
  my $pRslt = &HmmsearchParse ("$call 2>$PathErr |", %lopt, -debug=>$dbg2);
  if (!$pRslt or -s $PathErr) {
    printf STDERR "%s. ERROR in search function, system call:\n  %s\n", &MySub, $call||"''";
    printf STDERR "  error log:\n%s", scalar &ReadFile($PathErr);
    return [];
  }

  # prepare list of hits
  my (@hit);
  foreach my $pHit (@{$$pRslt{match}}) {
    my %HitInst = (
      MotifID  => $this->ID(),
      length   => $$pHit{SeqLen},
      offset   => $$pHit{SeqPos}[0],
      orient   => $this->{def}{orient},
      score    => $$pHit{score},
      ScoreExp => $$pHit{expect},
      instance => substr ($sSeq, $$pHit{SeqPos}[0]-1, $$pHit{SeqLen}),
    );
    if ($HitSurrd) {
      # grab surrounding sequence from sequence string
      $HitInst{ante} = substr ($sSeq,
        &Max(0,$HitInst{offset}-1-$HitSurrd), &Min($HitSurrd,$HitInst{offset}-1));
      my $StrPosRgt = $HitInst{offset}+$HitInst{length}-1;
      $HitInst{post} = substr ($sSeq, $StrPosRgt,
        &Min($HitSurrd,length($sSeq)-$StrPosRgt) );
    }
    push @hit, { %HitInst };
  }

  # tidy up, return hits
  &DataDecross ($pRslt);
  if (!$debug and !$lopt{-TmpPreserve}) { unlink $PathSeq, $PathErr; }
  return \@hit;
}


# parsed hmmsearch report (data structure)
#
# DESCRIPTION
#
# %rslt
#   Hash containing whole result data structure
#
# $rslt{main}
#   Hash containing framing info about the query, as found in the
#   hmmsearch output.
#
#   DbPath        file path of sequence database
#   DbAlph        symbol alphabet of sequence database, *** not implemented ***
#   FlagNoHit     no hit at all in the hmmsearch output
#   FlagCritical  parsing complete, but critical error occured. No matches,
#                 but no according message
#   HmmPath       file path of HMM
#   HmmId         HMM identifier
#   HmmAlph       symbol alphabet of HMM, *** not implemented ***
#   HmmLength     length of HMM (= node number)
#   MatchNum      number of reported matches (according to match blocks)
#   path          original path of hmmsearch report
#   ProgDate      HMMer suite release date
#   ProgVersion   HMMer suite version number
#   ScoreHighest  highest reached match score
#   ScoreLowest   lowest reached match score
#   warn          array of message paragraphs for warnings
#
# $rslt{seq}
#   Hash representing database entries yielding matches. Sequences are
#   organised in a hash using SeqID as a key. Sequence data sub-structure
#   is again a hash:
#
#   id            sequence ID
# ( id_short      sequence ID, shortened as used throughout alignment blocks
#                 This is only valid throughout &HmmsearchParse
#   descr         description of the match entry, shortened as from hmmsearch
#                 report
#   match         reference to array of matches associated with this database
#                 entry.
#   MatchNum      number of matches for this database entry, redundant to
#                 int (@{$rslt{seq}{$SeqID}{match})
#
# $rslt{match}
#   Array of matches to database entries. Each match is represented by a
#   hash having following keys:
#
# ( align         reference to array of alignment lines (order: HMM, match,
#                 sequence). Sequences are concatenated. This data sub-structure
#                 needs option -align to be set in &HmmsearchParse.
#   expect        expect value for the HSP
#   HmmPos        HMM positions of match range, range object allowing hash
#                 access using syntax:
#                 ($off,$end) = ($pMatch{HmmPos}[-1], $pMatch{HmmPos}[1])
#                 positions are stored in all-day notation
#   HmmLen        length of query sequence in the alignment range
#   IdxNum        array position of match for same database sequence,
#                 computational notation
#   orient        relative orientation of sequence in match, always +1
#   score         score value
#   seq           reference to sequence data sub-structure in $rslt{seq}
#   SeqPos        sequence positions of match range, range object allowing
#                 hash access using syntax:
#                 ($off,$end) = ($pMatch{SeqPos}[-1], $pMatch{SeqPos}[1])
#                 positions are stored in all-day notation
#   SeqLen        length of the sequence entry stretch inside the match
#
# example: $$pReport{seq}{'P03394'}{match}[0]{orient}
#            means
#          relative strandedness in highest scoring match of database sequence
#          'P03394'.
#


# parse hmmsearch output into data structure
#
# INTERFACE
# - argument 1: source:
#               - path of source file (hmmsearch output)
#               - file handle (typeglob or FileHandle object reference)
#
# - options:
#   -align      parse alignments
#   -debug      [STD]
#
# - return val: - reference to data structure
#               - undef if an error occurred
#
# DESCRIPTION
# - for hmmsearch output data structure see descriptions above.
#
sub HmmsearchParse {
  my ($ArgIn, %opt) = @_;
  my $debug = $opt{-debug};
  my (%rslt);

  ##############################################################################
  # parsing engine
  {
    my ($hIn, %ParseStep, $line, $LineCt);
    my ($pSeqCurr, $pMatchCurr);
    my ($StepPrev, $StepCurr, $StepErr, $bStepSucc);

    # definitions of parsing subfunctions
    # - argument interface for parsing sub-functions
    #   arguments:  NONE
    #   options:    depends on, we're flexible in that respect
    #   return val: success status (boolean)
    #   global data:
    #     %rslt
    #     my declarations above
    #     $line     there's a special convention that it will contain the line
    #               of the next syntactical block
    %ParseStep = (

      # read valid line
      ReadLineValid => { id=>'ReadLineValid',
        func => sub {
          if (defined ($line=<$hIn>)) {
            chop ($line);
            $LineCt ++;
            return 1;
          } else {
            $StepErr = 'EOF';
            return 0;
          }
        } },

      # 1st header block
      FakeOff => { id=>'FakeOff',
        next => [ 'head1' ],
        },

      # 1st header block
      head1 => { id=>'head1',
        next => [ 'head2' ],
        func => sub {

          # top header: HMMer version
          &{$ParseStep{ReadLineValid}{func}}() or return 0;
          unless ($line =~ m/^hmmsearch/) { return 0 }
          &{$ParseStep{ReadLineValid}{func}}() or return 0;
          if ($line =~ m/^HMMER\s+(\S+)\s+\((.+)\)/) {
            $rslt{main}{ProgVersion} = $1;
            $rslt{main}{ProgDate} = $2;
          }
          while (1) {
            &{$ParseStep{ReadLineValid}{func}}() or return 0;
            if ($line =~ m/^(- ){10}/) { last }
          }

          # read until beginning of next block
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            if (0) { }
            elsif ($line =~ m/^HMM file:\s+(\S+)\s+\[(\S+)\]/) {
              $rslt{main}{HmmId} = $2;
              $rslt{main}{HmmPath} = &PathExpand($1);
            }
            elsif ($line =~ m/^Sequence database:\s+(.*)$/) {
              $rslt{main}{DbPath} = $1;
            }
            elsif ($line =~ m/^(- ){10}/) { return 1 }
          }
          return 0;
        } },

      # 2nd header block
      head2 => { id=>'head2',
        next => [ 'StatSeq' ],
        func => sub {
          # don't need to test for proper report section

          # read until beginning of next block
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            if ($line =~ m/^Scores for complete sequences/) { return 1 }
          }
          return 0;
        } },

      # sequence statistics table
      StatSeq => { id=>'StatSeq',
        next => [ 'StatMatch' ],
        func => sub {
          my (@ReColConstr, $ReCol, @LinePt);

          # test for proper report section
          unless ($line =~ m/^Scores for complete sequences /) { return 0 }

          # determine column spacing
          for (1..2) { &{$ParseStep{ReadLineValid}{func}}() or return 0 }
          @ReColConstr = map { {len=>length($_)} } ($line =~ m/^(-+ +)(-+ +)(-+ +)(-+ +)/);
          unless (int(@ReColConstr)) {
            $StepErr = 'regexp failed (table head)';
            return 0;
          }
          $ReCol = join ('', map { "(.{$_->{len}})" } @ReColConstr);

          # read table lines
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            length($line) or last;
            if ($line=~m/^\s+\[no hits/ and !(keys %{$rslt{seq}})) { last }
            # grab sequence information from table line
            # split to: ID, descr, score, expect, N hits
            @LinePt = map { s/\s+$//; s/^\s+//; $_; } ($line=~m/^$ReCol(.+)/);
            unless (int(@LinePt)) {
              $StepErr = 'regexp failed (data line)';
              return 0;
            }
            $pSeqCurr = $rslt{seq}{$LinePt[0]} = {
              id       => $LinePt[0],
              descr    => $LinePt[1],
              MatchNum => $LinePt[4],
              };
            $$pSeqCurr{id_short} = &SeqidWord($LinePt[0]);
            $$pSeqCurr{id_short} =~ s/^(.{1,10}).+/$1/;
            $$pSeqCurr{id_short} .= ' ' x (10-length($$pSeqCurr{id_short}));
            if ($debug) {
              printf STDERR "%s. sequence info:\n", &MySub;
              printf STDERR "  ID: %s, short: %s\n", $$pSeqCurr{id}, $$pSeqCurr{id_short};
              printf STDERR "  description: %s\n", $$pSeqCurr{descr};
            }
          }

          # read until beginning of next block
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            if ($line =~ m/^Parsed for domains:/) { return 1 }
          }
          return 0;
        } },

      # match statistics table
      StatMatch => { id=>'StatMatch',
        next => [ 'MatchHead', 'MatchNone' ],
        func => sub {
          my (@ReColConstr, $ReCol, @LinePt);

          # test for proper report section
          unless ($line =~ m/^Parsed for domains:/) { return 0 }

          # determine column spacing, construct regexp
          for (1..2) { &{$ParseStep{ReadLineValid}{func}}() or return 0 }
          @ReColConstr = map { {len=>length($_)} } ($line =~ m/^(-+ +)(-+ +)(-+ +)(-+)( +)(-+ +)(-+)( +)(-+ +)/);
          unless (int(@ReColConstr)) {
            $StepErr = 'regexp failed (table head)';
            return 0;
          }
          # columns #2 to #4 may contain strings broader than outlined in the header
          splice @ReColConstr, 0, 3, ( { full=>'(\s*\S+ )' } ) x 3;
          $ReColConstr[3]{xtra} = '\S*';
          $ReCol = join ('', map{ $_->{full}||"(.{$_->{len}}$_->{xtra})" } map{ $_->{xtra}||='';$_ } @ReColConstr);

          # read table lines
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            length($line) or last;
            if ($line=~m/^\s+\[no hits/ and !(keys %{$rslt{seq}})) { last }
            # grab sequence information from table line
            # split to: ID, descr, score, expect, N hits
            @LinePt = map { s/\s+$//; s/^\s+//; $_; } ($line=~m/^$ReCol(.+)/);
            unless (int(@LinePt)) {
              $StepErr = 'regexp failed (data line)';
              return 0;
            }
            unless ($pSeqCurr = $rslt{seq}{$LinePt[0]}) {
              $StepErr = 'missing seq entry';
              return 0;
            }
            $LinePt[1] =~ s|/\s*\d+$||;
            $pMatchCurr = {
              IdxNum => $LinePt[1] - 1,
              seq    => $pSeqCurr,
              SeqPos => [$LinePt[2], $LinePt[3]],
              HmmPos => [$LinePt[5], $LinePt[6]],
              orient => '+1',
              score  => $LinePt[8],
              expect => $LinePt[9],
              };
            $$pMatchCurr{HmmLen} = &Max(0,$$pMatchCurr{HmmPos}[1]-$$pMatchCurr{HmmPos}[0]) + 1;
            $$pMatchCurr{SeqLen} = &Max(0,$$pMatchCurr{SeqPos}[1]-$$pMatchCurr{SeqPos}[0]) + 1;
            push @{$rslt{match}}, ($$pSeqCurr{match}[$$pMatchCurr{IdxNum}] = $pMatchCurr);
            if ($debug) {
              printf STDERR "%s. new match entry:\n", &MySub;
              printf STDERR "  seq ID: %s\n", $$pSeqCurr{id};
              printf STDERR "  match #: %d\n", $$pMatchCurr{IdxNum} + 1;
              printf STDERR "  seq range: %d..%d (length %d)\n",
                @{$$pMatchCurr{SeqPos}}, $$pMatchCurr{SeqLen};
            }
          }

          # nothing in here, that is not reported in the match headers
          # read until beginning of next block
          while (&{$ParseStep{ReadLineValid}{func}}()) {
            if ($line =~ m/^Alignments of top-scoring/) {
              &{$ParseStep{ReadLineValid}{func}}() and return 1;
            }
          }
          return 0;
        } },

      # match header
      MatchNone => { id=>'MatchNone',
        next => [ 'tail' ],
        func => sub {
          # match header line
          $line =~ m/^\s+\[no hits/ or return 0;

          # read until 1st line of next block
          for (1..2) { &{$ParseStep{ReadLineValid}{func}}() or return 0; }
          return 1;
        } },

      # match header
      MatchHead => { id=>'MatchHead',
        next => [ 'MatchAln' ],
        func => sub {

          # match header line
          $line =~ m/^(\S+): domain (\d+) of (\d+), from / or return 0;
          if ($opt{-align}) {
            # set $pMatchCurr for proper placement of following alignment parsing
            my $IdxNum = $2 - 1;
            $pSeqCurr = $rslt{seq}{$1} ||= {};
            $pMatchCurr = $$pSeqCurr{match}[$IdxNum];
          }

          # read 1st line of next block
          return &{$ParseStep{ReadLineValid}{func}}() ? 1 : 0;
        } },

      # match alignment block
      MatchAln => { id=>'MatchAln',
        next => [ 'MatchAln', 'MatchHead', 'tail' ],
        func => sub {
          my $indent;

          # test for match alignment block
          $line =~ m/^( +(\*?->)?)([a-zA-Z\.]+)/ or return 0;
          $indent = length($1);
          if ($opt{-align}) {
            $$pMatchCurr{align}[0] .= $3;
          }

          # 2nd and 3rd line
          &{$ParseStep{ReadLineValid}{func}}() or return 0;
          if ($opt{-align}) {
            $line =~ m/^.{$indent}([a-zA-Z\. \+]+)/ or return 0;
            $$pMatchCurr{align}[1] .= $1;
          }
          &{$ParseStep{ReadLineValid}{func}}() or return 0;
          if ($opt{-align}) {
            $line =~ m/^ {2}$$pSeqCurr{id_short} +\d+ +([A-Z\-]+)/ or return 0;
            $$pMatchCurr{align}[2] .= $1;
          }

          # read until 1st line of next block
          for (1..2) { &{$ParseStep{ReadLineValid}{func}}() or return 0; }
          return 1;
        } },

      # report tail
      tail => { id=>'tail',
        func => sub {
          # read until end of report
          while (&{$ParseStep{ReadLineValid}{func}}()) { }
            # nothing at the moment, we're interested in
          return 1;
        } },

      # what remains from &BlastParse
      WhatRemains => { id=>'WhatRemains',
        func => sub {
          my ($buffer, $CtHspSeg);

          if ($line =~ m/^(\S+): domain (\d+) of (\d+) /) { }

        ##### report header

          elsif ($line =~ m/^\w*hmmsearch\w*/) {
            $rslt{main}{HmmAlph} = 'nucleotide';
            $rslt{main}{DbAlph} = 'nucleotide';
          }

        ##### other stuff to parse beside matches

          # warnings
          elsif ($line =~ m/^WARNING:  /) {
            $buffer = $line;
            while (defined($line=<$hIn>) and $line!~m/^$/ and $line!~m/^Searching/) {
              $buffer .= $line;
            }
            push @{$rslt{main}{warn}}, $buffer;
          }

          # no hits
          elsif ($line =~ m/^\s+[*]{3} NONE [*]{3}/) {
            $rslt{main}{FlagNoHit} = 1;
          }

        } }
      );

  ##############################################################################
  # pre-work, parsing control

    # get/create input handle
    if (ref($ArgIn) =~ m/\b(FileHandle|GLOB)/) {
      $hIn = $ArgIn;
    } else {
      unless ($hIn = FileHandle->new($ArgIn)) {
        $debug and printf STDERR "%s. ERROR: unable to open input file %s\n", &MySub, $ArgIn||"''";
        return undef;
      }
      $rslt{main}{path} = $ArgIn;
    }
    if ($debug) {
      my $PathRslt = $_LibParam{TmpManag}->Create();
      &WriteFile ($PathRslt, scalar &ReadFile($hIn));
      printf STDERR "%s. hmmsearch report saved as: %s\n", &MySub, $PathRslt;
      $hIn = FileHandle->new($PathRslt);
      $rslt{main}{path} = $ArgIn = $PathRslt;
    }

    # block parse loop
    $StepPrev = 'FakeOff';
    ParseStepping: {
      my $StepCurrIt;
      foreach $StepCurrIt (@{$ParseStep{$StepPrev}{next}}) {
        $StepCurr = $StepCurrIt;
        $bStepSucc = &{$ParseStep{$StepCurr}{func}}();

        # successful -> continue parsing
        if ($bStepSucc) {
          $StepPrev = $StepCurr;
          redo ParseStepping;
        }
        if ($StepErr) { last }
      }
    } # end ParseStepping

    # unsuccessful -> error handling
    if (! ref($ArgIn)) { close($hIn) }
    unless ($bStepSucc) {
      printf STDERR "%s. ERROR: parsing failed, source %s line %d\n", &MySub, $ArgIn, $LineCt;
      printf STDERR "  syntax blocks: previous %s, current %s\n", $StepPrev, $StepCurr;
      printf STDERR "  error category: %s\n", $StepErr||"''";
      if ($debug) {
        print  STDERR "what's been parsed so far:\n";
        &DataPrint (&HmmsearchStructDecross(\%rslt,-copy=>1), -handle=>\*STDERR);
      }
      return undef;
    }
  } # end of parsing

  ##############################################################################
  # final work on data structure

  # combine data from sub-structures
  # - lowest/highest score reached
  # - overall number of matches
  $rslt{main}{ScoreLowest}  = &Min (map { $_->{score} } @{$rslt{match}});
  $rslt{main}{ScoreHighest} = &Max (map { $_->{score} } @{$rslt{match}});
  $rslt{main}{MatchNum} = int @{$rslt{match}};

  # refine data in sub-structures
  if (! $debug) {
    foreach (values %{$rslt{seq}}) { delete $_->{id_short} }
  }

  # report parsed structure
  if ($debug) {
    print  STDERR "what's been parsed:\n";
    &DataPrint (&HmmsearchStructDecross(\%rslt,-copy=>1), -handle=>\*STDERR);
  }

  # exit SUB
  return \%rslt;
}


# un-cross-link hmmsearch report data structure
#
# INTERFACE
# - argument 1: reference to report data structure
# - options:
#   -copy       create un-cross-linked copy of data structure
# - return val: reference to data structure
#
# DESCRIPTION
# - This is a specialised implementation of &DataDecross. It works to produce
#   output that display un-cross-linked at the preferred data sub-structure
#   level.
#
sub HmmsearchStructDecross {
  my ($pData, %opt) = @_;

  # clone data structure?
  if ($opt{-copy}) {
    $pData = &DataClone ($pData);
  }

  # un-cross-link:
  #  $$pData{match}[*]{seq}
  #  $$pData{seq}{*}{match}
  foreach (@{$$pData{match}}) {
    $_->{seq} = "$_->{seq}";
  }
  foreach (values %{$$pData{seq}}) {
   $_->{match} = "$_->{match}";
  }
  
  return $pData;
}


1;
# $Id: MotifHMMer.pm,v 1.21 2008/06/11 08:44:58 szafrans Exp $
