################################################################################
#
#  kPerl Alignment Laboratory
#  Library for GAP4 Database Handling
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Genome Analysis, 1999-2000,2002,2004,
#    szafrans@imb-jena.de
#
################################################################################
#
#  DESCRIPTION
#
# - this code makes extensive use of GAP4 Tcl scripts. For functioning
#   the the GAP4 environment needs to be set poperly.
#   - set $ENV{STADENROOT}
#   - source $ENV{STADENROOT}/staden.profile
#
# - individual description of functions at the beginning of the code blocks
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#   %LibGlob
#   %_LibParam
#
# - basic information
#   $_LibParam{TmpManag}
#   $LibGlob{ErrorMsg}
#   &GapNameFields
#
# - process management
#   $_LibParam{RegexpGaperror}
#   &GapSafeCall
#   &GapSafeRead
#
# - GAP4 data I/O
#   GAP4 hard data structure
#   &GapDataStruct
#   &GapHash
#   GAP4 sequence data structure
#   &GapSeqStruct
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - look also for notes in the header of each function block
#
################################################################################

package SeqAlign::Gap;

# includes
use strict; #use warnings;  # OK 20070112
use MainLib::Data;
use MainLib::Path;
use MainLib::File;
use MainLib::FileTmp;
use MainLib::Misc;
use Math::Calc;
use Math::Range;

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &GapNameFields
  &GapSafeCall &GapSafeRead
  &GapDataStruct &GapHash &GapSeqStruct
  );

# package-wide constants and variables
our %LibGlob;
my %_LibParam;


################################################################################
# basic information
################################################################################


# handle temporary files globally
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{
  $_LibParam{TmpManag} = MainLib::FileTmp->new(); 
  $_LibParam{TmpManagErr} = MainLib::FileTmp->new(-suffix=>'err'); 
  };
&$pcFT;


# error handling
$LibGlob{ErrorMsg} = undef;


# split project path to name fields
#
# INTERFACE
# - argument 1: path of gap project
#
# - options:
#   -debug      [STD]
#
# - return val: - reference to name fields
#                 exists   database file exists as a binary
#                 full     complete database path
#                 dir      directory path including trailing '/'
#                 name     database name (without version character)
#                 valid    database name is valid for creation of database:
#                          directory exists, no conflicting file(s)
#                 version  version character
#               - undef if an error occurred
#
sub GapNameFields {
  my ($PathArg, %opt) = @_;
  my ($debug);
  my ($PathProj, %field);

  # function parameters
  $debug = $opt{-debug};

  # expand path
  $PathProj = &PathExpand ($PathArg);

  # split to fields
  unless ($PathProj =~ m|^(.*/)?([^ .]+)(.([a-zA-Z0-9]))?$|) {
    $debug and printf STDERR "%s. ERROR: unable to split project path to name fields\n", &MySub;
    return undef;
  } else {
    $field{dir}     = $1 || './';
    $field{name}    = $2;
    $field{version} = $4 || '0';
    $field{full}    = "$field{dir}$2.$field{version}";
  }

  # check existence / validity
  $field{exists} = (-B $field{full} and -B "$field{full}.aux");
  if ($field{exists}) {
    $field{busy} = -f "$field{full}.BUSY";
  }
  $field{valid} = $field{exists} || (-d $field{dir} and
    ! -f $field{full} and ! -f "$field{full}.aux" and
    ! -f "$field{full}.BUSY");

  # exit SUB
  return \%field;
}


################################################################################
# process management
################################################################################

# recognise errors
$_LibParam{RegexpGaperror} = 'SERIOUS|FATAL|Segmentation [fF]ault';


# invoke GAP4 script call safely
#
# INTERFACE
# - argument 1: GAP4 subprocess call, plain command line
#
# - options:
#   -debug      [STD]
#
# - return val: subprocess exit code
#
sub GapSafeCall {
  my ($call,%opt) = @_;
  my $debug = $opt{-debug};
  my $FileErr = $_LibParam{TmpManagErr}->Create();

  # invoke report, read pipe
  $debug and printf STDERR "%s. calling '$call'\n", &MySub;
  # redo block
  my ($ret,$PlainErr); {
    $ret = int (system ("$call 2>$FileErr") / 256);
    if (-e $FileErr) {
      if (($PlainErr=&ReadFile($FileErr)) =~ m/$_LibParam{RegexpGaperror}/o) {
        $debug and printf STDERR "%s. unexpected ERROR in Tcl script call, repeating call\n", &MySub;
        redo;
      }
      unlink $FileErr;
    }
  }

  # exit SUB
  $LibGlob{ErrorMsg} = $PlainErr || undef;
  return $ret;
}


# read from GAP4 script process safely
#
# INTERFACE
# - argument 1: GAP4 script call
#
# - options:
#   -debug      [STD]
#
# - return val: script STDOUT output
#
sub GapSafeRead {
  my ($call, %opt) = @_;
  my $debug = $opt{-debug};
  my $FileErr = $_LibParam{TmpManagErr}->Create();

  my ($PlainErr, $ret, $GapStdout);

  # invoke report, read pipe
  $debug and printf STDERR "%s. calling '$call'\n", &MySub;
  { # redo block
    $GapStdout = &ReadFile ("$call 2>$FileErr |");
    if (! defined $GapStdout) {
      $debug and printf STDERR "%s. read ERROR in Tcl script pipe, repeating call\n", &MySub;
      redo;
    }
    if (($PlainErr=&ReadFile($FileErr)) =~ m/$_LibParam{RegexpGaperror}/o) {
      $debug and printf STDERR "%s. unexpected ERROR in Tcl script call, repeating call\n", &MySub;
      redo;
    }
    $debug or unlink $FileErr;
  }

  # exit SUB
  $LibGlob{ErrorMsg} = $PlainErr || undef;
  return wantarray ? ($GapStdout,$ret) : $GapStdout;
}


################################################################################
# GAP4 data I/O
################################################################################


# GAP4 hard data structure
#
# DESCRIPTION
# - &GapDataStruct parses this from GAP4 project via showdata.tcl
# - data structure (original):
#   + marks processed fields
#
#   %database
#     ...           central database fields
#     Nannotations  number of allocated annotation data sets
#     free_annotations  first entry in the chain of free annotations
#     Ncontigs      number of allocated contig data sets
#     num_contigs   number of active contig data sets
#     Nreadings     number of allocated reading data sets
#     num_readings  number of active reading data sets
#
#     @contig       array of contig data sets, entry '0' is undefined. Each
#                   contig entry is represented by hash %contig
#     @reading      array of reading data sets, entry '0' is undefined. Each
#                   reading entry is represented by hash %reading
#     @annotation   array of annotation data sets, entry '0' is undefined. Each
#                   annotation entry is represented by hash %annotation
#
#   %contig         hash of following structure:
#     annotations   first entry in the chain of annotations, specified by number
#     data_type     data type: 'contig'
#     data_num      data number
#     left          number of leftmost reading
#     length        contig length
#     notes         text record number
#     right         number of rightmost reading
#
#   %reading        hash of following structure:
#     ...           central reading fields
#     annotations   first entry in the chain of annotations, specified by number
#     confidence    text record number, data from Experiment format field AV
#   + data_type     data type: 'reading'
#   + data_num      data number
#     end           end of used sequence 
#     left          number of leftward neighbour
#     length        reading length including hidden data, cmp.
#                   'sequence_length'
#     name          text record number
#     notes         text record number
#     orig_positions
#                   text record number, data from Experiment format field ON
#     right         number of rightward neighbour
#   + referee       array of referring sequence entries
#     sequence      text record number
#     sequence_length
#                   length of visible part of reading, cmp. 'length'
#     start         offset of used sequence (computational counting) or
#                   length of the hidden sequence range. The measured hidden
#                   sequence range is that 5' of the visible range in the
#                   contig topology. So, it'll be the hidden sequence END
#                   if the reading is minus-oriented in the contig.
#     template      template record number
#     trace_name    text record number
#     trace_type    text record number
#
#   %annotation     hash of following structure:
#     ...           central annotation fields
#     annotation    text record number
#   + data_type     data type: 'annotation'
#   + data_num      data number
#     length        annotation length
#     next          next entry in the chain of annotations, specified by number
#     position      offset of the annotation. The annotation range is always
#                   positively directed. The position counting is biological.
#                   The position includes the hidden range of the reading if the
#                   annotation is on a reading.
#   + prev          previous entry in the chain of annotations
#   + referee       array of referring sequence/annotation entries.
#                   Normally there's only one referee, but in erroneous
#                   database constellations there may be more.
#   + ref_base      non-annotation referer of annotation entry. That is
#                   the root of the reference chain leading to this annotation
#                   entry.
#     strand        which strand is the annotation referring
#                   0  positive
#                   1  negative
#     type          GAP4 annotation type, e.g. 'COMM' 
#
# - niced data structure (what's different to original data structure):
#   + marks processed fields
#
#   %contig         hash of following structure:
#     id            string of name of leftmost reading
#     lefttxt       string of name of leftmost reading
#    (notes)        deleted
#     righttxt      string of name of rightmost reading
#
#   %reading        hash of following structure:
#     id            string of name text field rather than text record number
#                   'name'
#    (name)         deleted, see 'id'
#    (notes)        deleted
#    (orig_positions)  deleted
#    (sequence)     deleted
#     template      name of template rather than template record number
#     trace_type    name of trace type rather than text record number
#     trace_name    name of trace name rather than text record number
#
#   %annotation     hash of following structure:
#    (annotation)   deleted, see 'text'
#     text          formerly 'annotation'
#

$_LibParam{TclHash}{KeyDatabase}   = [ qw(actual_db_size annotations clones contig_order contigs data_class free_annotations free_notes freerecs max_gel_len maximum_db_size notes notes_a Nannotations Nclones Ncontigs Nfreerecs Nnotes Nreadings Ntemplates num_contigs num_readings Nvectors readings templates vectors version) ];
$_LibParam{TclHash}{KeyContig}     = [ qw(annotations left length notes right) ];
$_LibParam{TclHash}{KeyReading}    = [ qw(annotations chemistry confidence end left length name notes orig_positions position primer right sense sequence sequence_length start strand template trace_name trace_type) ];
$_LibParam{TclHash}{KeyAnnotation} = [ qw(annotation length next position strand type) ];


# invoke data report and parse it into data structure
#
# INTERFACE
# - argument 1:    path of GAP4 database
#
# - options:
#   -debug         [STD]
#   -nice          return niced data structure as described above
#   -timer         print time performance protocol to STDERR
#
# - return val:    - reference to data structure
#                  - undef if an error occurred
#
# DESCRIPTION
# - description of GAP4 hard data structure see above
#
sub GapDataStruct {
  my ($PathProj,%opt) = @_;
  my $debug = $opt{-debug};
  my $dbg2 = $debug ? $debug-1 : undef;
  my $bTimer = $opt{-timer}; my $time;

  ##############################################################################
  # pre-work

  # invoke report, read data
  if($bTimer) { $time = &Sum((times)[0,2]) }
  my $FileBuffer = $_LibParam{TmpManag}->Create(-touch=>1);
  my $call = "$CorePath{call}{GapShowData} $PathProj >$FileBuffer";
  if (&GapSafeCall ($call,%opt)) {
    die sprintf "%s. ERROR: call failed\n  $call\n", &MySub;
  }
  my $pGapData = &DataRead ($FileBuffer, %opt);
  $debug or unlink $FileBuffer;
  if ($opt{-timer}) {
    printf STDERR "%s. CPU time for retrieving GAP4 data: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = (times)[0];
  }
  if ($debug) {
    printf STDERR "%s. read raw data from $PathProj:\n%s", &MySub;
    printf STDERR map { sprintf ("  %s: %d\n", $_, int(@{$$pGapData{$_}})-1) }
      qw(contig reading annotation);
    $FileBuffer = $_LibParam{TmpManag}->Create();
    my $hOut = FileHandle->new($FileBuffer,'w');
    foreach (@{$$pGapData{annotation}}) {
      printf $hOut "%s\t%s\t%s\n",
        $_->{data_num}, $_->{type}, ($_->{annotation} =~ m/$/m and $`);
    }
  }

  ##############################################################################
  # simple expansions
  my ($ItField,$pReferee);

  # enter data type, data number
  foreach $ItField (qw(contig reading annotation)) {
    for (my $i=1; $i<@{$$pGapData{$ItField}}; $i++) {
      $$pGapData{$ItField}[$i]{data_type} = $ItField;
      $$pGapData{$ItField}[$i]{data_num} = $i;
    }
  }

  # structure for annotation free list
  $$pGapData{FreeAnnotations} = {
    data_type   => 'FreeAnnotations',
    data_num    => 0,
    annotations => $$pGapData{database}{free_annotations},
    };

  # some renaming depending on preferred fashion of data structure
  # - original: delete expanded fields, ...
  # - niced ($opt{-nice}: delete original fields which have been expanded
  if ($opt{-nice}) {
    foreach (@{$$pGapData{contig}}) {
      $_->{id} = $_->{lefttxt};
      delete $_->{notes};
    }
    foreach (@{$$pGapData{reading}}) {
      $_->{id} = $_->{nametxt};
      delete $_->{name};
      delete $_->{nametxt};
      delete $_->{notes};
      delete $_->{orig_positions};
      delete $_->{sequence};
      $_->{template} = $_->{templatetxt};
      delete $_->{templatetxt};
      $_->{trace_type} = $_->{trace_type_txt};
      delete $_->{trace_type_txt};
      $_->{trace_name} = $_->{trace_name_txt};
      delete $_->{trace_name_txt};
    }
    foreach (@{$$pGapData{annotation}}) {
      $_->{text} = $_->{annotationtxt};
      delete $_->{annotationtxt};
      delete $_->{annotation};
    }
  } else {
    foreach (@{$$pGapData{contig}}) {
      delete $_->{lefttxt};
      delete $_->{righttxt};
    }
    foreach (@{$$pGapData{reading}}) {
      delete $_->{nametxt};
      delete $_->{templatetxt};
      delete $_->{trace_type_txt};
      delete $_->{trace_name_txt};
    }
    foreach (@{$$pGapData{annotation}}) {
      delete $_->{annotationtxt};
    }
  }

  ##############################################################################
  # check central database fields

  # loop over referees
  foreach $ItField (qw(contig reading)) {
    if ($#{$$pGapData{$ItField}} != $$pGapData{database}{"num_${ItField}s"}) {
      printf STDERR "%s. inconsistency in number of %ss: %d vs. %d\n", &MySub,
        $ItField, $$pGapData{database}{"num_${ItField}s"}, $#{$$pGapData{$ItField}};
    }
  }

  ##############################################################################
  # analyse chains of readings

  # loop over referee roots
  foreach my $pRefRoot ($$pGapData{contig}, $$pGapData{reading}) {
    # loop over referees, array position 0 is empty
    for (my $i=1; $i<@$pRefRoot; $i++) {
      $pReferee = $$pRefRoot[$i];

      # loop over left/rigth fields
      GapDataStructReadRef:
      foreach $ItField (qw(left right)) {

        # misdirected reference?
        unless ($$pReferee{$ItField}) { next }
        if ($#{$$pGapData{reading}} < $$pReferee{$ItField}) {
          printf STDERR "%s. %s %d, field %s refers to non-existing reading #%d\n", &MySub,
            $$pReferee{data_type}, $$pReferee{data_num}, $ItField, $$pReferee{$ItField};
          next;
        }

        # who's referred to?
        my $pRead = $$pGapData{reading}[$$pReferee{$ItField}];

        # loop for refering reading?
        # check array of entered referees for reference target
        if ($$pReferee{data_type} eq 'reading') {
          for (my $j=0; $j<@{$$pRead{referee}||[]}; $j++) {
            if ($pReferee eq $$pRead{referee}[$j]) {
              next GapDataStructReadRef;
            }
          }
        }

        # enter new referee
        $$pRead{referee} ||= [];
        push @{$$pRead{referee}}, $pReferee;
      }
    }
  }

  ##############################################################################
  # analyse chains of annotations
  my $CtAnnot=0;

  # loop over referee roots
  foreach my $pRefRoot ($$pGapData{contig}, $$pGapData{reading}, [ '', $$pGapData{FreeAnnotations} ]) {
    # loop over referees, array position 0 is empty
    for (my $i=1; $i<@$pRefRoot; $i++) {
      my $pRefBase = $pReferee = $$pRefRoot[$i];

      # initial annotation entry
      my $AnnotPrev = 0;
      my $AnnotNum = $$pReferee{annotations} or next;
      my $pAnnot = $$pGapData{annotation}[$AnnotNum];

      # go through annotation chain
      GapDataStructAnnotRef:
      while (1) {
        $CtAnnot ++;

        # verify annotation entry
        unless (%$pAnnot) {
          printf STDERR "%s. %s %d refers to non-existing annotation: %d\n", &MySub,
            $$pReferee{data_type}, $$pReferee{data_num}, $AnnotNum;
          last;
        }

        # number of reference root and previous annotation in chain
        $$pAnnot{ref_base} = $pRefBase;
        $$pAnnot{prev} = $AnnotPrev;

        # is this referee part of a loop?
        # check array of entered referees for reference target
        for (my $j=0; $j<@{$$pAnnot{referee}||[]}; $j++) {
          if ($pReferee eq $$pAnnot{referee}[$j]) {
            last GapDataStructAnnotRef;
          }
        }

        # enter referee
        $$pAnnot{referee} ||= [];
        push @{$$pAnnot{referee}}, $pReferee;

        # turn to next
        $AnnotPrev = $AnnotNum;
        $pReferee = $pAnnot;
        $AnnotNum = $$pAnnot{next} or last;
        $pAnnot = $$pGapData{annotation}[$AnnotNum];
      }
    }
  }

  ##############################################################################
  # report

  if ($opt{-timer}) {
    printf STDERR "%s. CPU time for data expansion and analysis: %.3f s\n", &MySub, (times)[0]-$time;
    $time = (times)[0];
  }

  if ($debug) {
    foreach my $pRead (@{$$pGapData{reading}}) {
      printf STDERR "reading %d\n", $$pRead{data_num};
      map { printf "  $_  $$pRead{$_}\n" } keys %$pRead;
      foreach $pReferee (@{$$pRead{referee}}) {
        if ($pReferee eq 'loop') {
          print STDERR "previous referee looped\n";
          next;
        }
        printf STDERR "referee:\n";
        map { printf "  $_  $$pReferee{$_}\n" } keys %$pReferee;
      }
    }
    foreach my $pAnnot (@{$$pGapData{annotation}}) {
      printf STDERR "annotation %d\n", $$pAnnot{data_num};
      printf STDERR "  prev:  %d\n", $$pAnnot{prev};
      printf STDERR "  next:  %d\n", $$pAnnot{next};
      printf STDERR "  1st referee:  %s %d\n", $$pAnnot{referee}[0]{data_type}, $$pAnnot{referee}[0]{data_num};
    }
  }

  # exit SUB
  return $pGapData;
}


# format data structure to GAP4 data structure (Tcl keyed list)
#
# INTERFACE
# - argument 1: data type
# - argument 2: data reference
#
# - options:
#   -debug      [STD]
#
# - return val: - keyed list (plain text)
#               - undef if an error occurred
#
# DESCRIPTION
# - description of GAP4 hard data structure see above
#
sub GapHash {
  my ($DataType, $pData, %opt) = @_;
  my ($debug, $key);
  my (%list, $ListPlain, @temp);

  # function parameters
  $debug = $opt{-debug};
  $key = $DataType;
  substr ($key, 0, 1) = 'Key' . uc (substr ($key, 0, 1));

  # format hash to string
  @list{@{$_LibParam{TclHash}{$key}}} = @{$pData}{@{$_LibParam{TclHash}{$key}}};
  while (@temp = each %list) {
    $ListPlain .= sprintf ('{%s} ', join (' ', @temp));
  }
  return $ListPlain;
}


# GAP4 sequence data structure
#
# DESCRIPTION
# - &GapSeqStruct parses this from GAP4 'show relationships' report
# - a leading 'o' mark data entries which are returned from &GapSeqStruct
#   only if an appropriate option is set.
#
# contig->@      reference to array of contigs, each of them represented
#                by a hash, see %contig
#
# %contig
#   number       number of the contig (GAP4's positional contig number)
#   id           identifier of the leftmost reading
#   idnum        number of the leftmost reading
#   length       contig length
#   read         reference to hash of readings, indexed by reading ID
#
# %read
#   id           identifier of the reading
#   idnum        number of the reading
#   length       reading length
#   pos          contig position of the reading ({'-1'}: left, {'1'}: right)
#   orient       orientation of the reading (1 for plus, -1 for minus)
#   contig       back-reference to contig entry
#


# represent GAP4 contig set by data structure
#
# INTERFACE
# - argument 1:    path of contig source, which may be:
#   GAP4 database  invoke 'show relationships' report and parse it
#   GAP4 showrel   parse it
#
# - options:
#   -debug         [STD]
#   -OutReport     save showrelationship report to specified file
#                  This works on true GAP4 projects only
#   -SlcCnum       select for minimum clone number in contig
#   -SlcContig     reference to array of contig IDs
#                  The data structure will be restricted to the specified
#                  contigs
#   -SlcLen        select according to contig length during construction
#                  of data structure. Argument may be either:
#                  - reference range object or similar data structure
#                  - string-fashioned range specification. Minimum syntax:
#                    minimum value for contig length
#   -SlcRnum       select for minimum reading number in contig
#
# - return val:    - reference to GAP4 sequence data structure
#                  - undef if an error occurred
#
# DESCRIPTION
# - for a description of GAP4 sequence data structure see above
# - this function has its own Tcl error handling independent of
#   &GapSafeRead / &GapSafeCall.
#
sub GapSeqStruct {
  my ($ArgSrc,%opt) = @_;
  my $debug = $opt{-debug};
  my $SlcCtg = join (' ', map { s/^#/\\#/; $_; } @{$opt{-SlcContig}});
  my $SlcLen = $opt{-SlcLen};
  if ($SlcLen) {
    $SlcLen = ref($SlcLen)? Math::Range->new($SlcLen) : Math::Range->new_parsed($SlcLen);
    unless ($SlcLen) {
      die sprintf "%s. ERROR in range argument '%s'\n", &MySub, $opt{-SlcLen};
    }
  }

  # construct call for reading assembly source
  my $FileErr = $_LibParam{TmpManagErr}->Create();
  my $call;
  # source is GAP4 database
  if (-B $ArgSrc) {
    my $calltee = $opt{-OutReport}? "| tee $opt{-OutReport}":'';
    $call = "$CorePath{call}{GapShowRelat} $ArgSrc $SlcCtg $calltee 2>$FileErr |";
  }
  # source is existing "show relationships" report
  else {
    $call = $ArgSrc;
  }

  ##############################################################################
  # parse

  my %report;
  { # redo block
    # invoke report via pipe (or read written report file)
    # we don't use &GapSafeRead here cause it would slurp large amounts
    #   of memory to store the full plain show_relationships report
    unless (open (INREPORT, $call)) {
      $debug and printf STDERR "%s. open failed: $call\n", &MySub;
      unlink $FileErr;
      return undef;
    }

    my ($line,$pCtg);
    while (defined ($line=<INREPORT>)) {

      # parse contig header section
      # enter next contig
      if ($line =~ m/^ CONTIG LINES $/) {
        $pCtg = {};

        # select for minimum read number in contig
        # this applies to the previously parsed contig
        if ($opt{-SlcRnum} and exists($report{contig}) and @{$report{contig}}
          and int (keys %{$report{contig}[-1]{read}}) < $opt{-SlcRnum}) {
          $debug and printf STDERR "%s. contig skipped due to reading number threshold\n", &MySub;
          pop @{$report{contig}};
        }

        # skip two header lines
        # get contig characterstics
        for (1..3) { $line=<INREPORT>; }
        if ($line =~ m/^ +(\d+) +(\d+) +(\d+) +(\d+)/) {
          $$pCtg{number} = $1;
          $$pCtg{length} = $2;
          $debug and printf STDERR "%s. got contig %d, length %d\n", &MySub, $$pCtg{number}, $$pCtg{length};

          # select for contig length
          if ($SlcLen and 
              ($$pCtg{length} < $$SlcLen[0] or ($$SlcLen[1] and $$pCtg{length} > $$SlcLen[1]))
          ) {
            $debug and printf STDERR "%s. contig skipped due to length threshold, num %d, length %d\n", &MySub,
              $$pCtg{number}, $$pCtg{length};
          } else {
            push @{$report{contig}}, $pCtg;
          }
        } else {
          $debug and printf STDERR "%s. ERROR: contig parameters expected but not found, line %d\n", &MySub, $.;
        }
      }

      # enter next read entry in current contig
      elsif ($line =~ m/^([a-zA-Z0-9._-]+) +(\d+) +(\d+) +(-?\d+) +(\d+) +(\d+)/) {
        my $pRead = {};
        $$pRead{id} = $1;
        $$pRead{idnum} = $2;
        $$pRead{contig} = $pCtg;

        # read orientation, length, contig position
        $$pRead{orient} = &Sign($4);
        $$pRead{length} = abs($4);
        $$pRead{pos}{'-1'} = $3;
        $$pRead{pos}{'1'} = $$pRead{pos}{'-1'} + $$pRead{length} - 1;

        # diverse
        $$pCtg{id} ||= $$pRead{id};
        $$pCtg{idnum} ||= $$pRead{idnum};

        # enter into contig
        $$pCtg{read}{$$pRead{id}} = $pRead;
      }

    } # end while for line loop
    close INREPORT;

    # inquiry error log
    my $PlainErr;
    if (-e $FileErr and length($PlainErr=&ReadFile($FileErr))) {
      if ($PlainErr =~ m/$_LibParam{RegexpGaperror}/o) {
        $debug and printf STDERR "%s. unexpected ERROR in Tcl script call, repeating call\n", &MySub;
        redo;
      } else {
        printf STDERR "%s. ERROR: non-Tcl error in call\n  $call\n", &MySub;
        print STDERR $PlainErr;
        die '';
      }
    }

  } # end redo block
  unlink $FileErr;
  if (-e $FileErr) { printf STDERR "%s. WARNING: temporary file %s still exists\n", &MySub, $FileErr; }

  # exit SUB
  return \%report;
}


1;
# $Id: Gap.pm,v 1.13 2007/06/15 14:54:29 szafrans Exp $
