################################################################################
#
#  kPerl Alignment Laboratory
#  Library for Scaffold Set Data Handling
#
#  copyright (c)
#    Fritz Lipmann Institute Jena, Genome Analysis Group, 2007
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 2000-2001,2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - individual description of functions at the beginning of the code blocks
#
# - this code fundamentally relies on read identifier syntax rules which are
#   case-specific (project-specific) in some respect. For details see
#   DESCRIPTION notes in SeqAlign::Assembly.pm.
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#
# - basics
#   scaffold set data structure
#
# - scaffold set data generation, I/O, and report
#   &ScaffdStruct
#   &ScaffdBridgeWinner  (not exported)
#   &ScaffdJoinFind2nd  (not exported)
#   &ScaffdStructToContigStruct
#
# - scaffolds in scaffold set data
#   &ScaffdPlain
#   &ScaffdSingle
#   &ScaffdJoin
#   &ScaffdBreak  (not exported)
#   &ScaffdContigs
#
# - bridges in scaffold set data
#   &ScaffdBridgePlain  (not exported)
#   &ScaffdBridgeCloneGap  (not exported)
#
# - contig ends in scaffold set data
#   &ScaffdCendStatist  (not exported)
#   &ScaffdCendPartners  (not exported)
#   &ScaffdCendLocat
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#   -timer      print time-performance protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - look also for notes in the header of each function block
#
################################################################################

package SeqAlign::ScaffdAssemb;

# includes
#use strict; use warnings;  # OK 20040810
use MainLib::StrRegexp qw(&TimeStr);
use MainLib::Data;
use MainLib::Misc qw(&MySub);
use Math::kCalc;
use Math::Range;
use Math::Statist;
use SeqLab::SeqBench qw(%SyntaxTranslNtdrc);
use SeqAlign::Assembly;
use ReadWatch::Read;
use ReadWatch::Library;

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &ScaffdStruct &ScaffdStructToContigStruct
  &ScaffdPlain &ScaffdSingle &ScaffdJoin &ScaffdContigs
  &ScaffdCendLocat
  );


################################################################################
# basics
################################################################################


# scaffold set data structure
#
# DESCRIPTION
# - The contig data structure from &SeqAlign::Assembly::ContigStruct is
#   expanded. This contig data structure is described in SeqAlign::Assembly.pm.
#
# => new sub-structure %scaffd, globally accessible via
#    $ContigStruct{scaffd}{$id}:
#    id             identifier of the leftmost contig end contributing to the
#                   scaffold.
#  ( length         length sum of member contigs
#                   persistently consistent after data structure refinement
#    cend           reference to array (2 elements) of contig ends representing
#                   the scaffold ends. Order of contig end elements reflects
#                   scaffold orientation.
#  ( CtContig       contig counter divided into 3 categories (hash):
#                   'member', 'conflict', 'satellite'
#                   persistently consistent after data structure refinement
#  ( member         reference to array of member contigs
#                   persistently consistent after data structure refinement
#  o SrcSpecif      reference to source specificity information
#                   set option -SrcSpecif in &ScaffdStruct
#
# => new sub-structure %bridge, globally accessible via
#    $ContigStruct{bridge}{$cend_idnum1}{$cend_idnum2}:
#    clone          reference to array of bridging clones (entry in root clone index)
#    cend           reference to array (2 elements) of contig ends joined by
#                   the bridge, sorted by contig end id. The order corresponds
#                   to the 2D index order in @{$ContigStruct{bridge}}.
#    CEndDist       contig distance estimate, reference to hash:
#                   mean  mean
#                   s     std. deviation
#    CEndGap        contig gap length estimate, reference to hash:
#                   mean  mean
#                   s     std. deviation
#    isWinner       this bridge is a winner in pass I of scaffold mapping
#    score          score of bridge (currently, only in case of a join)
#
# => new sub-structure %report, globally accessible via
#    $ContigStruct{report}:
#    for ever-changing details see &ScaffdStruct
#
# => add into %contig:
#    scaffd         reference to scaffold holding this contig
#
# => add into %Cend:
#    bridge         reference to array of bridge data structures
#  ( BridgeWon      winner of bridges at this contig end
#                   this field is temporary, cf. &ScaffdBridgeWinner
#    BridgeSat      reference to array of bridge data structures, which may
#                   represent gap-filling contigs
#    BridgeConfl    reference to array of bridge data structures, which are
#                   in conflict against each other
#    ScaffdBridge   reference to scaffold-joining bridge
#    ScaffdJoin     reference to scaffold-joined contig end
#
# => add into %clone (global data structure):
#    field          hash of clone name fields
#                   This is worked out early in bridge analysis for all
#                   available clones.
#    CEndDist       contig end distance estimate based on this contig-bridging
#                   reads. Introduced by &ScaffdBridgeCloneGap.
#    CEndDistSum    sum of contig end distances of contig-bridging reads.
#                   This equals the length of the clone insert that stretches
#                   over known contig sequence. Introduced by
#                   &ScaffdBridgeCloneGap.
#    PairError      additional diagnosis (cf.
#                   &SeqAlign::Gap::ContigStruct(...,-pair=>1)), made during
#                   clone length / contig distance analysis for bridges.
#    relied         score (range [0.0,1]) that describes the reliability of
#                   the clone in the context of all observations, e.g. DNA prep
#                   plates that repeatedly showed pairing of their fwd/rev read.
#                   *** implement me ***
#


################################################################################
# scaffold set data generation, I/O, and report
################################################################################


# generate scaffold set data structure from contig data source
#
# INTERFACE
# - argument 1:    contig data source path or reference to list of paths
#                  cmp. argument concept of &ContigStructMeta
#
# - options:
#   -debug         [STD]
#   -export        export specified number of scaffolds into separate
#                  databases
#   -OutReport     save showrelationship report to specified file
#   -OutStump      invoke output of files and define output path stump
#   -RcCloneLen    use this rc file for clone length data
#                  For procedural details see &ReadWatch::Library::...
#   -RcTgtSpecif   use this rc file for source specificity data in
#                  conjunction with switch -SlcSpecif. For procedural details
#                  see &ReadWatch::Library::...
#   -RelyOn        define reads which are reliable (see &ContigStruct)
#   -SlcEnds       define contig end range for pair analysis (use is not
#                  recommended)
#   -SlcLen        select scaffolds according to their length during 
#                  construction of data structure. Argument may be either:
#                  - reference range object or similar data structure
#                  - string-fashioned range specification. Minimum syntax:
#                    minimum value for contig length
#   -SlcSingle     enter singlet contigs into list of scaffolds if longer
#                  or equal to specified length
#   -SlcSpecif     select scaffolds for source specificity (argument specifies
#                  source).
#                  This forces switch -SrcSpecif to 1. Cmp. switch -RcTgtSpecif
#   -SlcSpecifDlt  minimum delta of specificity measure in conjunction with
#                  switch -SlcSpecif
#   -SrcSpecif     enter source specificity information into scaffold
#                  data sub-structures
#                  this is automatically enforced by: -SlcSpecif -SlcSpecifDlt
#   -timer         [STD]
#
# - return val:    - reference to scaffold set data structure
#                  - undef if error occurs
#
# DEBUG, CHANGES, ADDITIONS
# - construct bridges while looping over clones (possibly, do some extra
#   constructive work on clone data structure) rather than looping over reads.
#
sub ScaffdStruct {
  # Build the scaffold set data structure on top of the contig set data
  # structure returned by &ContigStructMeta. Processing stages:
  #  1. sample contig-bridging clones at contig ends => {bridge}
  #  2. estimate contig end distance / gap length for every bridge
  #  3. fill {report} and, given -OutStump, write the plain bridge report
  #  4. scaffold map pass I: join the contig ends of bridges that are the
  #     winning bridge on both of their contig ends
  #  5. scaffold map pass IIa/IIb: &ScaffdJoinFind2nd on still-conflicting ends
  #  6. refine scaffold entries, apply -SlcSingle / -SlcLen / -SlcSpecif
  # Returns the data structure reference, or undef on error. An unparsable
  # -SlcLen argument is fatal (die).
  my ($pArgSrc,%opt) = @_;
  my $debug = $opt{-debug};
  # sub-calls get a debug level reduced by one
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $opt{-timer}; my $time;
  my $SlcLen = $opt{-SlcLen};
  if ($SlcLen) {
    # accept either a range-like reference or a range string
    $SlcLen = ref($SlcLen)? Math::Range->new($SlcLen) : Math::Range->new_parsed($SlcLen);
    unless ($SlcLen) {
      die sprintf "%s. ERROR in range argument '%s'\n", &MySub, $opt{-SlcLen};
    }
  }
  my $OutStump = $opt{-OutStump};
  $ReadWatch::Library::LibGlob{CloneLen}{WarnFileChg} = 1;

  # normalise source argument to an array reference
  unless (ref($pArgSrc)) { $pArgSrc = [ $pArgSrc ]; }

  # get contig set data structure from contig data source
  # timer values: elements 0 and 2 of times() (user CPU time of this process
  # and of its children)
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pScaffdStruct = &ContigStructMeta ($pArgSrc,
    -cend      => $opt{-SlcEnds} || 0,  # default: calculate cend binding for all reads
    -pair      => 1,  # pair analysis always!
    -RelyOn    => $opt{-RelyOn},
    -OutReport => $opt{-OutReport},
     defined($opt{-SlcCnum}) ?
   (-SlcCnum   => $opt{-SlcCnum}) : (-SlcCnum => 2),
               # singlet reads typically do not make sense, but,
               # allow caller to interfere with this rule.
               # NOTE: $opt{-SlcSingle} means something completely different
    -debug     => $dbg2);
  # error message was done in &SeqAlign::Assembly::ContigStruct
  unless ($pScaffdStruct) { return undef }
  # summary debug of contig set data structure in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    # timer protocol goes to STDERR, like all other timer messages of this sub
    printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum((times)[0,2]);
  }

  ##############################################################################
  # sample bridging clones at contig ends => bridge data structure
  my ($pCtg,$pCend,$pClone,$pBridge);

  # loop over contig ends
  foreach $pCtg (values %{$$pScaffdStruct{contig}}) {
    foreach $pCend (values %{$$pCtg{end}}) {
      $debug and printf STDERR "%s. analysing contig end: %s\n", &MySub,
        &CendId($pCend,-format=>'loose',-source=>(@{$$pScaffdStruct{source}}>1),-debug=>$dbg2);

      # select interesting (possibly bridging) clone end reads from contig ends
      # cmp. &ProgSeqExtra
      foreach my $pRead (@{$$pCend{read}}) {
        my $pField = &ReadidToFields ($$pRead{id});
        # skip reads without a 'prd' identifier field
        $$pField{prd} or next;
        $pClone = $$pScaffdStruct{clone}{$$pField{cln}};
        $$pClone{field} = $pField;
        $debug and printf STDERR "  analysing read %s, SeqStatus: %.1f, PairStatus %.1f\n",
          $$pRead{id}, $$pClone{SeqStatus}, $$pClone{PairStatus}||=0;

        # we look for:
        # - clone sequenced from both ends
        # - reads on clone are dispersed over several contigs
        if ($$pClone{SeqStatus} and $$pClone{PairStatus}>0 and $$pClone{PairStatus}<1) {

          # validate contig end data linkage for current read
          unless ($$pRead{cend} and %{$$pRead{cend}}) {
            printf STDERR "%s. ERROR: missing Cend data structure: read %s, clone %s\n", &MySub,
              $$pRead{id}||"''", $$pClone{id}||"''";
            next;
          }

          # pick first counter-read
          # pick bridged contig ends
          my $pReadCntr = (grep { $pRead ne $_ } map { @{$_||[]} }
            &DataTreeSlc ($pClone, [[$ReadidField{prdAnti}{$$pField{prd}}],[0,'all']]) )[0];
          unless ($pReadCntr) {
            printf STDERR "%s. ERROR: didn't find counter-reading for read %s (clone %s)\n", &MySub,
              $$pRead{id}||"''", $$pClone{id}||"''";
            print  STDERR "  should be present according to PairStatus\n";
            next;
          }
          unless ($$pReadCntr{cend} and %{$$pReadCntr{cend}}) {
            printf STDERR "%s. ERROR: didn't find counter-contig end "
              . "- but found counter-reading in foreign contig\n", &MySub;
            printf STDERR "    (read %s, counter-read %s, counter-read's end dist. %d)\n",
              $$pRead{id}||"''", $$pReadCntr{id}||"''", $$pReadCntr{CEndDist};
            next;
          }

          # confirm distance validity
          # a non-retrievable cutoff is treated as 0, so the clone then fails
          # the test as soon as the end distance sum is positive
          my $cutoff = &CloneLenCutoff ($$pField{lib}||'fake',
            -rc=>$opt{-RcCloneLen}, -default=>1, -debug=>$dbg2) || 0;
          if ($$pRead{CEndDist}+$$pReadCntr{CEndDist} > $cutoff) {

            # mark clone for being erroneous
            $$pClone{PairStatus} = 0;
            if ($cutoff) {
              $$pClone{PairError} = sprintf ("end distance sum = %d (cutoff %s), %s / %s",
                $$pRead{CEndDist}+$$pReadCntr{CEndDist},
                &CloneLenCutoff($$pField{lib}||'fake',-rc=>$opt{-RcCloneLen},-default=>1),
                $$pRead{id}, $$pReadCntr{id},
                );
            } else {
              # repeat the lookup with debugging switched on to expose the cause
              &CloneLenCutoff ($$pField{lib}||'fake', -rc=>$opt{-RcCloneLen}, -default=>1, -debug=>1);
              printf STDERR "%s. ERROR: non-retrievable clone length cutoff "
                ."for read %s (clone %s, lib %s)\n", &MySub,
                $$pRead{id}||"''", $$pClone{id}||"''", $$pField{lib}||"''";
              $$pClone{PairError} = 'clone length cutoff not retrievable';
            }
            next;
          }

          # definitely found a valid contig-bridging clone
          # bridging cend1 with cend2
          # contig ends are sorted by id, so every bridge has one unique
          # position in the 2D bridge index
          my ($pCend1,$pCend2) = sort { &CendIdSort($$a{id},$$b{id}); }
            $$pRead{cend}, $$pReadCntr{cend};

          # either enter clone into existing bridge data structure
          if ($pBridge = $$pScaffdStruct{bridge}{$$pCend1{id}}{$$pCend2{id}}) {
            # each clone is listed only once per bridge
            unless (grep { $_ eq $pClone } @{$$pBridge{clone}}) {
              push @{$$pBridge{clone}}, $pClone;
            }
          }

          # or generate new bridge data structure
          else {
            $pBridge = $$pScaffdStruct{bridge}{$$pCend1{id}}{$$pCend2{id}} = {
              clone => [ $pClone ],
              cend  => [ $pCend1, $pCend2 ],
              };
            push @{$$pCend1{bridge}}, $pBridge;
            push @{$$pCend2{bridge}}, $pBridge;
          }
        }
      }
    }
  }

  # finish this step
  if ($bTimer) {
    printf STDERR "%s. CPU time for 1st-pass bridge analysis: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum((times)[0,2]);
  }
  unless (%{$$pScaffdStruct{bridge}||{}}) {
    print STDERR "no bridges found, probably ERROR\n";
    return undef;
  }

  ##############################################################################
  # gap length expectancy

  # loop over bridges
  foreach $pBridge (map { @{$_||[]} }
    &DataTreeSlc ($$pScaffdStruct{bridge}, [[0,'all'],[0,'all']])
  ) {
    unless (@{$$pBridge{clone}}) { next }
    my (@BridgeCEndDist, $pCEndDistEstim);

    # mean / SD of clone gaps
    foreach $pClone (@{$$pBridge{clone}}) {
      $pCEndDistEstim = &ScaffdBridgeCloneGap ($pClone,
        -RcCloneLen=>$opt{-RcCloneLen}, -debug=>$debug);
      # estimates without a non-zero std. deviation are dropped; this also
      # keeps $range{step} below from becoming zero
      unless ($$pCEndDistEstim{s}) {
        printf STDERR "%s. ERROR: no clone length estimate for clone %s\n", &MySub, $$pClone{id}||"''";
        next;
      }
      push @BridgeCEndDist, $pCEndDistEstim;
    }
    unless (@BridgeCEndDist) { next }

    # contig end distance estimate 0
    # - set up rational distance range parameters.
    # - the following passage (inside block) may also serve as re-definition
    #   recipe, based on parameters that failed to yield rational estimate
    #   results.
    my (%range);
    # maximum number of redo cycles of the following block
    $range{CtRedo} = 4;
    BridgeCEndDistRedo: { # redo block
      $range{median} = $$pBridge{CEndDist}{mean}
        = &Mean (map {$_->{mean}} @BridgeCEndDist);
      $range{HalfWidth} = &Max (3000, map {2*$_->{s}} @BridgeCEndDist);
      $range{step} = &Min (map {$_->{s}/8} @BridgeCEndDist);

      # contig end distance estimate I
      # - one single bridging clone: easy, just take the clone-based distance
      #   estimate
      if (@BridgeCEndDist == 1) {
        $$pBridge{CEndDist} = $BridgeCEndDist[0];
      }

      # contig end distance estimate II
      # - several bridging clones: determine the combination of all clone-based
      #   distance estimates over rational ranges of D.
      # - sample data points of combined distribution functions
      # - the plot is sampled for the single-clone case as well, because the
      #   gap size estimate at the end of this block is derived from it
      my (@CEndDistPlot, $GaussPos) = ();
      for ($GaussPos = $$pBridge{CEndDist}{mean} - $range{HalfWidth};
           $GaussPos < ($$pBridge{CEndDist}{mean} + $range{HalfWidth} + 1);
           $GaussPos += $range{step}
      ) {
        my @PosGaussVal;  # need to get re-initialized here
        foreach my $pGauss (@BridgeCEndDist) {
          push @PosGaussVal, &GaussVal(@{$pGauss}{'mean','s'},$GaussPos);
        }
        # a product of zero is replaced by a tiny positive value
        push @CEndDistPlot, [ $GaussPos, &Multiply(@PosGaussVal)||1e-100 ];
      }
      if (@BridgeCEndDist > 1) {

        # Gauss sample properties on combined distribution function
        $$pBridge{CEndDist} = &SampleMetricsWgt (\@CEndDistPlot);
        # rich debug:
        if ($debug or ! $$pBridge{CEndDist}) {
          printf STDERR "%s. %s sample metrics for bridge:\n", &MySub,
            $$pBridge{CEndDist}? 'inspection of':'ERROR: unable to calculate';
          print  STDERR &ScaffdBridgePlain($pBridge);
          printf STDERR "  contig distance expectancy plot: sum, clone1%s\n",
            (@BridgeCEndDist>1)? sprintf('..clone%d',int(@BridgeCEndDist)):'';
          foreach my $pGaussCoo (@CEndDistPlot) {
            printf STDERR '   %s => %s', @$pGaussCoo;
            foreach my $pGauss (@BridgeCEndDist) {
              printf STDERR ' : %s', &GaussVal(@{$pGauss}{'mean','s'},$$pGaussCoo[0]);
            }
            print  STDERR "\n";
          }
          # NOTE(review): without -debug this branch is reached only if the
          # sample metrics are false, so the value assigned here is undefined
          # - confirm intent
          if (! $debug) { $range{median} = $$pBridge{CEndDist}{mean}; }
        }

        # did distribution maximum get out of sight?
        # - compare: new_mean +/- s with scan_mean +/- width
        if (($$pBridge{CEndDist}{mean} - 2.0*$$pBridge{CEndDist}{s})
              < ($range{median} - $range{HalfWidth}) or
            ($$pBridge{CEndDist}{mean} + 2.0*$$pBridge{CEndDist}{s})
              > ($range{median} + $range{HalfWidth})
        ) {
           printf STDERR "  distance estimate currently: %d +/- %d\n",
             @{$$pBridge{CEndDist}}{'mean','s'};
           # re-centre and widen the scan range
           # NOTE(review): the first statement of the redo block recomputes
           # $range{median} and $range{HalfWidth}, so these two assignments
           # look ineffective for the redo - confirm
           $range{median} = $$pBridge{CEndDist}{mean};
           $range{HalfWidth} = &Max (4*$$pBridge{CEndDist}{s}, abs($range{median}), $range{HalfWidth});
           # NOTE(review): this message is printed only when the redo counter
           # has reached zero, i.e. when no further redo follows - confirm
           $range{CtRedo} or printf STDERR "%s. redo in contig end distance estimation for bridge at: %s\n", &MySub,
             &CendId($$pBridge{cend}[0],-format=>'loose',-source=>(@{$$pScaffdStruct{source}}>1));
           if ($range{CtRedo} --) { redo BridgeCEndDistRedo }
        }
      }

      # finally calculate gap size estimate from probability distribution of
      # contig end distances
      # - only positive distances contribute to the gap estimate
      # - fallback if none is left: single data point at position 0
      @CEndDistPlot = grep { $_->[0]>0 } @CEndDistPlot;
      unless (@CEndDistPlot) { @CEndDistPlot = [0, 1]; }
      $$pBridge{CEndGap} = &SampleMetricsWgt (\@CEndDistPlot);
    }
  }
  if ($bTimer) {
    printf STDERR "%s. CPU time for 2nd-pass bridge analysis: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum((times)[0,2]);
  }

  ##############################################################################
  # main report information
  my (%file, ${LineFeed}, ${space});

  $$pScaffdStruct{report}{time} = &TimeStr();
  $$pScaffdStruct{report}{CtContig} = int keys %{$$pScaffdStruct{contig}};
  $$pScaffdStruct{report}{CtRead} = int keys %{$$pScaffdStruct{read}};
  $$pScaffdStruct{report}{CtClone} = int keys %{$$pScaffdStruct{clone}};
  $$pScaffdStruct{report}{source} = sprintf ('source file%s: %s', (@$pArgSrc>1)?'s':'', join(', ',@$pArgSrc));
  $$pScaffdStruct{report}{header} = <<PSICONTIGMAP_HEAD_END;
$main::ProgFile -$main::ProgMode
date/time: $$pScaffdStruct{report}{time}
PSICONTIGMAP_HEAD_END
  $$pScaffdStruct{report}{database} = <<PSICONTIGMAP_PROJ_END;
$$pScaffdStruct{report}{source}
NOTE: single-read contigs filtered
contigs: $$pScaffdStruct{report}{CtContig}
reads: $$pScaffdStruct{report}{CtRead}
clones: $$pScaffdStruct{report}{CtClone}
PSICONTIGMAP_PROJ_END
  # 't...' variants: same text with every non-empty line prefixed by '# '
  $$pScaffdStruct{report}{theader} =
    join ('',  map { "# $_\n" } grep { length($_) }
    split (/\n/, $$pScaffdStruct{report}{header}));
  $$pScaffdStruct{report}{tdatabase} = "# CONTIG DATA SOURCE\n" .
    join ('',  map { "# $_\n" } grep { length($_) }
    split (/\n/, $$pScaffdStruct{report}{database}));

  ##############################################################################
  # plain report on bridges

  ${LineFeed} = "\n";

  if ($OutStump) {
    $file{RptPlain} = "${OutStump}_bridge.txt";
    # 3-argument open: the path is taken literally, never parsed for a mode
    unless (open (OUTRPTPLAIN, '>', $file{RptPlain})) {
      printf STDERR "ERROR: unable to open file %s for output\n", $file{RptPlain}||"''";
      return undef;
    }
    print  OUTRPTPLAIN "$$pScaffdStruct{report}{header}\n";

    # list contig bridges
    print  OUTRPTPLAIN "${LineFeed}${LineFeed}";
    print  OUTRPTPLAIN "CONTIG BRIDGES${LineFeed}";
    if (%{$$pScaffdStruct{bridge}}) {
      print  OUTRPTPLAIN "${LineFeed}";
      foreach $pBridge (sort {
          &CendIdSort($$a{cend}[0]{id},$$b{cend}[0]{id}) or
          &CendIdSort($$a{cend}[1]{id},$$b{cend}[1]{id})
        } @{ &DataTreeSlc($$pScaffdStruct{bridge},[[0,'all'],[0,'all']]) }
      ) {
        print  OUTRPTPLAIN &ScaffdBridgePlain($pBridge);
      }
    } else {
      print  OUTRPTPLAIN "*** NONE ***${LineFeed}";
    }

    # list clone pairing errors
    print  OUTRPTPLAIN "${LineFeed}${LineFeed}";
    print  OUTRPTPLAIN "CLONE PAIRING ERRORS${LineFeed}";
    my @CloneErr = grep { $_->{PairError} } values %{$$pScaffdStruct{clone}};
    if (@CloneErr) {
      print  OUTRPTPLAIN "${LineFeed}";
      print  OUTRPTPLAIN "clone\tdiagnosis\tcontigs${LineFeed}";
      foreach $pClone (sort {
        $$a{field}{lib} cmp $$b{field}{lib} or
        $$a{field}{plt} <=> $$b{field}{plt} or
        $$a{field}{coo} cmp $$b{field}{coo}
      } @CloneErr) {
        printf OUTRPTPLAIN "%s\t%s\t%s${LineFeed}",
          $$pClone{id}, $$pClone{PairError},
          join (',', sort{ $a <=> $b }map{ $_->{contig}{id} }
            @{ &DataTreeSlc($pClone,[['^-?[01]$','regexp'],[0,'all']],-unique=>1)||[] });
      }
    } else {
      print  OUTRPTPLAIN "*** NONE ***${LineFeed}";
    }

    # end document
    # buffered write errors surface at close time; report them, but go on
    close (OUTRPTPLAIN)
      or printf STDERR "%s. WARNING: error while closing file %s: %s\n", &MySub,
        $file{RptPlain}, $!;
    if ($bTimer) {
      printf STDERR "%s. CPU time for bridge analysis report: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
      $time = &Sum((times)[0,2]);
    }
  }

  ##############################################################################
  # scaffold map first-pass
  my ($pScaffd);

  # loop over all contig ends
  # determine winning bridge for contig
  foreach $pCend (map { @{$_||[]} }
    &DataTreeSlc($$pScaffdStruct{contig},[[0,'all'],['end'],[0,'all']])
  ) {
    &ScaffdBridgeWinner ($pCend, -debug=>$dbg2);
  }
  # do scaffold-joins for both-side-winning bridges
  foreach $pBridge (grep {
      ($_->{cend}[0]{BridgeWon} eq $_ and $_->{cend}[1]{BridgeWon} eq $_)
    } map {@{$_||[]}} &DataTreeSlc($$pScaffdStruct{contig},
      [[0,'all'],['end'],[0,'all'],['BridgeWon']],-unique=>1)
  ) {
    if (&ScaffdJoin (@{$$pBridge{cend}}, -debug=>$dbg2)) {
      # register the joining bridge at both contig ends
      map { $_->{ScaffdBridge}=$pBridge } @{$$pBridge{cend}};
    }
    # honor winning bridge
    $$pBridge{isWinner} = 1;
  }

  # intermediate process sum-up
  $$pScaffdStruct{report}{map1} = {
    BridgeStatist =>
               "clones:\n" .
      sprintf ("  total: %d\n", int values %{$$pScaffdStruct{clone}}) .
      sprintf ("  double-side sequenced: %d\n", int(grep{ $_->{SeqStatus} }values %{$$pScaffdStruct{clone}})) .
      sprintf ("  suspicious concerning contig end distance sum - filtered: %d\n", int (grep { exists($_->{PairError}) and $_->{PairError}=~m/^end distance/ } values %{$$pScaffdStruct{clone}})) .
      sprintf ("  bridging: %d\n", int(map{ @{$_||[]} }&DataTreeSlc($$pScaffdStruct{bridge},[[0,'all'],[0,'all'],['clone'],[0,'all']],-unique=>1)) ) .
               "bridges:\n" .
      sprintf ("  total: %d\n", int(map{ @{$_||[]} }&DataTreeSlc($$pScaffdStruct{bridge},[[0,'all'],[0,'all']])) ) .
      sprintf ("  winners: %d\n", int(grep{ exists($_->{isWinner}) and $_->{isWinner} }map{ @{$_||[]} }&DataTreeSlc($$pScaffdStruct{bridge},[[0,'all'],[0,'all']])) ) .
      sprintf ("  joining: %d\n", int(grep{ exists($_->{cend}) and @{$_->{cend}} and exists($_->{cend}[0]{ScaffdBridge}) and $_ eq $_->{cend}[0]{ScaffdBridge} }map{ @{$_||[]} }&DataTreeSlc($$pScaffdStruct{bridge},[[0,'all'],[0,'all']])) ) .
               '',
    CendStatist => &ScaffdCendStatist ($pScaffdStruct),
    # conflicting contig ends: more than one bridge, but not joined
    CendConfl   => [ grep { exists($_->{bridge}) and @{$_->{bridge}}>1 and !$_->{ScaffdJoin} }
       map { @{$_||[]} } &DataTreeSlc ($$pScaffdStruct{contig}, [[0,'all'],['end'],[0,'all']]) ],
    scaffd      => int (values %{$$pScaffdStruct{scaffd}}),
    singlet     => int (grep{ not $_->{scaffd} }values(%{$$pScaffdStruct{contig}})),
    };
  foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
    $$pScaffdStruct{report}{map1}{ScaffdMember} += int @{ scalar &ScaffdContigs ($pScaffd) };
  }
  if ($bTimer) {
    printf STDERR "%s. CPU time for 1st-pass scaffold map: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum((times)[0,2]);
  }

  ##############################################################################
  # scaffold map - second pass
  # far-range search for consistent scaffold-joining contig ends

  ${LineFeed} = "<BR>\n";
  ${space} = '&nbsp;';

  # start report document
  # - without -OutStump the debug report is discarded via /dev/null
  # - a failing open is reported but not fatal: the calls of
  #   &ScaffdJoinFind2nd below have to be done anyway, only their report
  #   output gets lost
  $file{Map2aDebug} = $OutStump ? "${OutStump}_map2a_debug.html" : '/dev/null';
  open  (MAP2DEBUG, '>', $file{Map2aDebug})
    or printf STDERR "%s. WARNING: unable to open file %s for output: %s\n", &MySub,
      $file{Map2aDebug}, $!;
  print  MAP2DEBUG "${LineFeed}${LineFeed}";
  print  MAP2DEBUG "FIND PSEUDO-JOINS ON SECOND LEVEL - MAP IIa${LineFeed}";

  # loop over first-pass-conflicting contig ends
  foreach $pCend (sort { &CendIdSort ($$a{id}, $$b{id}) } @{$$pScaffdStruct{report}{map1}{CendConfl}}) {
    # we may already have joined current contig end coming from the facing side
    $$pCend{ScaffdJoin} and next;

    # do analysis
    print MAP2DEBUG &ScaffdJoinFind2nd($pCend,-html=>1);
  }
  close MAP2DEBUG;

  # intermediate process sum-up
  # here debug only
  $$pScaffdStruct{report}{map2a} = {
    CendStatist => &ScaffdCendStatist ($pScaffdStruct),
    CendConfl   => [ grep { exists($_->{bridge}) and @{$_->{bridge}}>1 and !$_->{ScaffdJoin} }
       map { @{$_||[]} } &DataTreeSlc ($$pScaffdStruct{contig}, [[0,'all'],['end'],[0,'all']]) ],
    scaffd      => int (values %{$$pScaffdStruct{scaffd}}),
    singlet     => int (grep { not $_->{scaffd} } values %{$$pScaffdStruct{contig}}),
    };
  foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
    $$pScaffdStruct{report}{map2a}{ScaffdMember} += int @{ scalar &ScaffdContigs ($pScaffd) };
  }

  # start report document for 2b pass
  # open failure handling: same as for the pass IIa report
  $file{Map2bDebug} = $OutStump ? "${OutStump}_map2b_debug.html" : '/dev/null';
  open  (MAP2DEBUG, '>', $file{Map2bDebug})
    or printf STDERR "%s. WARNING: unable to open file %s for output: %s\n", &MySub,
      $file{Map2bDebug}, $!;
  print  MAP2DEBUG "${LineFeed}${LineFeed}";
  print  MAP2DEBUG "FIND PSEUDO-JOINS ON SECOND LEVEL - MAP IIb${LineFeed}";

  # loop over 2a-pass-conflicting contig ends
  # - do one step back to same direction-facing contig end
  # - do analysis via &ScaffdJoinFind2nd
  { my @tmp;
    foreach $pCend (@{$$pScaffdStruct{report}{map2a}{CendConfl}}) {
      # contig ends without location result are skipped
      push @tmp, (&ScaffdCendLocat($pCend)||());
    }
    foreach $pCend (sort { &CendIdSort($$a{id},$$b{id}) }
      map{ $_->{cend}{'-1'}{'1'}[0]||() }@tmp
    ) {
      print MAP2DEBUG &ScaffdJoinFind2nd($pCend,-html=>1);
    }
    close MAP2DEBUG;
  }

  # intermediate process sum-up
  $$pScaffdStruct{report}{map2} = {
    CendStatist => &ScaffdCendStatist ($pScaffdStruct),
    CendConfl   => [ grep { exists($_->{bridge}) and @{$_->{bridge}}>1 and !$_->{ScaffdJoin} }
       map{@{$_||[]}} &DataTreeSlc ($$pScaffdStruct{contig}, [[0,'all'],['end'],[0,'all']]) ],
    scaffd      => int (values %{$$pScaffdStruct{scaffd}}),
    singlet     => int (grep { not $_->{scaffd} } values %{$$pScaffdStruct{contig}}),
    };
  foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
    $$pScaffdStruct{report}{map2}{ScaffdMember} += int @{ scalar &ScaffdContigs ($pScaffd) };
  }

  # process timing
  if ($bTimer) {
    printf STDERR "%s. CPU time for 2nd-pass scaffold map: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum((times)[0,2]);
  }

  ##############################################################################
  # data refining and scaffold selection

  # add all singlet contigs with size >= specified
  if ($opt{-SlcSingle}) {
    foreach $pCtg (
      grep { !$_->{scaffd} and $_->{length}>=$opt{-SlcSingle} }
      values %{$$pScaffdStruct{contig}}
    ) {
      &ScaffdSingle ($pCtg);
    }
  }

  # enter into scaffold data structures:
  # - list of members
  # - categorized counts for contributing contigs
  # - sum of member contig length
  # NOTE(review): the counts are derived from the result list of the list
  #   assignment (member list, undef), not from the complete return list of
  #   &ScaffdContigs - presumably only the 'member' count is meaningful here;
  #   confirm
  foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
    @{$$pScaffd{CtContig}}{'member','satellite','conflict'} = map{ int(@{$_||[]}) }
      (($$pScaffd{member}, undef) = &ScaffdContigs ($pScaffd));
    $$pScaffd{CtContig}{all} = &Sum (@{$$pScaffd{CtContig}}{'member','satellite','conflict'});
    $$pScaffd{length} = &Sum (map { $_->{length} } @{$$pScaffd{member}});
  }

  # select for scaffold length
  # - range elements 0 / 1 are used as lower / upper length limit
  # - the upper limit is applied only if it is a true value
  if ($SlcLen) {
    foreach $pScaffd (grep { $_->{length} < $$SlcLen[0] }
            values %{$$pScaffdStruct{scaffd}}) {
      delete $$pScaffdStruct{scaffd}{$$pScaffd{id}};
    }
    if ($$SlcLen[1]) {
      foreach $pScaffd (grep { $_->{length} > $$SlcLen[1] }
              values %{$$pScaffdStruct{scaffd}}) {
        delete $$pScaffdStruct{scaffd}{$$pScaffd{id}};
      }
    }
  }

  # enter specificity measures on largest member contig
  if ($opt{-SrcSpecif} or $opt{-SlcSpecif}) {
    my @RcTgtSpecif = $opt{-RcTgtSpecif} ? (-rc => $opt{-RcTgtSpecif}) : ();
    foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
      $pCtg = (sort { $b->{length} <=> $a->{length} }
        @{ scalar &ScaffdContigs ($pScaffd) } )[0];
      $$pScaffd{SrcSpecif} = &TgtspecifTgtProbab ([keys %{$$pCtg{read}}],
        'Read', -target=>'genome', @RcTgtSpecif, -debug=>$dbg2);
    }
  }

  # select for source specificity
  if ($opt{-SlcSpecif}) {
    foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
      # the following may be calculated from the internal source specificity measures
      #   see $opt{-SrcSpecif}
      $pCtg = (sort { $b->{length} <=> $a->{length} }
        @{ scalar &ScaffdContigs($pScaffd) } )[0];
      # the scaffold is dropped if its largest contig fails the test
      &ContigSpecif ($pCtg, $opt{-SlcSpecif},
        -delta => $opt{-SlcSpecifDlt},
        -rc    => $opt{-RcTgtSpecif},
        -debug => $dbg2) or
        delete $$pScaffdStruct{scaffd}{$$pScaffd{id}};
    }
  }

  ##############################################################################
  # exit SUB successfully

  if ($debug) {
    printf STDERR "%s. final scaffold set data structure:\n", &MySub;
    printf STDERR "  scaffolds: %d\n", int(keys(%{$$pScaffdStruct{scaffd}}));
    # counts per scaffold: first scaffold end plus the contig ends listed by
    # &ScaffdCendLocat under {cend}{'-1'}{'1'}
    printf STDERR "  contigs all/scaffolds: %d/%d\n", int(keys(%{$$pScaffdStruct{contig}})),
      do { my $CtI; foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
        $CtI += int (map { $_ }
          $$pScaffd{cend}[0], @{ &ScaffdCendLocat($$pScaffd{cend}[0])->{cend}{'-1'}{'1'} });
      } $CtI; };
    printf STDERR "  readings all/scaffolds: %d/%d\n", int(keys(%{$$pScaffdStruct{read}})),
      do { my $CtI; foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
        $CtI += int (map { keys %{$_->{contig}{read}} }
          $$pScaffd{cend}[0], @{ &ScaffdCendLocat($$pScaffd{cend}[0])->{cend}{'-1'}{'1'} });
      } $CtI; };
    printf STDERR "  clones: %d\n", int(keys(%{$$pScaffdStruct{clone}}));
  }
  return $pScaffdStruct;
}


# calculate winner under array of contig bridges for single contig end
#
# INTERFACE
# - argument 1: reference to contig end data structure
#               see SeqAlign::Assembly.pm
#
# - options
#   -debug      [STD]
#
# - return val: winning score
#
# DESCRIPTION
# - algorithm to get the bridge winner:
#   0. no bridge candidate => no winner
#   A. there's only one bridge candidate => it is the winner
#      bridging single-read contigs may be neglected here
#   B. there's only one bridge candidate represented by >= 2 clones
#   A2. (tried after B) there's only one bridge candidate whose contig end
#      distance satisfies |mean| < 2.5 * s => it is the winner
#   Z. none of 0/A/B/A2 => so, there's a conflict
#   this algorithm doesn't check if the counter-contig has the same
#   winning bridge. So, the winner need not represent a valid scaffold-join.
# - bridge winner scoring
#   -1. no winner, conflict
#    0. no winner at all
#    0.5 winner is the only candidate with |distance mean| < 2.5 * s
#        (step A2 in the code)
#    1. winner represented by 1 clone
#    2. winner represented by >= 2 clones
# - see &ScaffdStruct for listing of fields which are entered into %Cend
#   data structure
#
sub ScaffdBridgeWinner {
  my ($pCend, %opt) = @_;
  my ($debug);
  my ($CtBridge, @BridgeStrong, @BridgeConfid);

  # function parameters
  $debug = $opt{-debug};
  # contig end without a bridge list: return score 0, nothing is entered
  # into the contig end data structure
  $$pCend{bridge} or return 0;

  # 0. no bridge, no winner, score 0
  unless ($$pCend{bridge} and $CtBridge = int(@{$$pCend{bridge}})) {
    $$pCend{BridgeWon} = {};
    $$pCend{BridgeWon}{score} = 0;
  }

  # A1. only one candidate bridge => winner, score 1/2
  # note: the score is written into the bridge data structure itself
  elsif ($CtBridge == 1) {
    $$pCend{BridgeWon} = $$pCend{bridge}[0];
    $$pCend{BridgeWon}{score} = (@{$$pCend{BridgeWon}{clone}} >= 2) ? 2 : 1;
  }

  # B. only one candidate bridge >= 2 clones, score 2
  # (list assignment in scalar context yields the number of strong bridges)
  elsif ( (@BridgeStrong = grep { @{$_->{clone}}>=2 } @{$$pCend{bridge}}) == 1) {
    $$pCend{BridgeWon} = $BridgeStrong[0];
    $$pCend{BridgeWon}{score} = 2;
  }

  # A2. only one confident candidate bridge => winner, score 0.5
  # confident: |mean contig end distance| < 2.5 * s
  elsif ( (@BridgeConfid =
    grep { (abs($_->{CEndDist}{mean}) - 2.5*$_->{CEndDist}{s}) < 0 } @{$$pCend{bridge}}
    ) == 1
  ) {
    # the winner is the confident bridge itself, which is not necessarily
    # the first bridge in the candidate list
    $$pCend{BridgeWon} = $BridgeConfid[0];
    $$pCend{BridgeWon}{score} = 0.5;
  }

  # Z. no winner, conflict, score -1
  # conflict list: strong bridges if any, else confident bridges if any,
  # else all bridges
  else {
    $$pCend{BridgeWon} = $main::BridgeWonFake;
    $$pCend{BridgeWon}{score} = -1;
    $$pCend{BridgeConfl}   = @BridgeStrong ? [ @BridgeStrong ] : undef;
    $$pCend{BridgeConfl} ||= @BridgeConfid ? [ @BridgeConfid ] : undef;
    $$pCend{BridgeConfl} ||= $$pCend{bridge};
  }

  # avoid this:  *** implement me ***
  # find satellite bridges
  # = non-winning bridges with |mean contig end distance| < 2.8 * s
  if ($$pCend{BridgeWon}{score} > 0) {
    $$pCend{BridgeSat} = [ grep {
      $_ ne $$pCend{BridgeWon} and
      (abs($_->{CEndDist}{mean}) - 2.8*$_->{CEndDist}{s}) < 0 and
      1 } @{$$pCend{bridge}} ];
  }

  # debug
  $debug and printf STDERR "%s. contig end %s, contig reads %d, bridges %d, winning score %s\n", &MySub,
    $$pCend{id}, int(values(%{$$pCend{contig}{read}})),
    int(@{$$pCend{bridge}}), $$pCend{BridgeWon}{score}||'NONE';

  # exit SUB
  return $$pCend{BridgeWon}{score};
}


# find contig end true partners on 2nd bridge level
#
# INTERFACE
# - argument 1: reference to contig end data structure
# - return val: plain debug messages
#
# DEBUG, CHANGES, ADDITIONS
# - create a meta-bridge for scattered reference evidence.
#   The meta-bridge field would refer to an array of evidencing bridges.
#
sub ScaffdJoinFind2nd {
  my ($pCend, %opt) = @_;
  my ($pScaffdStruct, $DebugPlain, ${LineFeed}, ${space});
  my (%CendCandL1, $pCendFrom, @CendSpringBrd, $pCendCand, %CendCandL2,
      @CendCandHot, $CendSlcID, $pCandHotScaffd, %CendJoinL1, $pCendFront);

  # function parameters
  # -html switches the line feed / space representation of the report
  ${LineFeed} = $opt{-html} ? "<BR>\n" : "\n";
  ${space}    = $opt{-html} ? '&nbsp;' : ' ';
  $pScaffdStruct = $$pCend{contig}{root};

  # report entry
  $DebugPlain .= "${LineFeed}analysing contig end: $$pCend{id}${LineFeed}";

  ##############################################################################
  # candidate retrieval

  # sample candidates on 1st bridge level
  # index: contig end ID => { id, ref (contig end data structure) }
  %CendCandL1 = map { ($_->{id}, {id=>$_->{id},ref=>$_}) }
    &ScaffdCendPartners ($pCend);

  # count candidate references on 2nd bridge level
  # spring-boards are the initial contig end plus the counter-ends of all
  # 1st level candidates (as far as a counter-end is returned)
  foreach $pCendFrom ($pCend,
    map { &CendCntrCend($_)||() }
    map { $_->{ref} } values %CendCandL1
  ) {
    push @CendSpringBrd, $pCendFrom;
    foreach $pCendCand (&ScaffdCendPartners($pCendFrom)) {
      $CendCandL2{$$pCendCand{id}} ||=
        {id=>$$pCendCand{id},ref=>$pCendCand};
      $CendCandL2{$$pCendCand{id}}{ct} ++;
    }
  }

  # look for hot candidates, that are referred to at least 2 times on 2nd bridge level
  @CendCandHot = map { $_->{ref} } grep { $_->{ct} > 1 } values %CendCandL2;

  # report candidate retrieval process
  $DebugPlain .= sprintf "contig end candidates on 1st bridge level: %d - %s${LineFeed}",
    int(keys %CendCandL1), join(', ',map{ $_->{id} }values %CendCandL1);
  $DebugPlain .= sprintf "spring-board contig ends: %d - %s${LineFeed}",
    int(@CendSpringBrd), join(', ',map{ $_->{id} }@CendSpringBrd);
  $DebugPlain .= sprintf "contig end candidates on 2nd bridge level: %d - %s${LineFeed}",
    int(keys %CendCandL2),
    join(', ',map{ sprintf('%s %dx',$_->{id},$_->{ct}) }values %CendCandL2);
  $DebugPlain .= sprintf "hot contig end candidates: %d %s${LineFeed}",
    int(@CendCandHot),
    int(@CendCandHot) ? ' - '.join(', ',map{ $_->{id} }@CendCandHot) : '';
  # nothing to do without any hot candidate
  @CendCandHot or return $DebugPlain;

  ##############################################################################
  # one single candidate on 2nd level
  if (@CendCandHot == 1) {

    # report bridge situation
    # NOTE(review): the bridge / scaffold sub-reports are always requested
    #   in HTML format, independent of option -html - confirm this is intended
    $CendSlcID = sprintf "^(%s)", join ('|',
      keys(%CendCandL1), map{ $_->{id} }@CendSpringBrd,@CendCandHot);
    foreach my $pBridge (map { @{$_||[]} }
      &DataTreeSlc ($$pScaffdStruct{bridge},
        [[$CendSlcID,'regexp'],[$CendSlcID,'regexp']])
    ) {
      $DebugPlain .= &ScaffdBridgePlain ($pBridge, -html=>1);
    }
    # create meta-bridge entry
    # *** implement me ***

    # report scaffold properties of first hot candidate
    $pCandHotScaffd = &ScaffdCendLocat ($CendCandHot[0]);
    $DebugPlain .= sprintf ("first hot contig end candidate is member in scaffold?: %s${LineFeed}",
      $$pCandHotScaffd{scaffd}?'yes':'no');
    if ($$pCandHotScaffd{scaffd}) {
      $DebugPlain .= sprintf "${space}${space}scaffold: %s${LineFeed}", $$pCandHotScaffd{scaffd}{id};
      $DebugPlain .= sprintf "${space}${space}contig ends ahead: %d%s${LineFeed}",
        int(@{$$pCandHotScaffd{contig}{'1'}}),
        int(@{$$pCandHotScaffd{contig}{'1'}}) ? ' - '.join(', ',map{ $_->{id} }@{$$pCandHotScaffd{cend}{'1'}{'1'}}) : '';
      # concatenate (a comma here would discard the scaffold report)
      $DebugPlain .= ${LineFeed} . &ScaffdPlain($$pCandHotScaffd{scaffd},-html=>1);
    }

    # sample intervening contigs, pick nearest contig end
    # index by contig, fields:
    #   ref          counter-end of the spring-board contig end
    #   BridgeBack   bridge from that counter-end back to the initial contig end
    #   BridgeAhead  bridge from the spring-board contig end to the hot candidate
    foreach $pCendFrom (grep { $_ ne $pCend } @CendSpringBrd) {
      if (grep { $_ eq $CendCandHot[0] } &ScaffdCendPartners($pCendFrom)) {
        $CendJoinL1{$$pCendFrom{contig}} = { ref=>&CendCntrCend($pCendFrom)};
        $CendJoinL1{$$pCendFrom{contig}}{BridgeBack} = (
          grep { $_->{cend}[0] eq $pCend or $_->{cend}[1] eq $pCend }
          @{$CendJoinL1{$$pCendFrom{contig}}{ref}{bridge}} )[0];
        $CendJoinL1{$$pCendFrom{contig}}{BridgeAhead} = (
          grep { $_->{cend}[0] eq $CendCandHot[0] or $_->{cend}[1] eq $CendCandHot[0] }
          @{$$pCendFrom{bridge}} )[0];
      }
    }

    # determine order of several intervening contigs
    # sort criterion is the difference of the mean contig end distances of
    # the back bridge and the ahead bridge
    if ((keys %CendJoinL1) > 1) {
      foreach $pCendCand (values %CendJoinL1) {
      # $pCendCand is not a contig end reference !!!
        $$pCendCand{median} = $$pCendCand{BridgeBack}{CEndDist}{mean}
          - $$pCendCand{BridgeAhead}{CEndDist}{mean};
      }
    }
  }

  ##############################################################################
  # several candidates on 2nd level
  # (the case of zero hot candidates has been left above)

  # no implementation yet
  else {
    printf STDERR "%s. WARNING: no implementation for second-level multi-candidate decision\n", &MySub;
    return $DebugPlain;
  }

  ##############################################################################
  # rebuild scaffold

  # do (re-)joins of contigs
  $pCendFront = $pCend;
  # loop over contig ends facing towards the initial contig end
  foreach $pCendCand (( map { $_->{ref} }
                        sort { $$a{median} <=> $$b{median} }
                        values %CendJoinL1 ), $CendCandHot[0]) {

    # scaffold-join already exists
    if ($$pCendFront{ScaffdJoin} eq $pCendCand) {
      $DebugPlain .= sprintf "join %s to %s already exists${LineFeed}", $$pCendFront{id}, $$pCendCand{id};
    }

    # create scaffold-join as outlined
    # existing joins to other contig ends are broken first
    else {
      if ($$pCendFront{ScaffdJoin}) {
        &ScaffdBreak ($pCendFront) or return $DebugPlain;
      }
      if ($$pCendCand{ScaffdJoin}) {
        &ScaffdBreak ($pCendCand) or return $DebugPlain;
      }
      &ScaffdJoin ($pCendFront, $pCendCand) or return $DebugPlain;
      $DebugPlain .= sprintf ("joining %s to %s${LineFeed}", $$pCendFront{id}, $$pCendCand{id});
    }

    # find next-step contig end facing offwards the initial contig end
    $pCendFront = &CendCntrCend ($pCendCand);
  }

  return $DebugPlain;
}


# convert scaffold set data structure to contig set data structure
#
# INTERFACE
# - argument 1: reference to scaffold set data structure
#               data structure will be changed
#
# - options:
#   -debug      [STD]
#   -spacer     insert a non-sequence spacer at the contig junction,
#               default: 0
#
# - return val: - reference to contig set data structure
#               - undef if error occurs
#
# DESCRIPTION
# - scaffolds are converted to contigs data structures
# - reads in new data structure cannot contain contig end binding information
# - annotations are not supported so far
#
# DEBUG, CHANGES, ADDITIONS
# - annotations are not supported so far
#
sub ScaffdStructToContigStruct {
  my ($pScaffdStruct, %opt) = @_;
  my ($debug, $dbg2, $spacer);
  my ($bFirstConvert, $pCtgStruct, $pCtgNew);
  my ($pScaffd, $ScaffdPos, @ScaffdCend, $pCend, $pCtg, $pRead);
  my ($CtRead);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $spacer = $opt{-spacer} || 0;

  ##############################################################################
  # scaffold conversion

  # is this the first scaffold conversion
  # a former conversion has left reads named 'CtgOrig:...' in the read index
  $bFirstConvert = ! int grep { /^CtgOrig:/ } map { $_->{id} }
    values %{$$pScaffdStruct{read}};

  # loop over scaffolds
  # each scaffold yields one new contig which takes the ID of the first
  # member contig
  foreach $pScaffd (values %{$$pScaffdStruct{scaffd}}) {
    $ScaffdPos = 0;
    $pCtgNew = {};

    # loop over left contig ends in current scaffold
    @ScaffdCend = ($$pScaffd{cend}[0], @{ &ScaffdCendLocat ($$pScaffd{cend}[0])->{cend}{'-1'}{'1'} });
    foreach $pCend (@ScaffdCend) {
      $pCtg = $$pCend{contig};
      $$pCtgNew{id} ||= $$pCtg{id};
      $$pCtgNew{length} = $ScaffdPos + $$pCtg{length};
      $CtRead += int keys %{$$pCtg{read}};

      # create new read entry for original contig
      if ($bFirstConvert) {
        $pRead = {
          id       => 'CtgOrig:'. $$pCtg{id},
          pos      => { '-1'=>1, '1'=>$$pCtg{length} },
          length   => $$pCtg{length},
          orient   => 1,
          cend     => undef,
          CEndDist => undef,
          clone    => undef,
        };
        $$pCtg{read}{$$pRead{id}} = $pRead;
      }

      # add all reads of member contig to new scaffold-contig
      foreach $pRead (values %{$$pCtg{read}}) {

        # update read orientation/position according to contig orientation
        # the contig is reversed if its left end in the scaffold has a
        # positive end number
        if ($$pCend{idnum} > 0) {
          $$pRead{orient} *= -1;
          # slice assignment: both new positions are calculated from the
          # old positions before any of them is overwritten
          @{$$pRead{pos}}{'-1','1'} = (
            $$pCtg{length} - $$pRead{pos}{'1'}  + 1,
            $$pCtg{length} - $$pRead{pos}{'-1'} + 1,
            );
        }

        # update read position according to contig position
        $$pRead{pos}{'-1'} += $ScaffdPos;
        $$pRead{pos}{'1'}  += $ScaffdPos;

        # minimise read data structure
        delete @{$pRead}{qw(annotations cend clone CEndDist idnum)};
        $$pRead{contig} = $pCtgNew;

        # enter read into contig read index
        $$pCtgNew{read}{$$pRead{id}} = $pRead;
      }

      # move on in scaffold position
      $ScaffdPos += $$pCtg{length} + $spacer;
    }

    # construct contig clone index
    $$pCtgNew{clone} = &ReadTreeIndex ($$pCtgNew{read},
      -format=>'clone', -idref=>1, -debug=>$dbg2);

    # add new contig globally
    $$pCtgStruct{contig}{$$pCtgNew{id}} = $pCtgNew;
  }

  ##############################################################################
  # global tidy up
  # - skip scaffold data structures
  # - old contig data structures
  # - contig end data structures
  # global indices
  # - reads
  # - clones

  # loop over contigs
  # - minimise contig data structures
  # - delete contig end data structures
  foreach $pCtg (values %{$$pScaffdStruct{contig}}) {
    foreach $pCend (values %{$$pCtg{end}}) {
      %$pCend = ();
    }
    delete @{$pCtg}{qw(clone end number read ScaffdJoin)};
    $$pCtg{root} = $pCtgStruct;
  }
  # keep the original contigs accessible from the new data structure
  if ($bFirstConvert) {
    $$pCtgStruct{CtgOrig} = $$pScaffdStruct{contig};
  } else {
    $$pCtgStruct{CtgOrig} = $$pScaffdStruct{CtgOrig};
  }

  # construct new root read index
  # loop over contigs
  foreach $pCtg (values %{$$pCtgStruct{contig}}) {
    # the following is much faster than
    # %{$$pCtgStruct{read}}  = ( %{$$pCtgStruct{read}}, %{$$pCtg{read}} );
    # cause processed in linear time with rising number of contigs
    while (my($key,$val) = each(%{$$pCtg{read}})) {
      $$pCtgStruct{read}{$key} = $val;
    }
  }

  # root clone index
  # need not to be updated

  ##############################################################################
  # exit SUB successfully

  if ($debug) {
    printf STDERR "%s. final contig set data structure:\n", &MySub;
    printf STDERR "  contigs: %d\n", int(keys %{$$pCtgStruct{contig}});
    printf STDERR "  readings old/new: %d/%d\n", $CtRead, int(keys %{$$pCtgStruct{read}});
    printf STDERR "  clones: %d\n", int(keys %{$$pCtgStruct{clone}});
  }
  return $pCtgStruct;
}


################################################################################
# scaffolds in scaffold set data
################################################################################


# return plain text report for scaffold
#
# INTERFACE
# - argument 1: reference to scaffold data structure
#
# - options
#   -html       HTML-formatted text
#
# - return val: plain text
#
sub ScaffdPlain {
  my ($pScaffd, %opt) = @_;
  my (${LineFeed}, ${LineBreak});
  my ($pCend, $pBridge, %CendHave);
  my (%CtContig, $pScaffdMemb, @ContigList);
  my ($ScaffdHead, $ScaffdMap, $ScaffdPlain);

  ##############################################################################
  # pre-work, scaffold description header

  # function parameters
  ${LineFeed}  = $opt{-html} ? "<BR>\n" : "\n";
  ${LineBreak} = $opt{-html} ? "\n" : '';

  # start scaffold
  # %CendHave keeps track of visited contig ends to detect looping scaffolds
  $pCend = $$pScaffd{cend}[0];
  $CendHave{$pCend} = 1;
  # header
  $ScaffdHead .= sprintf "ID: %s${LineFeed}", $$pScaffd{id};
  # list context is needed to get all three contig lists
  # (members, satellites, conflicts), otherwise only members are returned
  @ContigList = &ScaffdContigs ($pScaffd);
  $pScaffdMemb = $ContigList[0];
  @CtContig{'member','satellite','conflict'} = map { int(@{$_||[]}) }
    @ContigList;
  $CtContig{member}    and $ScaffdHead .=
    sprintf "member contigs: %d${LineFeed}", $CtContig{member};
  # side effect: the total length is stored in the scaffold data structure
  $CtContig{member}    and $ScaffdHead .=
    sprintf "total length of member contigs: %d${LineFeed}",
    $$pScaffd{length} = &Sum (map{ $_->{length} }@$pScaffdMemb);
  $CtContig{satellite} and $ScaffdHead .=
    sprintf "satellite contigs: %d${LineFeed}", $CtContig{satellite};
  $CtContig{conflict}  and $ScaffdHead .=
    sprintf "conflicting bridges: %d${LineFeed}", $CtContig{conflict};

  ##############################################################################
  # member contig section, shared by first and following contigs

  # in the remaining code we append text to two strings in parallel
  # - $ScaffdMap    which is the 'orient on first glance'-header line
  # - $ScaffdPlain  which is the large scaffold description paragraph

  # argument: left (entering) contig end of the member contig
  my $AddMemberContig = sub {
    my ($pCendLeft) = @_;

    # start member contig section
    $opt{-html} and $ScaffdPlain .= "<A NAME='contig$$pCendLeft{contig}{id}'>${LineBreak}";
    $ScaffdPlain .= ${LineFeed};

    # satellite bridges at left contig end?
    foreach my $pBridgeSat (@{$$pCendLeft{BridgeSat}}) {
      $ScaffdPlain .= 'satellite '. &ScaffdBridgePlain ($pBridgeSat, %opt);
    }

    # member contig
    $ScaffdMap .= sprintf ("%s-%s%s%s-%s",
      $SyntaxTranslNtdrc{End2Prime}{$$pCendLeft{idnum}},
      $opt{-html} ? "<A HREF='#contig$$pCendLeft{contig}{id}'>" : '',
        $$pCendLeft{contig}{id},
        $opt{-html} ? "</A>" : '',
      $SyntaxTranslNtdrc{End2Prime}{-$$pCendLeft{idnum}},
      );

    $ScaffdPlain .= sprintf ("contig %s: length %d, reads %d${LineFeed}",
      &ContigId($$pCendLeft{contig},-source=>(@{$$pCendLeft{contig}{root}{source}}>1)),
      $$pCendLeft{contig}{length}, int(values %{$$pCendLeft{contig}{read}}),
      );

    # satellite bridges at right contig end?
    foreach my $pBridgeSat (map{ @{$_||[]} } (&CendCntrCend($pCendLeft)||{})->{BridgeSat}) {
      $ScaffdPlain .= 'satellite '. &ScaffdBridgePlain($pBridgeSat,%opt);
    }
  };

  ##############################################################################
  # first contig in scaffold

  # conflict at left scaffold end?
  if ($$pCend{BridgeConfl} and @{$$pCend{BridgeConfl}}) {
    $ScaffdMap .= sprintf "%sx%s ",
      $opt{-html} ? "<FONT COLOR='#D00000'>" : '',
      $opt{-html} ? "</FONT>" : '';
    $ScaffdPlain .= "${LineFeed}conflict 5':${LineFeed}";
    foreach $pBridge (@{$$pCend{BridgeConfl}}) {
      $ScaffdPlain .= &ScaffdBridgePlain ($pBridge, %opt);
    }
  }

  &$AddMemberContig ($pCend);

  ##############################################################################
  # following bridges and contigs in scaffold

  # step from the counter-end of the current contig to the joined contig end
  while ($pCend = (&CendCntrCend($pCend)||{})->{ScaffdJoin}) {

    # loop?
    if ($CendHave{$pCend}) {
      $ScaffdMap .= sprintf "${LineBreak}%sloop%s %s-%s-%s",
        $opt{-html} ? "<FONT COLOR='#D00000'>&lt;" : '<',
        $opt{-html} ? "&gt;</FONT>" : '>',
        $SyntaxTranslNtdrc{End2Prime}{$$pCend{idnum}},
        $$pCend{contig}{id},
        $SyntaxTranslNtdrc{End2Prime}{-$$pCend{idnum}};
      last;
    }
    $CendHave{$pCend} = 1;

    # bridge
    if ($pBridge = $$pCend{ScaffdBridge} and %$pBridge) {
      $ScaffdMap .= sprintf ("${LineBreak} %s%s%s ",
        $opt{-html} ? sprintf ("<A HREF=\"#bridge%s_%s\">", $$pBridge{cend}[0]{id}, $$pBridge{cend}[1]{id}) : '',
        $opt{-html} ? '&lt;=&gt;' : '<=>',
        $opt{-html} ? '</A>' : '');

      $opt{-html} and $ScaffdPlain .= sprintf ("<A NAME=\"bridge%s_%s\">${LineBreak}",
        $$pBridge{cend}[0]{id}, $$pBridge{cend}[1]{id});
      $ScaffdPlain .= ${LineFeed};
      $ScaffdPlain .= sprintf "scaffold-join score: %.1f${LineFeed}", $$pCend{ScaffdBridge}{score};
      $ScaffdPlain .= &ScaffdBridgePlain ($pBridge, %opt);
    } else {
      # join without bridge data: plain join symbol only
      $ScaffdMap .= sprintf ("${LineBreak} %s ",
        $opt{-html} ? '&lt;=&gt;' : '<=>');
    }

    # member contig section
    &$AddMemberContig ($pCend);

    # conflict at right scaffold end?
    my $pBridgeConfl = (&CendCntrCend($pCend)||{})->{BridgeConfl};
    if ($pBridgeConfl and @$pBridgeConfl) {
      $ScaffdMap .= sprintf " %sx%s",
        $opt{-html} ? "<FONT COLOR='#D00000'>" : '',
        $opt{-html} ? "</FONT>" : '';
      $ScaffdPlain .= "${LineFeed}conflict 3':${LineFeed}";
      foreach $pBridge (@$pBridgeConfl) {
        $ScaffdPlain .= &ScaffdBridgePlain($pBridge,%opt);
      }
      last;
    }
  }

  # join document parts
  # exit SUB
  return join ('', $ScaffdMap.${LineFeed}, $ScaffdHead, $ScaffdPlain);
}


# create scaffold data entry from singlet contig
#
# INTERFACE
# - argument 1: reference to contig data structure
#
# - options
#   -debug      [STD]
#
# - return val: reference to new or existing scaffold data structure
#
# DESCRIPTION
# - a contig that's already part of a scaffold won't be forced into a singlet
#   scaffold.
# - The reference to the basic contig set data structure is retrieved via
#   referenced contig data sub-structures. Keep it valid!
# - The order of the scaffold ends reflects contig orientation
#
sub ScaffdSingle {
  my ($pCtg, %opt) = @_;
  my $debug = $opt{-debug};

  # contig is already member of a (non-empty) scaffold data structure
  # => hand back the existing scaffold, do not touch anything
  if ($pCtg->{scaffd} and %{ $pCtg->{scaffd} }) {
    if ($debug) {
      printf STDERR "%s. WARNING: contig %s alredy part of scaffold %s - action skipped\n", &MySub,
        &ContigId($pCtg,-source=>(@{$$pCtg{root}{source}}>1)), $$pCtg{scaffd}{id};
    }
    return $pCtg->{scaffd};
  }

  # the data structure root is reached via the contig data structure
  my $pScaffdStruct = $pCtg->{root};
  if (! $pScaffdStruct) {
    printf STDERR "%s. ERROR: contig set data structure reference in contig %s undefined\n", &MySub, $$pCtg{id};
    exit 1;
  }

  # new singlet scaffold
  # - ID is the ID of the contig's '-1' end
  # - scaffold ends are the two contig ends, in order '-1', '1'
  my %ScaffdNew = (
    id   => $pCtg->{end}{'-1'}{id},
    cend => [ @{ $pCtg->{end} }{'-1','1'} ],
    );
  $pCtg->{scaffd} = \%ScaffdNew;

  # register scaffold in the scaffold index of the data structure root
  $pScaffdStruct->{scaffd}{ $ScaffdNew{id} } = \%ScaffdNew;

  return \%ScaffdNew;
}


# scaffold-join two contig ends
#
# INTERFACE
# - argument 1: reference to contig end data structure 1
# - argument 2: reference to contig end data structure 2
#
# - options
#   -debug      [STD]
#
# - return val: - reference to scaffold data structure
#               - undef if an error occurs
#
# DESCRIPTION
# - The reference to the basic contig set data structure is retrieved via
#   referenced contig data sub-structures. Keep it valid!
# - The orientation of the resulting joint scaffold will be in order:
#   counter-CtgEnd 0 -> CtgEnd 0 -> CtgEnd 1 -> counter-CtgEnd 1
#
sub ScaffdJoin {
  my (@cend, %opt);
     ($cend[0], $cend[1], %opt) = @_;
  my ($pScaffdStruct, $debug, $CtScaffdBefore, $CtScaffdCandMemb);
  my (@Scaffd, $CtCend, $pCtg, @ScaffdMemb);

  # function parameters
  $debug = $opt{-debug};
  foreach $CtCend (0, 1) {
    unless ($cend[$CtCend] and %{$cend[$CtCend]}) {
      printf STDERR "%s. ERROR: contig end (arg #%d) undefined\n", &MySub, $CtCend;
      return undef;
    }
  }
  # the data structure root is reached via the contig of the first contig end
  unless ($pScaffdStruct = $cend[0]{contig}{root}) {
    printf STDERR "%s. ERROR: contig set data structure reference in contig %s undefined\n", &MySub, $cend[0]{contig}{id};
    exit 1;
  }
  $CtScaffdBefore = int (keys %{$$pScaffdStruct{scaffd}});

  # pre-work on scaffold situation
  # the global scaffold index is not touched before all checks have passed,
  # so an error return leaves the index intact
  foreach $CtCend (0, 1) {

    # force supplied contig ends into scaffold data structures
    $Scaffd[$CtCend] = $cend[$CtCend]{contig}{scaffd} ||=
      &ScaffdSingle ($cend[$CtCend]{contig});

    # ensure contig ends are each embodying an end of a scaffold
    #   => @Scaffd[0,1]
    unless (grep { $cend[$CtCend] eq $_ } @{$Scaffd[$CtCend]{cend}}) {
      printf STDERR "%s. ERROR: contig end %s is at neither end of scaffold %s\n", &MySub,
        $cend[$CtCend]{id}, $Scaffd[$CtCend]{id};
      printf STDERR "  contigs in scaffold %s\n%s", $Scaffd[$CtCend]{id},
        join ('', map{ "    $_->{id}\n" }map{ @{$_||[]} }scalar &ScaffdContigs($Scaffd[$CtCend]));
      printf STDERR "  ends of scaffold %s\n%s", $Scaffd[$CtCend]{id},
        join ('', map { "    $_->{id}\n" } @{$Scaffd[$CtCend]{cend}});
      return undef;
    }
  }
  # number of member contigs before the join (debug report only)
  $CtScaffdCandMemb = int(map{ @{$_||[]} }scalar &ScaffdContigs($Scaffd[0]))
    + int(map{ @{$_||[]} }scalar &ScaffdContigs($Scaffd[1]));

  # contig ends to be joined are part of the same scaffold?
  # don't produce looping scaffolds!!!
  if ($Scaffd[0] eq $Scaffd[1]) {
    printf STDERR "%s. ERROR: contig ends to be joined (%s, %s) are part of the same scaffold %s\n", &MySub,
      $cend[0]{id}, $cend[1]{id}, $Scaffd[0]{id};
    return undef;
  }

  # all checks passed
  # delete old scaffolds from global index in contig set data structure
  foreach $CtCend (0, 1) {
    delete $$pScaffdStruct{scaffd}{$Scaffd[$CtCend]{id}};
  }

  # contig ends cannot be conflicting anymore
  # former conflicting bridges are kept as satellite bridges
  foreach $CtCend (0, 1) {
    if ($cend[$CtCend]{BridgeConfl}) {
      $cend[$CtCend]{BridgeSat} = $cend[$CtCend]{BridgeConfl};
      delete $cend[$CtCend]{BridgeConfl};
    }
  }

  # create new scaffold => $Scaffd[2]
  # scaffold ends are the ends of the old scaffolds which are not joined
#  # sort cend entries in new scaffold structure
#  @{$Scaffd[2]{cend}} = sort { &CendIdSort ($$a{id}, $$b{id}) }
  $Scaffd[2]{cend} = [
    (grep { $_ ne $cend[0] } @{$Scaffd[0]{cend}} )[0],
    (grep { $_ ne $cend[1] } @{$Scaffd[1]{cend}} )[0] ];
  $Scaffd[2]{id} = $Scaffd[2]{cend}[0]{id};
  # enter new scaffold structure into global list
  $$pScaffdStruct{scaffd}{$Scaffd[2]{id}} = $Scaffd[2];

  # change scaffold references in contig data structures
  # member lists are sampled from the old scaffolds, before the contig ends
  # are actually joined
  @ScaffdMemb = ( @{ scalar &ScaffdContigs($Scaffd[0]) },
                  @{ scalar &ScaffdContigs($Scaffd[1]) } );
  foreach $pCtg (@ScaffdMemb) {
    $$pCtg{scaffd} = $Scaffd[2];
  }

  # join contig ends
  $cend[0]{ScaffdJoin} = $cend[1];
  $cend[1]{ScaffdJoin} = $cend[0];

  # debug
  if ($debug) {
    printf STDERR "%s. joined contigs:\n  contig 1: end %s (scaffold %s/%s)\n  contig 2: end %s (scaffold %s/%s)\n  scaffold: %s/%s\n", &MySub,
      $cend[0]{id}, $Scaffd[0]{cend}[0]{id}, $Scaffd[0]{cend}[1]{id},
      $cend[1]{id}, $Scaffd[1]{cend}[0]{id}, $Scaffd[1]{cend}[1]{id},
      $Scaffd[2]{cend}[0]{id}, $Scaffd[2]{cend}[1]{id};
    printf STDERR "%s. scaffolds before / candidate members: before->after / after: %d / %d -> %d / %d\n", &MySub,
      $CtScaffdBefore,
      $CtScaffdCandMemb, int(map{ @{$_||[]} }scalar &ScaffdContigs($Scaffd[2])),
      int(keys %{$$pScaffdStruct{scaffd}});
  }

  # exit SUB successfully
  return $Scaffd[2];
}


# break scaffold at a contig end
#
# INTERFACE
# - argument 1: reference to contig end data structure
#
# - options
#   -bridge     supply reference to joining bridge data sub-structure
#               *** implement me ***
#   -debug      [STD]
#
# - return val: success status (boolean)
#
# DESCRIPTION
# - The reference to the basic contig set data structure is retrieved via
#   referenced contig data sub-structures. Keep it valid!
#
sub ScaffdBreak {
  my ($pCend, %opt) = @_;
  my ($pScaffdStruct, $debug, $CtScaffdBefore);
  my (%Scaffd, $pScaffdInfo, %CendEdge);
  my ($CtScaffdend, $pCtg, $pScaffdMemb);

  # function parameters
  $debug = $opt{-debug};
  # contig end is not joined: nothing to break, this counts as success
  $$pCend{ScaffdJoin} or return 1;
  unless ($pScaffdStruct = $$pCend{contig}{root}) {
    printf STDERR "%s. ERROR: contig set data structure reference in contig %s undefined\n", &MySub, $$pCend{contig}{id};
    exit 1;
  }
  $CtScaffdBefore = int keys %{$$pScaffdStruct{scaffd}};

  # scaffold, scaffold info, break contig chain
  # delete scaffold from global index
  # keys of %Scaffd / %CendEdge:
  #   '0'   scaffold before the break
  #   '-1'  half of the supplied contig end
  #   '1'   counter-half
  # NOTE(review): $CendEdge{'1'} is presumably the contig end joined to
  #   $pCend - verify against &ScaffdCendLocat
  $Scaffd{'0'} = $$pCend{contig}{scaffd};
  $pScaffdInfo = &ScaffdCendLocat ($pCend) || {};
  %CendEdge = (
    '1'  => $$pScaffdInfo{cend}{'1'}{'-1'}[0],
    '-1' => $pCend,
    );
  foreach $CtScaffdend ('1', '-1') {
    delete $CendEdge{$CtScaffdend}{ScaffdJoin};
    delete $CendEdge{$CtScaffdend}{ScaffdBridge};
  }
  delete $$pScaffdStruct{scaffd}{$Scaffd{'0'}{id}};

  # generate new scaffold data structures
  foreach $CtScaffdend ('1', '-1') {

    # create new scaffold data structure
    # scaffold ends are the contig end at the break point and the last
    # contig end found in the scaffold info for this half, sorted by
    # &CendIdSort; scaffold ID is the ID of the first scaffold end
    $Scaffd{$CtScaffdend} = {
      cend => [ sort { &CendIdSort($$a{id},$$b{id}) }
                $CendEdge{$CtScaffdend},
                $$pScaffdInfo{cend}{$CtScaffdend}{$CtScaffdend}[-1] ],
      };
    $Scaffd{$CtScaffdend}{id} = $Scaffd{$CtScaffdend}{cend}[0]{id};

    # new scaffold should contain at least 2 member contigs
    $pScaffdMemb = scalar &ScaffdContigs ($Scaffd{$CtScaffdend});
    if (int(@$pScaffdMemb) >= 2) {

      # enter into global index
      $$pScaffdStruct{scaffd}{$Scaffd{$CtScaffdend}{id}} = $Scaffd{$CtScaffdend};

      # change scaffold references in contig data structures
      foreach $pCtg (@$pScaffdMemb) {
        $$pCtg{scaffd} = $Scaffd{$CtScaffdend};
      }

    } else {

      # delete scaffold references in contig data structures
      # a singlet half is not entered into the global index
      foreach $pCtg (@$pScaffdMemb) {
        delete $$pCtg{scaffd};
      }
    }
  }

  # debug
  if ($debug) {
    printf STDERR "%s. break scaffold %s..%s at %s (%s)\n", &MySub,
      $Scaffd{'0'}{cend}[0]{id}, $Scaffd{'0'}{cend}[1]{id},
      $CendEdge{'-1'}{id}, $CendEdge{'1'}{id};
    printf STDERR "  contig end half: %s..%s\n",
      $Scaffd{'-1'}{cend}[0]{id}, $Scaffd{'-1'}{cend}[1]{id};
    printf STDERR "  counter-half: %s..%s\n",
      $Scaffd{'1'}{cend}[0]{id}, $Scaffd{'1'}{cend}[1]{id};
    printf STDERR "  scaffolds before/after: %d/%d\n",
      $CtScaffdBefore, int(keys %{$$pScaffdStruct{scaffd}});
  }

  # exit SUB successfully
  return 1;
}


# return list(s) of member contigs for scaffold
#
# INTERFACE
# - argument 1: reference to scaffold data structure
#
# - options
#   -debug      [STD]
#   -join       return single reference on array containing all kind of
#               contigs (member/satellite/conflicts)
#
# - return val: - wantscalar: reference to array of member contigs,
#                 cf. option -join
#               - wantarray:
#                 - reference to array of member contigs
#                 - reference to array of satellite contigs
#                 - reference to array of conflicting contigs
#
# DESCRIPTION
# - note: this code is too slow to be called from within a sorting routine
#   over a large array.
# - satellite contigs are filtered for member contigs and conflicting
#   contigs
# - conflicting contigs are filtered for member contigs
#
sub ScaffdContigs {
  my ($pScaffd, %opt) = @_;
  my $debug = $opt{-debug};
  # satellite / conflicting contigs are sampled only if they are asked for
  my $bWantAll = (wantarray or $opt{-join});
  my (%CendSeen, @Member, %Conflict, %Satellite);

  # walk starts at the 'left' scaffold end
  my $pCendCurr = $$pScaffd{cend}[0];
  unless ($pCendCurr) {
    printf STDERR "%s. ERROR: initial contig end undefined for scaffold %s\n", &MySub, $$pScaffd{id};
    return wantarray ? ([],[],[]) : [];
  }
  $debug and printf STDERR "%s. entered SUB\n", &MySub;

  # walk along the chain of scaffold-joins
  # a contig end that was seen before indicates a looped scaffold and
  # terminates the walk
  while ($pCendCurr and not $CendSeen{$pCendCurr}) {
    $CendSeen{$pCendCurr} = 1;
    $debug and printf STDERR "%s. %d contig ends in index\n", &MySub,
      int(keys %CendSeen);

    # contig of current contig end is a member
    push @Member, $$pCendCurr{contig};

    # contigs referred to by conflicting / satellite bridges at either end
    # of the current contig, the current contig itself is left out
    if ($bWantAll) {
      foreach my $pCendRef ($pCendCurr, &CendCntrCend($pCendCurr)) {
        foreach my $pCtgConfl (
          grep { $_ and $_ ne $$pCendCurr{contig} } map { @{$_||[]} }
          &DataTreeSlc($$pCendRef{BridgeConfl},[[0,'all'],['cend'],[0,'all'],['contig']],-unique=>1)
        ) {
          $Conflict{$pCtgConfl} = $pCtgConfl;
        }
        foreach my $pCtgSat (
          grep { $_ and $_ ne $$pCendCurr{contig} } map { @{$_||[]} }
          &DataTreeSlc ($$pCendRef{BridgeSat}, [[0,'all'],['cend'],[0,'all'],['contig']], -unique=>1)
        ) {
          $Satellite{$pCtgSat} = $pCtgSat;
        }
      }
    }

    # step to the contig end joined to the counter-end, if any
    $pCendCurr = (&CendCntrCend($pCendCurr)||{})->{ScaffdJoin};
  }

  # make the lists mutually exclusive
  # - conflicting contigs must not be members
  # - satellite contigs must be neither members nor conflicting contigs
  my %MemberIdx = map { ($_=>$_) } @Member;
  %Conflict  = map { ($_=>$_) }
    grep { not $MemberIdx{$_} }
    values %Conflict;
  %Satellite = map { ($_=>$_) }
    grep { not $MemberIdx{$_} and not $Conflict{$_} }
    values %Satellite;

  # option -join: one array holding members, conflicts, satellites
  if ($opt{-join}) {
    push @Member, values %Conflict, values %Satellite;
    return \@Member;
  }

  # list context:   members, satellites, conflicts
  # scalar context: members only
  if (wantarray) {
    return (\@Member, [values %Satellite], [values %Conflict]);
  }
  return \@Member;
}


################################################################################
# bridges in scaffold set data
################################################################################


# format an entry of the bridge data structure as text
#
# INTERFACE
# - argument 1: reference to bridge data structure
#               fields read here: cend (two contig ends, each with id and
#               contig), clone (array of clone entries), CEndDist and
#               CEndGap (hashes with mean and s)
#
# - options
#   -html       HTML-formatted text: line ends are "<BR>\n" and indentation
#               is done with '&nbsp;'
#
# - return val: plain text, consisting of
#               - one headline for the bridge and one line for the
#                 quantitative relation of the contig ends
#               - one line for each of the two contigs
#               - one line for each bridging clone
#
sub ScaffdBridgePlain {
  my ($pBridge,%opt) = @_;
  my ($LineFeed, $space) = $opt{-html} ? ("<BR>\n", '&nbsp;') : ("\n", ' ');
  my $indent = $space x 2;
  my $CloneNum = int(@{$$pBridge{clone}});
  my @TextPart;

  # bridge
  push @TextPart, sprintf ("bridge %s / %s via %d clone%s${LineFeed}",
    $$pBridge{cend}[0]{id}, $$pBridge{cend}[1]{id},
    $CloneNum, ($CloneNum==1) ? '':'s');
  push @TextPart, sprintf ("%squant. relation: contig ends' distance %d +/- %d bp, gap size %d +/- %d bp${LineFeed}",
    $indent, @{$$pBridge{CEndDist}}{'mean','s'},
    @{$$pBridge{CEndGap}}{'mean','s'});

  # contigs
  foreach my $CtCend (0 .. 1) {
    my $pCtg = $$pBridge{cend}[$CtCend]{contig};
    # show the contig source only if there is more than one source
    my $bCtgSource = (@{$$pCtg{root}{source}} > 1);
    push @TextPart, sprintf ("%scontig %s: length %d, reads %d${LineFeed}",
      $indent, &ContigId($pCtg,-source=>$bCtgSource), $$pCtg{length},
      int (values %{$$pCtg{read}}) );
  }

  # clones, numbered from 1
  foreach my $CtClone (0 .. $CloneNum-1) {
    my $pClone = $$pBridge{clone}[$CtClone];
    push @TextPart, sprintf ('%sclone %d: %s, ', $indent,
      $CtClone+1, $$pClone{id});
    # *** $$pClone{relied} not implemented yet ***
    push @TextPart, sprintf ("reliability %.3f, contig ends' distance %d +/- %d${LineFeed}",
      $$pClone{relied}||0, @{$$pClone{CEndDist}}{'mean','s'});
  }

  # exit SUB
  return join ('', @TextPart);
}


# expected contig end distance derived from a single contig-bridging clone
#
# INTERFACE
# - argument 1: reference to clone data structure
#               this function adds fields to the data structure
#               CEndDistSum  sum of the maximum contig end distances found
#                            for clone ends '1' and '-1'
#               CEndDist     return value of this function
#
# - options
#   -debug      [STD]
#               sub-calls get the debug level reduced by 1
#   -RcCloneLen use this rc file for clone length data
#               For procedural details see &ReadWatch::Library::...
#
# - return val: - reference to hash of Gauss parameters:
#                 mean  expected contig end distance, i.e. the clone length
#                       estimate minus CEndDistSum
#                 s     SD of distance estimate
#                 also entered into clone data structure (arg1)
#
# DESCRIPTION
# - the library is taken from $$pClone{field}{lib}. If that is not set it is
#   derived via &ReadidToFields from the clone identifier, suffixed '.s1'.
# - a warning is printed to STDERR if CEndDistSum turns out zero
#
sub ScaffdBridgeCloneGap {
  my ($pClone, %opt) = @_;

  # function parameters
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $lib = $$pClone{field}{lib};
  if (not $lib) {
    $lib = ${ &ReadidToFields($$pClone{id}.'.s1')||{} }{lib};
  }
  if ($debug) {
    printf STDERR "%s. clone %s from library %s\n", &MySub,
      $$pClone{id}||"''", $lib||"''";
  }

  # sample contig end distances of reads
  # - for each clone end take the largest distance of all entries
  my %CEndDist;
  foreach my $ItEnd ('1', '-1') {
    my @DistEntry = map { @{$_||[]} }
      &DataTreeSlc($$pClone{$ItEnd},[[0,'all'],['CEndDist']]);
    $CEndDist{$ItEnd} = &Max (@DistEntry);
  }
  if ($debug) {
    printf STDERR "%s. contig end distances fwd %d, rev %d\n", &MySub,
      @CEndDist{'1','-1'};
  }
  $$pClone{CEndDistSum} = &Sum(values(%CEndDist));
  if (not $$pClone{CEndDistSum}) {
    printf STDERR "%s. WARNING: contig end distance is zero for clone %s\n", &MySub, $$pClone{id}||"''";
  }

  # get clone length statistics from ReadWatch module
  # - NOTE(review): the original comment warned that a reference on the
  #   original *.rc data is returned. -copy=>1 is passed here, presumably to
  #   get a private copy, since the mean is modified below - confirm against
  #   &CloneLenEstim.
  my $pCEndDistEstim = &CloneLenEstim ($lib,
    -copy=>1, -rc=>$opt{-RcCloneLen}, -default=>1, -debug=>$dbg2);
  if ($debug) {
    printf STDERR "%s. clone length estimate "
      ."for clone %s, lib %s: %s +/- %s\n  *.rc file%s\n", &MySub,
      $$pClone{id}||"''", $lib||"''", @{$pCEndDistEstim}{'mean','s'},
      &CloneLenRcFile();
  }

  # clone length minus the sequence-covered stretches at both clone ends
  $$pCEndDistEstim{mean} -= $$pClone{CEndDistSum};

  # exit SUB
  $$pClone{CEndDist} = $pCEndDistEstim;
  return $pCEndDistEstim;
}


################################################################################
# contig ends in scaffold set data
################################################################################


# contig end statistics
#
# INTERFACE
# - argument 1: reference to contig set data structure
#               contig ends are taken from field 'end' of all entries in
#               field 'contig'
#
# - options
#   -html       HTML-formatted text: line ends are "<BR>\n" and indentation
#               is done with '&nbsp;'
#
# - return val: contig end statistics plain text
#
# DESCRIPTION
# - each contig end is counted in exactly one of these categories:
#   scaffold-joined  field ScaffdJoin is set
#   conflicting      not scaffold-joined, but there is at least one bridge
#   terminal         neither scaffold-joined nor bridged
#
sub ScaffdCendStatist {
  my ($pScaffdStruct,%opt) = @_;
  my $LineFeed = $opt{-html} ? "<BR>\n" : "\n";
  my $space    = $opt{-html} ? '&nbsp;' : ' ';
  # literal blanks would collapse in HTML, so indent like &ScaffdBridgePlain
  my $indent   = $space x 2;

  # count contig ends in several categories
  # - counters start at zero, so empty categories are reported without
  #   touching undefined values
  my %CtCend = map { ($_ => 0) } qw(all ScaffdJoin conflict terminal);
  foreach my $pCend (map {@{$_||[]}}
    &DataTreeSlc ($$pScaffdStruct{contig}, [[0,'all'],['end'],[0,'all']], -unique=>1)
  ) {
    $CtCend{all} ++;
    if ($$pCend{ScaffdJoin}) {
      $CtCend{ScaffdJoin} ++;
    } elsif (@{$$pCend{bridge}||[]}) {
      $CtCend{conflict} ++;
    } else {
      $CtCend{terminal} ++;
    }
  }

  # plain-format statistics
  my $StatPlain =       "contig ends:${LineFeed}";
  $StatPlain .= sprintf "%stotal: %d${LineFeed}", $indent, $CtCend{all};
  $StatPlain .= sprintf "%sscaffold-joined: %d${LineFeed}", $indent, $CtCend{ScaffdJoin};
  $StatPlain .= sprintf "%sconflicting total: %d${LineFeed}", $indent, $CtCend{conflict};
  $StatPlain .= sprintf "%sterminal: %d${LineFeed}", $indent, $CtCend{terminal};

  # exit SUB
  return $StatPlain;
}


# contig ends that are linked to a given contig end via bridges
#
# INTERFACE
# - argument 1: reference to contig end data structure
# - return val: array of bridged partner contig ends, i.e. all entries in
#               field 'cend' of all bridges of the contig end, except for
#               the contig end itself. Empty list if argument 1 is false.
#
sub ScaffdCendPartners {
  my ($pCend, %opt) = @_;
  return () unless $pCend;

  # all contig ends that take part in any bridge of the contig end
  my @CendBridged = map { @{$_||[]} }
    &DataTreeSlc($$pCend{bridge},[[0,'all'],['cend'],[0,'all']]);

  # the contig end in quest is not its own partner
  my @CendPartner = grep { $_ ne $pCend } @CendBridged;

  # exit SUB
  return @CendPartner;
}


# locate contig-end in a scaffold
#
# INTERFACE
# - argument 1: reference to contig end data structure
#
# - options
#   -debug      [STD]
#               accepted for interface consistency, currently without effect
#
# - return val: - reference to hash containing result structure:
#                 scaffd        scaffold reference
#                 contig{-1/1}  references to arrays of contigs following
#                               backwards (-1) or ahead (1) in the same scaffold.
#                               In the backward list there's at least one contig
#                               (contig holding the contig end in quest)
#                 cend{-1/1}{-1/1}  reference to arrays of contig ends
#                               following backwards (-1) or ahead (1) in the
#                               same (1) or counter-orientation (-1)
#                 length{-1/1}  scaffold length following backwards (-1)
#                               or ahead (1) from the specified contig end
#               - reference to empty hash if an error occurs, i.e. if the
#                 contig of the contig end has no scaffold assigned
#
# DESCRIPTION
# - both walks follow field ScaffdJoin and step to the counter contig end
#   of the joined contig end.
# - looped scaffolds: each walk stops as soon as it reaches a contig end
#   that it has visited before. So, in a circular scaffold the walk ahead
#   yields all contigs except the starting contig, and the walk backwards
#   yields all contigs, each of them once.
#
sub ScaffdCendLocat {
  my ($pCend, %opt) = @_;
  my (%result, $pCendNext, %CendDone);

  # scaffold
  $result{scaffd} = $$pCend{contig}{scaffd} or return {};

  # look ahead
  # - the index of visited contig ends avoids an endless loop for looped
  #   scaffolds, cf. &ScaffdContigs
  %CendDone = ($pCend => 1);
  $pCendNext = $pCend;
  while ($pCendNext = &CendCntrCend($$pCendNext{ScaffdJoin})) {
    $CendDone{$pCendNext} ++ and last;
    push @{$result{contig}{'1'}}, $$pCendNext{contig};
    $result{length}{'1'} += $$pCendNext{contig}{length};
    push @{$result{cend}{'1'}{'1'}}, $pCendNext;
    push @{$result{cend}{'1'}{'-1'}}, &CendCntrCend($pCendNext);
  }

  # look backwards
  # - the contig holding the contig end in quest is the first entry
  $pCendNext = &CendCntrCend ($pCend);
  push @{$result{contig}{'-1'}}, $$pCend{contig};
  push @{$result{cend}{'-1'}{'-1'}}, $pCendNext;
  $result{length}{'-1'} = $$pCend{contig}{length};
  %CendDone = defined($pCendNext) ? ($pCendNext => 1) : ();
  while ($pCendNext = &CendCntrCend($$pCendNext{ScaffdJoin})) {
    $CendDone{$pCendNext} ++ and last;
    push @{$result{contig}{'-1'}}, $$pCendNext{contig};
    $result{length}{'-1'} += $$pCendNext{contig}{length};
    push @{$result{cend}{'-1'}{'1'}}, &CendCntrCend($pCendNext);
    push @{$result{cend}{'-1'}{'-1'}}, $pCendNext;
  }

  # exit SUB
  return \%result;
}


1;
# $Id: ScaffdAssemb.pm,v 1.18 2018/06/05 18:02:56 szafrans Exp $
