#! /usr/local/bin/perl
################################################################################
#
#  GAP4 Project Handling Tool
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 1999-2004,
#    szafrans@imb-jena.de
#  Karol Szafranski on behalf of FLI Jena, Genome Analysis Group, 2005-2007,
#    szafrans@fli-leibniz.de
#
################################################################################
#
#  DESCRIPTION
#
# - See function &usage for description of command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
# - this code makes extensive use of GAP4 Tcl scripts. For functioning
#   the the GAP4 environment needs to be set poperly.
#   - set $ENV{STADENROOT}
#   - source $ENV{STADENROOT}/staden.profile
#
# - assembly data source formats:
#   see function descriptions in usage listing ($ProgFile -help) or at the
#   beginning of the code block
#   - GAP4 database: binary database of the Staden GAP4, argument syntax
#     (folder/)database.version
#   - GAP4 show relationships: contig topology report extracted from a GAP4
#     database
#
# - This code has some project-specific properties, with respect to contig set
#   data structures or scaffold set data structures since they depend on the
#   definition of a nomenclatural relation between sequence reads and sequen-
#   cing templates. For details see DESCRIPTION notes in SeqAlign::Assembly.pm.
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments, basic I/O
#   &usage
#   &AddSwitch
#   &PrepOstump
#
# - hard database manipulation
#   &ProgCp
#   &ProgContigMv
#   &ProgAnnotRm
#   &ProgRepair
#
# - import, assemble
#   &ProgAddRna
#   &ProgAddRev
#   &ProgAssembleScreen
#   &ProgAssembleEnds
#
# - analyse and report: basic properties and assembly topology
#   &ProgListID
#   &ProgSeq
#   &ProgIndex
#   &ProgFollowRead
#   &ProgCoverage
#   &ProgMinimal
#
# - analyse and report scaffolds
#   &ProgEndRead
#   &ProgPairReport
#   &ProgScaffdMap
#   &ProgScaffdList
#   &ProgScaffdSeq
#   &ProgMapbyRead
#
# - analyse and report: advanced sequencing
#   &ProgSeqExtra
#   &ProgSeqWalk
#
# - analyse and report: miscellaneous
#   &ProgSrcSpecif
#
# - accuracy and annotation
#   &ProgSeqQual
#   &ProgQualAdjust
#   &ProgTag
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#   -timer      print time-performance protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - In practice, there may be problems using contig/read specifiers in the
#   argument list, if files exist for these names.  Then, &ContigStructMeta
#   will try to open the files as a contig source, but will fail.  However,
#   subsequent selection using the statements as selectors will not take place.
#
# - program mode -index:
#   Theoretically, a directed assembly would be a sufficient input format
#   for this and other program modes. Where's the problem?
#
# - look also for notes in the header of each function block
#
################################################################################


# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
}
use strict; #use warnings;  # OK 20040818
use FileHandle;
use MainLib::StrRegexp qw(&TimeStr);
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline;
use MainLib::File;
use MainLib::FileTmp qw(&PathUnique);
use MainLib::FileAccAsync;
use MainLib::Misc;
use Math::Calc;
use Math::Range;
use Math::Statist;
use Math::Plot2D;
use Math::PlotImg;
use database::DbPlain;
use SeqLab::SeqBench;
use SeqLab::SeqFormat;
use SeqAlign::Gap;
use SeqAlign::Assembly;
use SeqAlign::ScaffdAssemb;
use SeqAlign::EstGenome;
use ReadWatch::Read;
use ReadWatch::Library;
use ReadWatch::ReadIndex;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
  # "our" instead of "my", here since $ProgFile is accessed e.g. from
  # SeqAlign::ScaffdAssemb
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{path}{AddIniStamp} = '.GapAdd_';
$ProgParam{call}{PrjRequest} = "$CorePath{call}{PerlScript}/GscjRead.pl -provide -tag -v -timer";

$ProgParam{default}{GapSpacer}{clone} = 10;
$ProgParam{default}{GapSpacer}{scaffd} = 50;
$ProgParam{default}{OutImgRelhigh}{coverage} = 0.5;
$ProgParam{default}{OutImgRelhigh}{ScaffdLength} = 0.7;
$ProgParam{default}{OutImgWidth} = 640;
$ProgParam{default}{ProgMode} = 'help';
$ProgParam{default}{ThreshAddNum} = 40;
$ProgParam{default}{QualAdjust}{extern} = 2;
$ProgParam{default}{QualAdjust}{edit} = 50;

# working desk
$ProgParam{store} = undef;

# ensure GAP4 Tcl package available
unless (-d $CorePath{call}{GapScript}) {
  die sprintf "ERROR: GAP4 script path %s\n", $CorePath{call}{GapScript} ? 'not found':'undefined';
}


# manage I/O #####################################

# organise I/O handles
&Unbuffer();

# organise tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches, default subprogram
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
unshift @ProgArg, &GetoptsNArgs();
$ProgMode ||= $ProgParam{default}{ProgMode};

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log}!=1) ? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}


# work flow manifold #############################

# chain to program mode which is optionally argument-less
if (0) { }
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }
elsif ($ProgMode =~ m/^QualAdjust(?:=(\d+)(?:,(\d+))?)?$/i) {
  $ProgOpt{-QualAdjust} = [ $1, $2 ];
  &ProgQualAdjust (@ProgArg);
  exit 0;
}

# ensure input argument(s)
unless (@ProgArg) {
  die "ERROR: input arguments missing\n";
}

# Arguments that represent assembly data sources cannot be checked for existence
# as files/directories. Contig data source "index" is specified as a filename
# root since this resource is represented by multiple files.

# chain to program mode (with input argument(s))
if (0) { }
elsif ($ProgMode =~ m/^AddRna$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgAddRna (@ProgArg);
}
elsif ($ProgMode =~ m/^AddRev$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgAddRev (@ProgArg);
}
elsif ($ProgMode =~ m/^AnnotRm$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgAnnotRm (@ProgArg);
}
elsif ($ProgMode =~ m/^AssembleEnds$/i) {
  &ProgAssembleEnds ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^AssembleScreen$/i) {
  &ProgAssembleScreen (@ProgArg);
}
elsif ($ProgMode =~ m/^ContigMv$/i) {
  &InargFileExists ($ProgArg[0], -exit=>\&usage);
  &ProgContigMv (@ProgArg);
}
elsif ($ProgMode =~ m/^Coverage$/i) {
  &InargFileExists ($ProgArg[0], -exit=>\&usage);
  &ProgCoverage (@ProgArg);
}
elsif ($ProgMode =~ m/^cp$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgCp (@ProgArg);
}
elsif ($ProgMode =~ m/^EndRead$/i) {
  &ProgEndRead (@ProgArg);
}
elsif ($ProgMode =~ m/^FollowRead$/i) {
  &ProgFollowRead (@ProgArg);
}
elsif ($ProgMode =~ m/^index$/i) {
  &ProgIndex (@ProgArg);
}
elsif ($ProgMode =~ m/^ListIDs?$/i) {
  $ProgMode = 'ListID';
  &ProgListID (@ProgArg);
}
elsif ($ProgMode =~ m/^List(Clone|Contig|Read)s?$/i) {  # old-fashioned command line syntax
  $ProgOpt{-SlcData} = lc $1;
  $ProgMode = 'ListID';
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgListID (@ProgArg);
}
elsif ($ProgMode =~ m/^ListOligos?$/i) {  # former function
  die "ERROR: function has moved to GscjGap.pl -ListOligos\n";
}
elsif ($ProgMode =~ m/^MapbyRead$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgMapbyRead (@ProgArg);
}
elsif ($ProgMode =~ m/^MinimalTiling$/i) {
  &ProgMinimal (@ProgArg);
}
elsif ($ProgMode =~ m/^PairReport$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgPairReport (@ProgArg);
}
elsif ($ProgMode =~ m/^(ScaffdMap|Psi(Contig)?Map|PairJoin)$/i) {
  if ($ProgMode =~ m/(Psi|Pair)/i) {
    printf STDERR "WARNING: -$ProgMode is an old-fashioned spell of program function -ScaffdMap\n";
    printf STDERR "  run '$ProgFile -help' for details\n";
  }
  $ProgMode = 'ScaffdMap';
  &ProgScaffdMap (@ProgArg);
}
elsif ($ProgMode =~ m/^(ScaffdList|Psi(Contig)?List)$/i) {
  if ($ProgMode =~ m/(Psi)/i) {
    printf STDERR "WARNING: -$ProgMode is an old-fashioned spell of program function -ScaffdList\n";
    printf STDERR "  run '$ProgFile -help' for details\n";
  }
  $ProgMode = 'ScaffdList';
  &ProgScaffdList (@ProgArg);
}
elsif ($ProgMode =~ m/^repair$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgRepair (@ProgArg);
}
elsif ($ProgMode =~ m/^seq$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgSeq (@ProgArg);
}
elsif ($ProgMode =~ m/^SeqExtra$/i) {
  $ProgParam{store}{ReadIndex} = ReadWatch::ReadIndex->new();
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgSeqExtra (@ProgArg);
}
elsif ($ProgMode =~ m/^SeqQual$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgSeqQual (@ProgArg);
}
elsif ($ProgMode =~ m/^SeqWalk$/i) {
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  &ProgSeqWalk (@ProgArg);
}
elsif ($ProgMode =~ m/^SrcSpecif$/i) {
  &ProgSrcSpecif (@ProgArg);
}
else {
  die "ERROR: unknown program mode or switch '$ProgMode'\n";
}

# exit script successfully
# cf. END blocks!
exit 0;



################################################################################
# usage help, command line arguments
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 $ProgFile provides functions concerning GAP4 databases.

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...] <Arg1> [<Arg2> ...]

Arguments
---------
 See function descriptions.

Path Arguments
--------------
 Relative paths will be resolved according to the pwd. Prefixes "~" and "~uid"
 are resolved to the home directories. Path "-" resolves to STDIN or STDOUT,
 depending on the context.
 GAP4 database paths, frequently used in argument lists, may be of the form
   (folder/)database.version
 i.e. the version suffix is obligatory, a directory prefix (absolute or
 relative) is optional.

ModeSwitch (case-insensitive)
-----------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

<none>            default ModeSwitch -$ProgParam{default}{ProgMode} if program arguments are given.
                  Otherwise like ModeSwitch -help.
-AddRev           provide reverse reads for assembly
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
                  -SlcEnds    select for contig end ranges
                  -SlcID      select for reads IDs
                  -SlcSpecif  select contigs for source specificity
                  Reads stated in a file named .GapAdd_<database>_neglect.foid
                  will be ignored in the read request step.
-AddRna           provide mRNA sequence(s) for assembly
                  *** out of curation ***
                  Arg1        GAP4 database
                  Arg2        mRNA sequence source
                  Arg3+       sequence ID(s)
-AnnotRm          delete annotations from a GAP4 database
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
                              !!! selection does not work so far !!!
                  -SlcData    case-insensitive data specifier, default "all":
                              contig  contig annotations
                              read    annotations on readings
-AssembleEnds     perform internal assembly for a GAP4 database using contig
                  ends as a temporary input.
                  *** out of curation ***
                  Arg1        GAP4 database
-AssembleScreen   convert log output of GAP4 find internal joins procedure
                  to list of contig IDs (to STDOUT)
                  *** out of curation ***
                  Arg1        assembly log file
                  Arg2        path of GAP4 contig index file
                  -SlcScore   apply match percentage threshold
-ContigMv         move contigs from one to another GAP4 database
                  Arg1        source GAP4 database
                  Arg2        target GAP4 database
                  Arg3+       contig specifier(s)
                  -solution   use this switch to supply strategies to
                              circumvent interactive dialogue. Possible:
                              LeaveConfl, DelSrcSing, DelSrcAll, DelTgtSing,
                              DelTgtAll, AlwaysYes, cancel
                  -SlcRange   no effect here
-coverage         prepare coverage graph for each contig in an assembly
                  Arg1        assembly data source
                  Arg2*       optional: contig specifier(s) for selection
                  -OutImg     prepare images showing the local coverage for
                              every contig (one image per contig)
                  -OutStump   direct multi-file output, default:
                              <date/time>_Coverage*
                  -SlcID      select for read IDs
                  -SlcRange   no effect here
                  -WinSize    window for plot smoothening
-cp               copy GAP4 database
                  Arg1        source GAP4 database
                  Arg2        target ((folder/)database.)version
                  -solution   use this switch to pre-choose strategies to
                              circumvent interactive dialogue. Possible:
                              "AlwaysYes", "overwrite"
-EndRead          report clones/readings at the end of contigs which are likely
                  to continue beyond the contig end
                  Arg1        assembly data source
                  Arg2*       either additional assembly data source(s)
                              (like Arg1) or contig specifier(s) for selection
                  -OutTab     redirect table output to file, default to STDOUT.
                  -RcCloneLen custom clone length rc file
-FollowRead       report topology for list of readings. Output consists of two
                  lists for read / clone topology.
                  Arg1        assembly data source
                  Arg2+       readings to be analysed
                  -OutStump   direct multi-file output, default:
                              <date/time>_FollowRead*
-h(elp)           output command line usage description and exit
-index            create tabular listing of readings and contigs, output to
                  multiple files
                  Arg1+       assembly data source(s)
                  -OutStump   direct multi-file output, default:
                              <date/time>_index*
                  --spacer=N  size of spacer for joining contigs,
                              default: 100.
-ListID(s)        list identifiers of objects in an assembly data source. Print
                  to STDOUT.
                  Arg1        assembly data source
                  Arg2*       either additional assembly data source(s)
                              (like Arg1) or contig specifier(s) for
                              selection
                  -SlcData    case-insensitive data specifier:
                              clone, contig (default), read.
-MapbyRead        map contig set according to mapped member readings.
                  Output is a contig topology index like for program mode
                  -index (multiple files).
                  Arg1        map information file, with expected column order:
                              MarkerID, MapGroup, MapCoo.
                  Arg2*       assembly data source(s)
                  --spacer=N  size of spacer sequence for joining contigs,
                              default: 100.
-MinimalTiling    list minimal tiling set of sequences in a contig. Output to
                  STDOUT.
                  Arg1        assembly data source
                  Arg2*       optional: contig specifier(s) for selection
                  -SlcData    sequence category selector: clones, reads
                              (default).
                  -SlcRange   no effect here
-PairReport       report pairing status of readings originating from the same
                  clone, determine observed clone lengths; output to STDOUT.
                  *** out of curation ***
                  call of "$ProgFile -ScaffdMap ..." gives superior results
                  Arg1        assembly data source
                  Arg2*       either additional assembly data source(s)
                              (like Arg1) or contig specifier(s) for selection
                  -OutTab     save positive clone list to specified file.
                              However, a full report output is done to STDOUT.
-QualAdjust(=N1(,N2))
                  for a batch of Experiment files: adjust accuracy values:
                  N1          maximum value for edited symbols (defined by
                              lower case letters), default: $ProgParam{default}{QualAdjust}{edit}
                  N2          value for extern sequences (defined by missing
                              trace file), default: $ProgParam{default}{QualAdjust}{extern}
                  Arg1*       list of Experiment files, default: files listed
                              in ./fofn
-repair           fix GAP4 database errors
                  Arg1        GAP4 database
-ScaffdList       tabular report on scaffolds (contigs which may be joined by
                  bridging clone(s)). The resulting table is a concentrate of
                  what would be got by "$ProgFile -index -SlcData=scaffd ..."
                  Arg1        assembly data source
                  Arg2*       either additional assembly data source(s)
                              (like Arg1) or contig specifier(s) for
                              selection
                  -OutReport  save "show relationships" report to specified
                              file
                  -OutStump   invoke and direct multi-file output, default:
                              no extra output.
                  -OutTab     output file for scaffold list, default: STDOUT
                  -RcCloneLen custom clone length rc file
-ScaffdMap        report scaffolds, i.e. contigs which may be joined by
                  bridging clones and report some related information like
                  read pairing status, scaffold length distribution etc.
                  Arg1        assembly data source
                  Arg2*       either additional assembly data source(s)
                              (like Arg1) or contig specifier(s) for
                              selection
                  -export     use this switch to export scaffolds.
                              Export will work only if the assembly data source
                              is a GAP4 database.
                  -OutImg     output image showing cumulative scaffold length.
                              See also switch -OutStump.
                  -OutReport  save "show relationships" report to specified
                              file
                  -OutStump   direct multi-file output, default:
                              <date/time>_ScaffdMap*
                  -RcCloneLen custom clone length rc file
-seq              retrieve sequences. Output Experiment format to STDOUT.
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
                  -RcCloneLen custom clone length rc file, only for
                              -SlcData=scaffd
                  -SlcData    case-insensitive data specifier:
                              clone   clone sequences based on contig
                                      consensi, Experiment file format, incl.
                                      pads, consensus mode 2.
                              contig  (default) contig consensi in
                                      Experiment file format, incl. pads,
                                      consensus mode 2.
                              scaffd  scaffold sequences, cmp. ModeSwitch
                                      -ScaffdMap. Sequence description will
                                      include source specificity information.
                              read    sequences of readings. Experiment file
                                      format of pre-assembled sub-format.
                  -SlcID      effect only in -SlcData=clone or -SlcData=read
                  -SlcRange   no effect here
                  --SlcPair=N select for minimal pair status: 0=none,
                              0.5=unknown, 1=well-paired f/r in same contig.
                  --spacer=N  size of spacer sequence for joining clone
                              fragments, default for -SlcData=clone: $ProgParam{default}{GapSpacer}{clone}, default
                              for -SlcData=scaffd: $ProgParam{default}{GapSpacer}{scaffd}.
-SeqExtra         find chances of extra template sequencing strategies for
                  advancement of an assembly project. Output to STDOUT.
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
-SeqQual          report quality attributes of consensus sequences
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
                  -SlcRange   no effect here
-SeqWalk          find chances of primer walk sequencing strategies for
                  advancement of an assembly project. Output to STDOUT.
                  *** not implemented ***
                  Arg1        GAP4 database
                  Arg2*       optional: contig specifier(s) for selection
-SrcSpecif        calculate source specificity for contigs. Tabular output to
                  STDOUT.
                  Arg1        assembly data source
                  Arg2*       optional: contig specifier(s) for selection
                  -RcTgtSpecif
                              custom target specificity rc file
                  -SlcRange   no effect here

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

-debug(=N)        print debug protocol to STDERR (sometimes STDOUT). Keep
                  temporary files.
                  N   debug depth value
-export=N         do extra export action.
                  N   number of scaffolds to export
-fofn=S           supply list of command arguments in a file. The entries of the
                  file will be appended to the argument list. Multiple -fofn
                  switch statements are allowed.
-log(=S)          redirect STDOUT and STDERR to LOG file
                  S  log file path, default path worked out
                     automatically from built-in directives.
-OutImg(=S)       force program to produce image output and (optionally)
                  specify output path
-OutImgRelhigh=F  define the output image's height in relation to its width
-OutImgTransp     turn image's background transparent
-OutImgWidth=N    define the output image's pixel width
-OutReport=S      save original data report to specified file
-OutStump=S       path stump for multi-file output. A default is derived from
                  local time and phrase of program mode.
-OutTab=S         file path for tabular output
-RcCloneLen=S     use this rc file for clone length data, default:
                  $ReadWatch::Library::LibGlob{CloneLen}{default}{PathRc}
                  annotated example:
                  $CorePath{call}{MeInstDoc}/$CorePath{ReadWatch}{CloneLenXmpl}
-RcTgtSpecif=S    use this rc file for target specificity data, default:
                  $ReadWatch::Library::LibGlob{TgtSpecif}{default}{PathRc}
                  annotated example:
                  $CorePath{call}{MeInstDoc}/$CorePath{ReadWatch}{TgtSpecifXmpl}
-SlcCnum=N        select for minimum clone number in contig/scaffold
-SlcData=S        select data to be treated, possible values:
                  read, clone, contig, scaffd. There are different defaults
                  depending on the program mode.
-SlcEnds=N        select sequence ends having the specified length N bp. A
                  sequence smaller than two times this value will be
                  regarded completely.
-SlcID=S          specify a regexp that shall be used for sequence ID selection
-SlcLen=N1(..N2)  select input sequences according to their length
                  N1  minimum length
                  N2  maximum length, default: no limit
-SlcRange=S       select for sequence range, syntax of arg S: <start>..<end>.
                  A negative position value in the range argument refers to
                  the sequence end.
-SlcRnum=N        select for minimum reading number in contig/scaffold
-SlcScore=F|N     select results score threshold
-SlcSpecif=S(,F)  select for source specificity.
                  S  ID of specificity set.
                  F  minimum delta of specificity measure
                  cmp. switch -RcTgtSpecif
-solution=S       specify a solution strategy for cases of conflict
-ThreshAddNum=N   repeat cycle only if number of reads added in the
                  last cycle exceeds threshold n
-timer            print time-performance protocol to STDOUT/STDERR
-v(erbose)        print extended protocol to STDOUT
-WinSize=N        specify window size for smoothening etc.
--*               program mode-specific switches. See the descriptions there.
                  Case-sensitive!

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.

Further Reading
---------------
 A detailed, though incomplete, manual is available at
 $CorePath{call}{MeInstDoc}/$CorePath{man}{Gap} .
END_USAGE
  print "\n";
  exit 0;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global options:
#   -debug       [STD]
#
# - global data:
#   $ProgMode
#   @ProgArg     may be pre-filled here beside it's typically used to store
#                return value from &GetoptsNArgs via &AddSwitch (option
#                -fofn)
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my $switch = shift;
  my $debug = $ProgOpt{-debug};

  # optional switches
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^export(=(\d+))?$/i) {
    $ProgOpt{-export} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^fofn=(.+)$/i) {
    my $SwitchArg = ($1 eq '-') ? $1 : &PathExpand($1);
    my $pTable = &LoadFoid($SwitchArg);
    if ($pTable) {
      push @ProgArg, @$pTable;
      $debug and printf STDERR "%s. %d entries loaded from fofn %s\n", &MySub, int @$pTable, $SwitchArg||"''";
    } else {
      die sprintf "ERROR: unable to read entries from file of filenames %s (-> %s)\n",
        $1, $SwitchArg;
    }
    return;
  }
  if ($switch =~ m/^log(=(.*))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^OutImg(=(.+))?$/i) {
    $ProgOpt{-OutImg} = $2 ? &PathExpand($2) : '';
    return;
  }
  if ($switch =~ m/^OutImgRelhigh=([0-9\.eE+-]+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgRelhigh} = $1;
    return;
  }
  if ($switch =~ m/^OutImgTransp(ar)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgTransp} = 1;
    return;
  }
  if ($switch =~ m/^OutImgWidth=(\d+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgWidth} = $1;
    return;
  }
  if ($switch =~ m/^OutReport=(.+)$/i) {
    $ProgOpt{-OutReport} = &PathExpand($1);
    return;
  }
  if ($switch =~ m/^OutSt[au]mp=(.+)$/i) {
    $ProgOpt{-OutStump} = &PathExpand($1);
    return;
  }
  if ($switch =~ m/^OutTab=(.+)$/i) {
    $ProgOpt{-OutTab} = &PathExpand($1);
    return;
  }
  if ($switch =~ m/^RcCloneLen=(.+)$/i) {
    $ProgOpt{-RcCloneLen} = $1;
    return;
  }
  if ($switch =~ m/^RcTgtSpecif=(.+)$/i) {
    $ProgOpt{-RcTgtSpecif} = $1;
    return;
  }
  if ($switch =~ m/^Select/i) {
    die "ERROR: selector switches are now spelled \"-Slc*\"\n";
  }
  if ($switch =~ m/^SlcCnum=(\d+)$/i) {
    if (int($1) > 0) {
      $ProgOpt{-SlcCnum} = $1;
    } else {
      print  STDERR "WARNING: option -SlcCnum=N does not take effect with N <= 0\n";
    }
    return;
  }
  if ($switch =~ m/^SlcData=(.+)$/i) {
    $ProgOpt{-SlcData} = lc($1);
    $ProgOpt{-SlcData} =~ s/^(psicontig|scaffold)/scaffd/;
    return;
  }
  if ($switch =~ m/^SlcEnds=(\d+)$/i) {
    if ($1 > 0) {
      $ProgOpt{-SlcEnds} = $1;
    } else {
      print  STDERR "WARNING: option -SlcEnds=N does not take effect with N <= 0\n";
    }
    return;
  }
  if ($switch =~ m/^SlcID=(.+)$/i) {
    $ProgOpt{-SlcID} = $1;
    return;
  }
  if ($switch =~ m/^SlcLen=(.+)$/i) {
    unless ($ProgOpt{-SlcLen} = Math::Range->new_parsed($1)) {
      die sprintf "ERROR: invalid range argument for switch -SlcLen: $1\n";
    }
    return;
  }
  if ($switch =~ m/^SlcRange=(.+)$/i) {
    unless ($ProgOpt{-SlcRange} = Math::Range->new_parsed($1)) {
      die sprintf "ERROR: invalid range argument for switch -SlcRange: $1\n";
    }
    return;
  }
  if ($switch =~ m/^SlcRnum=(\d+)$/i) {
    if ($1 > 0) {
      $ProgOpt{-SlcRnum} = $1;
    } else {
      print  STDERR "WARNING: option -SlcRnum=N does not take effect with N <= 0\n";
    }
    return;
  }
  if ($switch =~ m/^SlcScore=([0-9\.eE+-]+)$/i) {
    $ProgOpt{-SlcScore} = $1;
    return;
  }
  if ($switch =~ m/^SlcSpecif=(\w+)(,([-+]?[0-9.]+))?$/i) {
    $ProgOpt{-SlcSpecif} = $1;
    $ProgOpt{-SlcSpecifDlt} = $3 || undef;
    return;
  }
  if ($switch =~ m/^solution=(.+)$/i) {
    $ProgOpt{-solution} = $1;
    return;
  }
  if ($switch =~ m/^timer$/i) {
    $ProgOpt{-timer} = 1;
    return;
  }
  if ($switch =~ m/^ThreshAddNum=(\d+)$/i) {
    $ProgOpt{-ThreshAddNum} = $1;
    return;
  }
  if ($switch =~ m/^v(erbose)?$/i) {
    $ProgOpt{-verbose} = 1;
    return;
  }
  if ($switch =~ m/^WinSize=([\d.]+)$/i) {
    $ProgOpt{-WinSize} = $1;
    return;
  }
  if ($switch =~ m/^(?:-|var=)(\w+)[,=](.+)$/i) {
    $ProgOpt{-var}{$1} = $2;
    return;
  }

  # program mode switches
  if (defined($ProgMode)) {
    die sprintf "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
  }
  else {
    $ProgMode = $switch;
    return;
  }
}


# work out output path base from global switches
#
# INTERFACE
# - global options:
#   -debug       [STD]
#   -OutDir      [STD]
#   -OutStump    [STD]
# - return val:  output path base
#
sub PrepOstump {
  my $debug = $ProgOpt{-debug};

  # prepare output path base
  # 1st: $ProgOpt{-OutStump}
  # 2nd: time + $ProgMode
  my $PathStamp = $ProgOpt{-OutStump};
  unless ($PathStamp) {
    $PathStamp = &TimeStr(-format=>'CompactComp') .'_'. $ProgMode;
  }

  # determine effective directory
  $ProgOpt{-OutDir} and $PathStamp =
    $ProgOpt{-OutDir} .'/'. &PathSplit($PathStamp)->{name};

  # final refinement: expand to nice, rooted path
  $PathStamp = &PathExpand ($PathStamp);

  # return path
  return $PathStamp;
}


################################################################################
# hard database manipulation
################################################################################


# copy GAP4 database
#
# INTERFACE
# - argument 1: source GAP4 database: (folder/)database.version
# - argument 2: target GAP4 database: ((folder/)database.)version
#
sub ProgCp {
  my ($ProjSrc,$ArgTgt) = @_;
  my $debug = $ProgOpt{-debug};
  my %gap = %{ &GapNameFields($ProjSrc) };
  if ($gap{exists}) {
    $debug and printf "%s. source database %s\n", &MySub, $gap{full}||"''";
  } else {
    die sprintf "%s. unable to read GAP4 database %s\n", &MySub, $gap{full}||"''";
  }

  # formulation of target (may be sparse syntax referring to source)
  unless (defined($ArgTgt)) { die "ERROR: target DB is unspecified\n" }
  my $ProjTgt;
  if (length($ArgTgt) == 1) {
    $ProjTgt = "$gap{dir}/$gap{name}.$ArgTgt";
  } else {
    $ProjTgt = $ArgTgt;
  }
  if (-e "$ProjTgt.BUSY") {
    die sprintf "ERROR: target DB %s exists and is busy\n", $ProjTgt||"''";
  }

  # debug before starting action
  if ($debug) {
    printf STDERR "%s. source DB is $ProjSrc (%s, %s)\n", &MySub, $gap{name}, $gap{version};
    printf STDERR "%s. target DB is $ProjTgt\n", &MySub;
  }

  # check for existing target
  if (-e $ProjTgt) {
    unless (grep{ $_ eq 'overwrite' or $_ eq 'AlwaysYes' }
            split(/\W+/,$ProgOpt{-solution})) {
      print "WARNING: target DB $ProjTgt exists, overwrite?";
      unless (&QueryConfirm()) { exit }
    }
  }

  # copy DB
  foreach ('', '.aux', '.log') { unlink $ProjTgt.$_; }
  &touch ("$ProjTgt.BUSY");
  foreach ('', '.aux') { &FileCopy ($ProjSrc.$_, $ProjTgt.$_); }
  unlink ("$ProjTgt.BUSY");
}


# move contigs from one to another GAP4 database
#
# INTERFACE
# - argument 1:  source GAP4 database
# - argument 2:  target GAP4 database
# - argument 3+: contig specifier(s)
#
# - global options:
#   -debug       [STD]
#   -timer       print time performance protocol to STDOUT
#   -verbose     print extended protocol to STDOUT
#
sub ProgContigMv {
  my ($ProjSrc, $ProjTgt, @contig) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $verbose = $ProgOpt{-verbose};
  my $bDV = $debug || $verbose;
  my $hDV = $debug ? \*STDERR : \*STDOUT;
  my $bTimer = $ProgOpt{-timer};  my $time;

  print  "$ProgFile -$ProgMode\n";
  my %gap;
  $gap{src} = &GapNameFields ($ProjSrc);
  $gap{tgt} = &GapNameFields ($ProjTgt);
  unless (-w $gap{src}{full}) {
    die sprintf "ERROR: unable to read & write GAP4 database %s\n", $ProjSrc||"''";
  }
  if ($gap{tgt}{exists}) {
    unless (-w $gap{tgt}{full}) {
      die sprintf "ERROR: unable to read & write GAP4 database %s\n", $ProjTgt||"''";
    }
  } else {
    unless ($gap{tgt}{valid}) {
      die sprintf "ERROR: GAP4 database %s doesn't exist - unable to create\n", $ProjTgt||"''";
    }
  }
  $debug and printf "%s. target GAP4 database, name fields: %s\n", &MySub,
    join(', ', map{ "$_ => $gap{tgt}{$_}" } keys %{$gap{tgt}});
  unless (@contig) { die "ERROR: no contigs specified\n" }
  my @StrategyBatch = split (/\W+/, $ProgOpt{-solution});

  ##############################################################################
  # export contigs from source

  # do export
  my (%path,%foid,$call,$ret,$pCall);
  printf "export contigs from source database %s\n", $gap{src}{full};
  $path{DirClipbrd} = $ProgParam{TmpManag}->Create();
  mkdir ($path{DirClipbrd});
  $path{FofnExport} = "$path{DirClipbrd}/export";
  $path{FofnClipbrd} = "$path{DirClipbrd}/fofn";
  my @ContigSafe = map{ s/.+\|.+/'$&'/;$_ } map{ s/^#.+$/'$&'/;$_ } @contig;
  $call = join (' ', $CorePath{call}{GapExportDirect}, $gap{src}{full},
    $path{DirClipbrd}, @ContigSafe, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
  $ret = &GapSafeCall ($call);
  if (-s $path{FofnClipbrd}) {
    &FileCopy ($path{FofnClipbrd}, $path{FofnExport});
    @{$foid{Export}} = @{ &LoadFoid ($path{FofnExport}) };
  } else {
    $foid{Export} = [];
  }

  # check result of export procedure
  unless (@{$foid{Clipbrd}} = @{$foid{Export}}) {
    printf "no contigs exported%s\n", $ret ? ", error code $ret" : '';
    $ret and exit 1;
  } else {
    @{$foid{ClipbrdIdx}}{@{$foid{Clipbrd}}} = (1) x int(@{$foid{Clipbrd}});
    if (grep{ m/^['"\\]?#/ } @contig) {
      printf "WARNING: contig specification of the fashion #n is ambiguous\n";
      print  "  cannot check for missing contigs!\n";
    } elsif (@{$foid{ExportMiss}} = grep{ !$foid{ClipbrdIdx}{$_} } @contig) {
      printf "NOTE: %d of %d contig%s missing in export from source database\n",
        int(@{$foid{ExportMiss}}), int(@contig), (@contig==1)?'':'s';
    }
    printf "%d read%s exported\n",
      int(@{$foid{Export}}), (@{$foid{Export}}==1)?'':'s';
  }

  ##############################################################################
  # read cross-check

  # preparations for cross-check and solution management
  $path{ProjTmp} = "$path{DirClipbrd}/temp.0";
  $path{FofnConfl} = "$path{DirClipbrd}/Confl";
  $path{FofnSingle} = "$path{DirClipbrd}/ConflSingle";
  my %StrategyRes;

  if (@{$foid{Clipbrd}} and $gap{tgt}{exists}) {

    # get read list from target database
    $path{FoidHaveTgt} = $ProgParam{TmpManag}->Create(-touch=>1);
    $call = join (' ', $CorePath{call}{GapContigReads}, $gap{tgt}{full}, $path{FoidHaveTgt});
    $ret = &GapSafeCall ($call, -debug=>$dbg2);
    $foid{HaveTgt} = &LoadFoid ($path{FoidHaveTgt}) || [];
    if ($ret) {
      print  STDERR "ERROR: unable to get list of read IDs from target database\n";
      $bDV and printf $hDV "%s. call was: $call\n", &MySub;
      $bDV and print  $hDV $SeqAlign::Gap::LibGlob{ErrorMsg};
      exit 1;
    }

    { # redo block
      $bDV and print  $hDV "read cross-check\n";

      # get read list from clipboard database
      unless ($foid{Clipbrd} = &LoadFoid ($path{FofnClipbrd}) and @{$foid{Clipbrd}}) {
        print  "there's nothing left for import\n";
        last;
      }
      %{$foid{ClipbrdIdx}} = map{ ($_,1) } @{$foid{Clipbrd}};
      $debug and printf "loaded %d/%d read IDs from %s\n", int @{$foid{Clipbrd}}, int keys %{$foid{ClipbrdIdx}}, $path{FofnClipbrd};

      # conflict diagnosis
      if (@{$foid{Confl}} = grep{ $foid{ClipbrdIdx}{$_} } @{$foid{HaveTgt}}) {
        printf "CONFLICT: %d of %d read%s already reside%s in the target database\n",
          int @{$foid{Confl}}, int @{$foid{Clipbrd}},
          (@{$foid{Clipbrd}} == 1) ? '':'s', (@{$foid{Confl}} == 1) ? 's':'';
        $debug and print join ("\n", @{$foid{Confl}}, '');

  ##############################################################################
  # solve conflict

        # solution definitions
        %StrategyRes or %StrategyRes = (

          LeaveConfl => { idnum => '1',
                          descr => 'leave conflicting contigs in source database',
                          func  => sub {
            #print  STDOUT map{ "$_\n" } @{$foid{Confl}};
            $call = join (' ', $CorePath{call}{GapAssembDirect}, $path{ProjTmp},
              $path{FofnClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $path{FofnTmp} = $ProgParam{TmpManag}->Create();
            $call = join (' ', $CorePath{call}{GapContigReads}, $gap{tgt}{full},
              $path{FofnTmp}, map{ "'$_'" } @{$foid{Confl}});
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $call = join (' ', $CorePath{call}{GapReadDel}, $path{ProjTmp},
              $path{FofnTmp}, '>/dev/null');
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $call = join (' ', $CorePath{call}{GapExportDirect}, $path{ProjTmp},
              $path{DirClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2);
            $debug or unlink (glob "$path{ProjTmp}*");
            %{$foid{ConflIdx}} = map{ ($_=>1) } @{$foid{Confl}};
            @{$foid{Export}} = grep{ !$foid{ConflIdx}{$_} } @{$foid{Export}};
            &WriteFile ($path{FofnExport}, join("\n",@{$foid{Export}},''));
            }, },

          DelSrcAll => {  idnum => 2,
                          descr => 'delete all conflicting readings in source database',
                          func  => sub {
            $call = join (' ', $CorePath{call}{GapAssembDirect}, $path{ProjTmp},
              $path{FofnClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            &WriteFile ($path{FofnConfl}, join("\n",reverse(@{$foid{Confl}}),''));
            $call = join (' ', $CorePath{call}{GapReadDel}, $path{ProjTmp},
              $path{FofnConfl}, '>/dev/null');
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $call = join (' ', $CorePath{call}{GapExportDirect}, $path{ProjTmp},
              $path{DirClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2);
            $debug or unlink (glob "$path{ProjTmp}*");
            }, },

          DelSrcSing => { idnum => 3,
                          descr => 'delete conflicting single reads in source database',
                          func  => sub {
            $call = join (' ', $CorePath{call}{GapAssembDirect}, $path{ProjTmp},
              $path{FofnClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $call = join (' ', $CorePath{call}{GapReadSingle}, $path{ProjTmp},
              reverse @{$foid{Confl}}, ">$path{FofnSingle}");
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            if (-z $path{FofnSingle}) { return }
            $call = join (' ', $CorePath{call}{GapReadDel}, $path{ProjTmp},
              $path{FofnSingle}, '>/dev/null');
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            $call = join (' ', $CorePath{call}{GapExportDirect}, $path{ProjTmp},
              $path{DirClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2);
            $debug or unlink (glob $path{ProjTmp}.'*');
            }, },

          DelTgtAll  => { idnum => 4,
                          descr => 'delete all conflicting readings in target database',
                          func  => sub {
            &WriteFile ($path{FofnConfl}, join("\n",reverse(@{$foid{Confl}}),''));
            $pCall = &CallExtClosure ($CorePath{call}{GapReadDel}, $gap{tgt}{full},
              $path{FofnConfl}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            $time = &Sum ((times)[0,2]);
            unless (&PhysicalAccessCtrl ($gap{tgt}{full}, -mode=>'func', -noLock=>1,
              -func=>$pCall, -log=>1)) {
              print STDERR "ERROR in deleting files in target database. It may be corrupt now\n";
              return;
            }
            $bTimer and printf "%s. CPU time to delete reads in project: %.3f\n", &MySub, &Sum((times)[0,2])-$time;
            $call = join (' ', $CorePath{call}{GapContigReads}, $gap{tgt}{full}, $path{FoidHaveTgt});
            $ret = &GapSafeCall ($call, -debug=>$dbg2);
            if ($ret) {
              print  STDERR "ERROR: unable to get list of read IDs from target database, operation aborted\n";
              $bDV and printf $hDV "%s. call was: $call\n", &MySub;
              $bDV and print  $hDV $SeqAlign::Gap::LibGlob{ErrorMsg};
              exit 1;
            }
            $foid{HaveTgt} = &LoadFoid ($path{FoidHaveTgt});
            }, },

          DelTgtSing => { idnum => 5,
                          descr => 'delete conflicting single reads in target database',
                          func  => sub {
            $call = join (' ', $CorePath{call}{GapReadSingle}, $gap{tgt}{full},
              reverse @{$foid{Confl}}, ">$path{FofnSingle}");
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            if (-z $path{FofnSingle}) { return }
            $pCall = &CallExtClosure ($CorePath{call}{GapReadDel}, $gap{tgt}{full},
              $path{FofnSingle}, ($debug or $verbose) ? '':'>/dev/null');
            unless (&PhysicalAccessCtrl ($gap{tgt}{full}, -mode=>'func', -noLock=>1,
              -func=>$pCall, -log=>1)) {
              print STDERR "ERROR in deleting files in target database. It may be corrupt now\n";
              return;
            }
            $call = join (' ', $CorePath{call}{GapContigReads}, $gap{tgt}{full}, $path{FoidHaveTgt});
            $ret = &GapSafeCall ($call, -debug=>$dbg2);
            if ($ret) {
              print  STDERR "ERROR: unable to get list of read IDs from target database, operation aborted\n";
              $bDV and printf $hDV "%s. call was: $call\n", &MySub;
              $bDV and print  $hDV $SeqAlign::Gap::LibGlob{ErrorMsg};
              exit 1;
            }
            $foid{HaveTgt} = &LoadFoid ($path{FoidHaveTgt});
            }, },

          FakeSrc => { idnum => 6,
                       descr => 'rename conflicting readings to fakes in source database',
                       func  => sub {
            $call = join (' ', $CorePath{call}{GapAssembDirect}, $path{ProjTmp},
              $path{FofnClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2) and return;
            %{$foid{AllIdx}} = map{ ($_,1) } @{$foid{Clipbrd}}, @{$foid{HaveTgt}};
            foreach my $IdCurr (@{$foid{Confl}}) {
              my $CtUnique = '';
              my $IdTempl = $IdCurr . 'f';
              my $IdNew;
              while ($foid{AllIdx}{ $IdNew = $IdTempl.$CtUnique }) { ++$CtUnique }
              $pCall = &CallExtClosure ($CorePath{call}{GapReadRename},
                $path{ProjTmp}, $IdCurr, $IdNew, $debug ? '':'>/dev/null');
              unless (&PhysicalAccessCtrl ($gap{tgt}{full}, -mode=>'func', -noLock=>1,
                -func=>$pCall)) {
                print STDERR "ERROR while renaming read in target database. It may be corrupt now\n";
                return;
              }
            }
            $call = join (' ', $CorePath{call}{GapExportDirect}, $path{ProjTmp},
              $path{DirClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
            &GapSafeCall ($call, -debug=>$dbg2);
            $debug or unlink (glob $path{ProjTmp}.'*');
            }, },

          FakeTgt    => { idnum => 7,
                          descr => 'rename conflicting readings to fakes in target database',
                          func  => sub {
            %{$foid{AllIdx}} = map{ ($_,1) } @{$foid{Clipbrd}}, @{$foid{HaveTgt}};
            foreach my $IdCurr (@{$foid{Confl}}) {
              my $CtUnique = '';
              my $IdTempl = $IdCurr . 'f';
              my $IdNew;
              while ($foid{AllIdx}{ $IdNew = $IdTempl.$CtUnique }) { ++$CtUnique }
              $pCall = &CallExtClosure ($CorePath{call}{GapReadRename},
                $gap{tgt}{full}, $IdCurr, $IdNew, $debug ? '':'>/dev/null');
              unless (&PhysicalAccessCtrl ($gap{tgt}{full}, -mode=>'func', -noLock=>1,
                -func=>$pCall)) {
                print STDERR "ERROR while renaming read in target database. It may be corrupt now\n";
                return;
              }
            }
            $call = join (' ', $CorePath{call}{GapContigReads}, $gap{tgt}{full}, $path{FoidHaveTgt});
            $ret = &GapSafeCall ($call, -debug=>$dbg2);
            if ($ret or ! ($foid{HaveTgt} = &LoadFoid ($path{FoidHaveTgt})) ) {
              print  STDERR "ERROR: unable to get list of read IDs from target database, operation aborted\n";
              $bDV and printf $hDV "%s. call was: $call\n", &MySub;
              $bDV and print  $hDV $SeqAlign::Gap::LibGlob{ErrorMsg};
              exit 1;
            }
            }, },

          ListConfl  => { idnum => '0a',
                          descr => 'list conflicting readings',
                          func  => sub {
            print  STDOUT map{ "$_\n" } @{$foid{Confl}};
            }, },

          ListUniq   => { idnum => '0b',
                          descr => 'list unique readings',
                          func  => sub {
            $foid{ConflIdx} = { map{ ($_,1) } @{$foid{Confl}} };
            print  STDOUT map{ "$_\n" } grep{ !$foid{ConflIdx}{$_} } @{$foid{Clipbrd}};
            }, },

          cancel     => { idnum => '0c',
                          descr => 'cancel operation',
                          func  => sub {
            &WriteFile ($path{FofnClipbrd}, ''); delete $foid{Clipbrd};
            &WriteFile ($path{FofnExport}, '');
            }, },
          );

        foreach (keys %StrategyRes) { $StrategyRes{$_}{id} = $_; }
        my $StrategyValid = sub {
          my $StrategyArg = shift @_;
          my @StrategyGrep;
          length ($StrategyArg) or return undef;
          if ($StrategyRes{$StrategyArg}
            or (@StrategyGrep = (grep{ $_->{idnum} eq $StrategyArg; } values %StrategyRes)
              and $StrategyRes{${$StrategyGrep[0]}{id}}
              and $StrategyArg = ${$StrategyGrep[0]}{id})) {
            return $StrategyArg;
          } else {
            print  STDERR "ERROR: don't know solution strategy $StrategyArg\n";
            return undef;
          }
        };

        # pick a solution
        my $StrategyCurr;
        while ($StrategyCurr = shift @StrategyBatch) {
          ($StrategyCurr = &$StrategyValid($StrategyCurr)) and last;
        }
        unless ($StrategyCurr) {

          # dialogue to get a solution
          print  "pick one ore several solutions from the list:\n";
          foreach (sort{ $a->{idnum} cmp $b->{idnum} } values %StrategyRes) {
            printf "%-4s %-10s  %s\n", $_->{idnum}||='0', $_->{id}, $_->{descr};
          }
          ProgContigMvConsole: while (1) { # console loop
            print 'choose> ';
            chop ($StrategyCurr=<STDIN>);
            @StrategyBatch = split (/\W+/, $StrategyCurr);
            while ($StrategyCurr = shift @StrategyBatch) {
              $debug and printf STDERR "got strategy %s\n", length ($StrategyCurr) ? $StrategyCurr:"''";
              $StrategyCurr = &$StrategyValid ($StrategyCurr) and last ProgContigMvConsole;
            }
          }
        }

        # solution procedure
        $bDV and print  $hDV "applying solution strategy $StrategyCurr\n";
        &{$StrategyRes{$StrategyCurr}{func}} ();
        unless ($StrategyCurr eq 'cancel') { redo }
      }

      # no conflict (anymore)
      else {
        $bDV and printf $hDV "no conflicts found\n",
      }
    }
  }

  ##############################################################################
  # finish

  # there's an import
  if (exists($foid{Clipbrd}) and int(@{$foid{Clipbrd}})) {

    # remove redundant tags (this is a dicty special)
    $gap{src}{phys} = &PathPhysical ($gap{src}{full});
    if ($gap{src}{phys} =~ m/dicty/i or
        $gap{src}{phys} =~ m/mycos/i) {
      $path{cwd} = &PathCwd();
      chdir $path{DirClipbrd};
      system ($CorePath{call}{DATagReduce});
      chdir $path{cwd};
    }

    # adjust quality (always)
    $path{cwd} = &PathCwd();
    chdir $path{DirClipbrd};
    system ($CorePath{call}{DAQualAdjust});
    chdir $path{cwd};

    # import contigs into target database
    printf "import contigs into target database %s: %d read%s\n",
      $gap{tgt}{full}, int(@{$foid{Clipbrd}}), (@{$foid{Clipbrd}}==1)?'':'s';
    $pCall = &CallExtClosure ($CorePath{call}{GapAssembDirect}, $gap{tgt}{full},
      $path{FofnClipbrd}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
    unless (&PhysicalAccessCtrl ($gap{tgt}{full}, -mode=>'func', -noLock=>1,
      -func=>$pCall, -log=>1)) {
      print STDERR "ERROR: directed assembly failed\n";
      $bDV and printf $hDV "%s. call was: %s\n", &MySub, &$pCall ('WhatRUCalling');
      exit 1;
    }

    # corruption-free copy of the database
    unless ($gap{tgt}{exists}) {
      print "prepare corruption-free copy of the database\n";
      $call = join (' ', $CorePath{call}{GapDbCopy}, $gap{tgt}{full}, 'X');
      $ret = &GapSafeCall ($call);
      system ($CorePath{call}{GapDbCopyPhys},
        "$gap{tgt}{dir}/$gap{tgt}{name}.X", $gap{tgt}{version});
      $debug or unlink (glob ("$gap{tgt}{dir}$gap{tgt}{name}.X*"));
    }

    # check success of import procedure
    # ...
  }

  # delete contigs from source database
  if (-s $path{FofnExport}) {  # in the beginning, there was an export
    my $bDel;
    if (@{$foid{Clipbrd}}) {   # there's still an export
      $bDel = 1;
    } else {                     # there's no longer an export
      $bDel = int (grep{ $_ eq 'AlwaysYes' } split(/\W+/,$ProgOpt{-solution}));
      unless ($bDel) {
        print "delete reads in source database anyway?";
        &QueryConfirm() and $bDel = 1;
      }
    }
    if ($bDel) {
      print "delete contigs from source database\n";
      $pCall = &CallExtClosure ($CorePath{call}{GapReadDel}, $gap{src}{full},
        $path{FofnExport}, '>', $path{log}=$ProgParam{TmpManag}->Create(-touch=>1));
      unless (&PhysicalAccessCtrl ($gap{src}{full}, -mode=>'func', -noLock=>1,
        -func=>$pCall, -log=>1)) {
        print STDERR "ERROR: deletion of contigs in source database failed\n";
        $bDV and printf $hDV "%s. call was: %s\n", &MySub, &$pCall('WhatRUCalling');
        exit 1;
      }
    }
  }

  # move SCF links/files
  if (@{$foid{Clipbrd}} and
      &PathPhysical ($gap{src}{dir}) ne &PathPhysical ($gap{tgt}{dir})) {
    $debug and printf "%s. directory comparison:\n  %s\n  %s\n", &MySub,
      &PathPhysical ($gap{src}{dir}), &PathPhysical ($gap{tgt}{dir});
    print  "moving SCF files\n";
    foreach (@{$foid{Clipbrd}}) {
      $path{SCF} = "$gap{src}{dir}/${_}SCF";
      (-w $path{SCF}) or next;
      unless (&mv ($path{SCF}, $gap{tgt}{dir})) {
        printf STDERR "ERROR: moving of SCF file %s failed\n", $path{SCF};
      }
    }
  }

  # tidy up
  if ($debug) {
    print "tidy up suppressed in debug mode\n";
  } else {
    print "tidy up\n";
    unlink grep{-e $_} grep{ !$path{SaveIdx}{$_} } glob("$path{DirClipbrd}/*"),
      $path{log}, $path{FoidHaveTgt}, $path{FofnTmp};
    rmdir $path{DirClipbrd};
  }
}


# delete annotations
#
# INTERFACE
# - argument 1: GAP4 database
#
# - global options:
#   -debug      print debug protocol to STDOUT
#   -SlcData    case-insensitive data specifier, default "all", possible:
#               all, contig, read
#
sub ProgAnnotRm {
  my ($PathProj,@SlcContig) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my %SlcContig = map{ ($_=>1) } @SlcContig;
  my %SlcData = map{ ($_=>1) }
    grep{ m/^(contig|read)$/ } map{ m/reading$/?'read':$_ }
    map{ ($_ eq 'all')?qw(contig read):($_) } $ProgOpt{-SlcData}||'all';
  $debug and printf STDERR "%s. data selectors: %s\n", &MySub,
    join(' ',keys(%SlcData));

  print  "$ProgFile\n";
  my %gap = %{ &GapNameFields($PathProj) };
  if ($gap{exists}) {
    $debug and printf "%s. working on database %s\n", &MySub, $gap{full}||"''";
  } else {
    die sprintf "%s. unable to read GAP4 database %s\n", &MySub, $gap{full}||"''";
  }

  ##############################################################################
  # analyse database
  # - neither used nor free
  # - loop in list

  # get data structure
  my $pGapData = &GapDataStruct ($gap{full}, -debug=>$dbg2);
  unless ($pGapData) {
    die sprintf "%s. ERROR in \&GapDataStruct, got no data structure\n", &MySub;
  }

  # iterate over contigs, apply selectors
  my (@AnnotCtgOff,@ChgCommand);
  foreach my $pCtg (@{$$pGapData{contig}}) {
    #if (@SlcContig and !$SlcContig{"#$$pCtg{data_num}"} and !$SlcContig{$$pGapData{reading}[$$pCtg{left}]{name}}) {
    ## this doesn't work!!! Used Gap4 data structure does not contain clear
    ##   contig/read names!

    # delete annotation chains in contigs
    if ($$pCtg{annotations} and $SlcData{contig}) {
      # remind first object in chain of annotations
      push @AnnotCtgOff, $$pCtg{annotations};
      # delete chain in contig
      $$pCtg{annotations} = 0;
      push @ChgCommand, sprintf ("io_write_contig \$io %d \"%s\"",
        $$pCtg{data_num}, &GapHash('contig',$pCtg));
    }

    # delete annotation chains in readings
    if ($SlcData{reading}) {
      ## iteration does not work this way!!!
      ## we need to crawl through the chain of readings!
      next;
      foreach my $pRead (grep{ $_->{annotations} }@{$$pGapData{reading}}) {
        # remind first object in chain of annotations
        push @AnnotCtgOff, $$pRead{annotations};
        # delete chain in reading
        $$pRead{annotations} = 0;
        push @ChgCommand, sprintf ("io_write_reading \$io %d \"%s\"",
          $$pRead{data_num}, &GapHash('reading',$pRead));
      }
    }
  }

  # move annotation chains to free list
  if (@AnnotCtgOff) {
    my $AnnotNumPrev = $$pGapData{database}{free_annotations};

    # iterate over annotation chains
    foreach my $AnnotNum (@AnnotCtgOff) {
      my $pAnnot = $$pGapData{annotation}[$AnnotNum];

      # crawl through current annotation chain, find end
      while ($$pAnnot{next}) {
        $pAnnot = $$pGapData{annotation}[$$pAnnot{next}];
      }

      # move current annotation chain in front of free list
      $$pAnnot{next} = $AnnotNumPrev;
      push @ChgCommand, sprintf ("io_write_annotation \$io %d \"%s\"",
        $$pAnnot{data_num}, &GapHash('annotation',$pAnnot));

      $AnnotNumPrev = $AnnotNum;
    }

    # append to free list
    $$pGapData{database}{free_annotations} = $AnnotNumPrev;
    push @ChgCommand, sprintf ("io_write_database \$io \"%s\"",
      &GapHash ('database', $$pGapData{database}));
  }

  ##############################################################################
  # execute modification strategy
  if (@ChgCommand) {

    # write Tcl script
    my $PathScript = $ProgParam{TmpManag}->Create();
    &WriteFile ($PathScript, join("\n",@ChgCommand,''));

    # execute Tcl script
    $debug and printf "%s. executing script for database modification\n", &MySub;
    my $ret = int (system ("$CorePath{call}{GapExec} $gap{full} $PathScript") / 256);

    # tidy up
    $debug or unlink $PathScript;
  }

  # no errors found
  else {
    print STDERR "no contig annotations found\n";
  }
}


# fix errors in a GAP4 database
#
# INTERFACE
# - argument 1: GAP4 database
#
# - global options:
#   -debug      print debug protocol to STDOUT
#   -timer      print time performance protocol to STDOUT
#
# DEBUG, CHANGES, ADDITIONS
# - see 'repair doubly referenced annotations'. Retrieval of looser reference.
# - see 'repair misordered annotations'. analyse complete chains.
#
sub ProgRepair {
  my ($PathProj) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  print  "$ProgFile -$ProgMode\n";
  my %gap = %{ &GapNameFields($PathProj) };
  if ($gap{exists}) {
    $debug and printf "%s. working on database %s\n", &MySub, $gap{full}||"''";
  } else {
    die sprintf "%s. unable to read GAP4 database %s\n", &MySub, $gap{full}||"''";
  }

  ##############################################################################
  # analyse database

  # get data structure
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pGapData = &GapDataStruct ($gap{full},-debug=>$dbg2);
  unless ($pGapData) {
    die sprintf "%s. ERROR in \&GapDataStruct, got no data structure\n", &MySub;
  }
  if ($bTimer) {
    printf "%s. CPU time for retrieval/construction of data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }
  # don't rewrite $pGapData with &DataPrint !
  # the annotation structures contain references to all their referees.

  # check readings
  my (@ReadLoop,@AnnotNull,@AnnotLoop,@AnnotOutsd,@AnnotOrder,$i);
  for ($i=1; $i<@{$$pGapData{reading}||[]}; $i++) {
    my $pRead = $$pGapData{reading}[$i];
    # valid number of referees: 2
    if (@{$$pRead{referee}||[]} != 2) {

      # neither used nor free
      if (@{$$pRead{referee}||[]} < 2) {
        print  "reading $$pRead{data_num} missing reference.\n";
      }

      # loop in reading list
      elsif (@{$$pRead{referee}} > 2) {
        print  "extra reference to reading $$pRead{data_num}.\n";
        foreach (@{$$pRead{referee}} ) {
          if (ref($_) eq 'HASH') {
            print  "  referee $_->{data_type} $_->{data_num}\n";
          } else {
            print  "  referee error '$_'\n";
          }
        }
        push @ReadLoop, $pRead;
      }
    }
  }

  # check annotations
  my (@AnnotNull, @AnnotLoop, @AnnotOutsd, @AnnotOrder);
  for ($i=1; $i<=$$pGapData{database}{Nannotations}; $i++) {
    my $pAnnot = $$pGapData{annotation}[$i];
    # valid number of referees: 1
    if (@{$$pAnnot{referee}||[]} != 1) {

      # neither used nor free
      if (@{$$pAnnot{referee}||[]} < 1) {
        print  "annotation $$pAnnot{data_num} neither used or free.\n";
        push @AnnotNull, $pAnnot;
      }

      # loop in annotation list
      elsif (@{$$pAnnot{referee}} > 1) {
        print  "doubly referenced annotation $$pAnnot{data_num}.\n";
        foreach (@{$$pAnnot{referee}} ) {
          if (ref($_) eq 'HASH') {
            print  "  referee $_->{data_type} $_->{data_num}\n";
          } else {
            print  "  referee struct error ('$_')\n";
          }
        }
        push @AnnotLoop, $pAnnot;
      }
    }

    # annotations out of range
    if ($$pAnnot{ref_base}{data_type} ne 'FreeAnnotations' and
        $$pAnnot{position} > $$pAnnot{ref_base}{length}
    ) {
      printf "annotation out of range: base referee %s %s\n",
        $$pAnnot{ref_base}{data_type}, $$pAnnot{ref_base}{data_num}||'';
      push @AnnotOutsd, $pAnnot;
    }

    # misordered annotations
    # remind annotation downstream in linked list
    if (int(@{$$pAnnot{referee}||[]}) == 1 and
        $$pAnnot{referee}[0]{data_type} eq 'annotation' and
        $$pAnnot{position} < $$pAnnot{referee}[0]{position} and
        $$pAnnot{ref_base}{data_type} ne 'FreeAnnotations'
    ) {
      printf "annotation out of positional order: %d (left of referring %d), base referee %s %s\n",
        $$pAnnot{data_num}, $$pAnnot{referee}[0]{data_num},
        $$pAnnot{ref_base}{data_type}, $$pAnnot{ref_base}{data_num}||'';
      push @AnnotOrder, $pAnnot;
    }
  }

  if ($bTimer) {
    printf "%s. CPU time for problem analysis: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }

  ##############################################################################
  # repair doubly referenced readings
  my @repair;
  if (@ReadLoop) {
    print  "repairing doubly referenced readings\n";

    # loop over doubly referenced readings
    print  STDERR "WARNING: repair is not implemented\n";
#    foreach my $pRead (@ReadLoop) {
#
#      # determine looser referee
#      my ($pReferee) = sort {
#        $WinnerByType{$a->{data_type}} <=> $WinnerByType{$b->{data_type}} or
#                       $b->{data_num}  <=> $a->{data_num} or
#        1 } @{$$pRead{referee}};
#      print  "  looser referee: $$pReferee{data_type} $$pReferee{data_num}\n";
#
#      # delete reference of looser
##      push @repair, sprintf ("io_write_%s \$io %d \"%s\"",
##        $$pReferee{data_type}, $$pRead{data_num},
##        &GapHash($$pReferee{data_type},$pReferee));
#    }
  }

  ##############################################################################
  # repair annotations out of range
  if (@AnnotOutsd) {
    print  "repairing annotations out of range\n";

    # force lost annotations into one chain
    foreach my $pAnnot (@AnnotOutsd) {

      # delete reference of looser
      my $pReferee = $$pAnnot{referee}[0] or next;
      $$pReferee{data_type} or next;
      $$pReferee{next} = 0;
      push @repair, sprintf ("io_write_%s \$io %d \"%s\"",
        $$pReferee{data_type}, $$pReferee{data_num},
        &GapHash($$pReferee{data_type},$pReferee));

      # continue like for lost annotation
      push @AnnotNull, $pAnnot;
    }
  }

  ##############################################################################
  # repair lost annotations
  if (@AnnotNull) {
    print  "repairing lost annotations\n";

    # force lost annotations into one chain
    for ($i=0; $i<@AnnotNull; $i++) {
      my $pAnnot = $AnnotNull[$i];
      if ($i+1 < @AnnotNull) {
        $$pAnnot{next} = $AnnotNull[$i+1]{data_num};
      } else {
        $$pAnnot{next} = $$pGapData{database}{free_annotations};
      }
      push @repair, sprintf ("io_write_annotation \$io %d \"%s\"",
        $$pAnnot{data_num}, &GapHash('annotation',$pAnnot));
    }

    # append to free list
    $$pGapData{database}{free_annotations} = $AnnotNull[0]{data_num};
    push @repair, sprintf ("io_write_database \$io \"%s\"",
      &GapHash('database',$$pGapData{database}));
  }

  ##############################################################################
  # repair doubly referenced annotations
  if (@AnnotLoop) {
    printf "repairing doubly referenced annotations: %d\n", int(@AnnotLoop);

    # referee winning rules
    my %WinnerByType = (
      annotation => 1,
      reading    => 2,
      contig     => 3,
      );

    # loop over doubly referenced annotations
    foreach my $pAnnot (@AnnotLoop) {

      # determine looser referee
      # - the way to determine the looser is not very ambitious, i know
      # - It would be fine to see if a reference is rooted in the free list.
      #   Then, this reference is definitely the looser.
      my ($pReferee) = sort {
        $WinnerByType{$a->{data_type}} <=> $WinnerByType{$b->{data_type}} or
                       $b->{data_num}  <=> $a->{data_num} or
        1 } @{$$pAnnot{referee}};
      printf "  referees: %s\n", join (', ', map{ "$_->{data_type} $_->{data_num}" } @{$$pAnnot{referee}});
      print  "  looser referee: $$pReferee{data_type} $$pReferee{data_num}\n";

      # delete reference of looser
      $$pReferee{next} = 0;
      push @repair, sprintf ("io_write_%s \$io %d \"%s\"",
        $$pReferee{data_type}, $$pReferee{data_num},
        &GapHash($$pReferee{data_type},$pReferee));
    }
  }

  ##############################################################################
  # repair unordered annotations
  if (@AnnotOrder) {
    printf "repairing unordered annotations: %d\n", int(@AnnotOrder);

    # loop over unordered annotations
    while (my $pAnnot = shift(@AnnotOrder)) {

      # grab full linked list
      my $pReferee = $$pAnnot{ref_base};
      my @AnnotList = ($pReferee);
      my $AnnotCt;
      while (
        $_=$$pReferee{($$pReferee{data_type}eq'annotation')?'next':'annotations'} and
        $_ < int(@{$$pGapData{annotation}}) and
        $pAnnot = $$pGapData{annotation}[$_]
      ) {
        $$pAnnot{OrderOrig} = ++$AnnotCt;
        push @AnnotList, $pAnnot;
        $pReferee = $pAnnot;
      }
      # sort list according to annotation position (start position)
      # but keep reference base (reading, contig) first in the list
      @AnnotList = sort {
        $b->{data_type} cmp $a->{data_type} or
         $a->{position} <=> $b->{position} or
        $a->{OrderOrig} <=> $b->{OrderOrig}
        } @AnnotList;
      if ($debug) {
        printf STDERR "%s. linked list anchored at %s(%d)\n", &MySub,
          $$pAnnot{ref_base}{data_type}, $$pAnnot{ref_base}{data_num};
        for ($i=1; $i<int(@AnnotList); ++$i) {
          printf STDERR "  %s(%d), orig. order %d, position %d, type %s, referencing %d\n",
            $AnnotList[$i]{data_type}, $AnnotList[$i]{data_num},
            $AnnotList[$i]{OrderOrig}, $AnnotList[$i]{position},
            $AnnotList[$i]{type}, $AnnotList[$i]{next};
        }
      }
      # delete annotation list from erroneous cases
      if (@AnnotOrder) {
        my %AnnotIdx = map{ ("$_"=>1) }@AnnotList;
        @AnnotOrder = grep{ !$AnnotIdx{"$_"} }@AnnotOrder;
      }

      # update references according to sorted order
      for ($i=1; $i<int(@AnnotList); ++$i) {
        if ($AnnotList[$i-1] ne $AnnotList[$i]{referee}[0]) {
          $AnnotList[$i]{referee}[0] = $pReferee = $AnnotList[$i-1];
          $$pReferee{($$pReferee{data_type}eq'annotation')?'next':'annotations'}
            = $AnnotList[$i]{data_num};
          $debug and printf STDERR "%s. repairing reference %s(%d)->%s(%d)\n", &MySub,
            $$pReferee{data_type}, $$pReferee{data_num},
            $AnnotList[$i]{data_type}, $AnnotList[$i]{data_num};
          push @repair, sprintf ("io_write_%s \$io %d \"%s\"",
            $$pReferee{data_type}, $$pReferee{data_num},
            &GapHash($$pReferee{data_type},$pReferee));
        }
      }
      if ($AnnotList[-1]{data_type} eq 'annotation' and $AnnotList[-1]{next}!=0) {
        $AnnotList[-1]{next} = 0;
        $debug and printf STDERR "%s. repairing reference %s(%d)->0\n", &MySub,
          $AnnotList[-1]{data_type}, $AnnotList[-1]{data_num};
        push @repair, sprintf ("io_write_annotation \$io %d \"%s\"",
          $AnnotList[-1]{data_num}, &GapHash('annotation',$AnnotList[-1]));
      }
    }
  }

  ##############################################################################
  # execute repair strategy/strategies
  if (@repair) {

    if ($bTimer) {
      printf "%s. CPU time for problem fix: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
      $time = &Sum ((times)[0,2]);
    }

    # write Tcl script
    my $PathScript = $ProgParam{TmpManag}->Create();
    &WriteFile ($PathScript, join("\n",@repair,''));

    # execute Tcl script
    $debug and printf "%s. executing repair script\n", &MySub;
    my $ret = int (system ("$CorePath{call}{GapExec} $gap{full} $PathScript") / 256);

    # tidy up
    $debug or unlink $PathScript;
    $bTimer and printf "%s. CPU time for repair: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  }

  # no errors found
  else {
    print STDERR "no error or no repair strategy\n";
  }
}


################################################################################
# import, assemble
################################################################################


# provide mRNA sequence(s) for assembly into a GAP4 database
#
# INTERFACE
# - argument 1:  GAP4 database
# - argument 2:  sequence source file
# - argument 3+: sequence ID(s)
#
# - global options:
#   -debug       [STD]
#   -timer       [STD]
#
# DEBUG, CHANGES, ADDITIONS
# - there's no detection of unentered sequences
#
sub ProgAddRna {
  my ($PathProj,$SeqRnaSrc,@SeqRnaSelect) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  print  "$ProgFile -$ProgMode\n";
  my %gap = %{ &GapNameFields($PathProj) };
  unless (-w $gap{full}) {
    die sprintf "ERROR: unable to read & write GAP4 database %s", $gap{full}||"''";
  }

  # select sequences from sequence source
  my (%file,$pCall);
  $bTimer and $time = &Sum ((times)[0,2]);
  $file{foid} = $ProgParam{TmpManag}->Create();
  &WriteFile ($file{foid}, join("\n",@SeqRnaSelect,''));
  $file{EnterFa} = $ProgParam{TmpManag}->Create();
  $pCall = &CallExtClosure ($CorePath{call}{SeqCat}, $SeqRnaSrc,
    "-MatchID=$file{foid}", '-pure=DNA5', "-OutSeq=$file{EnterFa}");
  $debug and printf STDERR "%s. calling: %s\n", &MySub, &$pCall ('WhatRUCalling');
  unless (&$pCall() and -s $file{EnterFa}) {
    die sprintf "ERROR: no matching sequences found in file %s\n  call was: %s\n",
      $SeqRnaSrc, &$pCall ('WhatRUCalling');
  }
  if ($bTimer) {
    printf STDERR "%s. CPU time to find sequences: %.3f\n", &MySub, &Sum((times)[0,2])-$time;
    $time = (times)[0];
  }

  # prepare consensus from GAP4
  $file{SeqGap} = $ProgParam{TmpManag}->Create();
  system ("$CorePath{call}{SeqConcat} $gap{full} -OutSeq=$file{SeqGap}");
  $debug and printf STDERR "%s. calling: %s\n", &MySub,
    "$CorePath{call}{SeqConcat} $gap{full} -OutSeq=$file{SeqGap}";
  if ($bTimer) {
    printf STDERR "%s. CPU time to provide database sequence: %.3f\n", &MySub, &Sum((times)[0,2])-$time;
  }
  if ($debug) {
    printf STDERR "%s. files:\n", &MySub;
    printf STDERR "  GAP4 database: %s%s.%s\n", $gap{dir}, $gap{name}, $gap{version};
    printf STDERR "  sequence source: %s\n", $SeqRnaSrc;
    print  STDERR "  foid: $file{foid}\n";
    print  STDERR "  RNA sequences: $file{EnterFa}\n";
    print  STDERR "  concatenated GAP sequence: $file{SeqGap}\n";
  }

  # loop over RNA sequences
  my $pSeqQueue = SeqLab::SeqStreamIn->new($file{EnterFa});
  $pSeqQueue->AddSwitch(-debug=>$dbg2);
  $file{SeqCurr} = $ProgParam{TmpManag}->Create(-touch=>1);
  $file{SeqOut} = $ProgParam{TmpManag}->Create(-touch=>1);
  my ($pSeq,$CtSeq);
  while ($pSeq = $pSeqQueue->GetNext()) {
    $bTimer and $time = &Sum ((times)[0,2]);
    $CtSeq ++;
    &WriteFile ($file{SeqCurr}, &SeqentryToFasta($pSeq));

    # localise introns and f/r fusion site
    # insert gaps and Ns
    printf "splicing RNA sequence %s\n", $$pSeq{id}||"''";
    my $pFeature = &EstGenome ($file{SeqGap}, $file{SeqCurr});
    unless ($pFeature = &EstGenome ($file{SeqGap}, $file{SeqCurr})) {
      printf STDERR "%s. ERROR when trying to splice RNA sequence %s\n", &MySub, $$pSeq{id}||"''";
      next;
    }
    while (my $FeatCurr = pop @$pFeature) {
      substr ($$pSeq{sequence}, $$FeatCurr{pos}, 0) =
        (($$FeatCurr{feature} eq 'ReadGap') ? 'N':'-') x $$FeatCurr{length};
    }

    # output modified RNA sequence
    &WriteFile ($file{SeqOut}, &SeqentryToFasta($pSeq), -append=>1);
    $bTimer and printf STDERR "%s. CPU time to match RNA onto database sequence: %.3f\n", &MySub,
      &Sum((times)[0,2])-$time;
  }

  # prepare directed assembly
  $file{AssembFrag} = $ProgParam{TmpManag}->Create();
  mkdir ($file{AssembFrag});
  system ("$CorePath{call}{SeqBreakAssemb}=3000,1 $file{SeqOut} -AnnotLabel=EXON -OutDir=$file{AssembFrag}");
  $file{error} = $ProgParam{TmpManag}->Create(-touch=>1);
  $file{AssembLog} = $ProgParam{TmpManag}->Create(-touch=>1);
  $pCall = &CallExtClosure ($CorePath{call}{GapAssembDirect}, $gap{full},
    "$file{AssembFrag}/fofn", "1>$file{AssembLog}", "2>$file{error}");
  unless (&PhysicalAccessCtrl ($gap{full}, -mode=>'func', -noLock=>1,
      -func=>$pCall, -log=>1||$debug) or -s $file{error}) {
    # this doesn't work for the detection of unentered sequences
    die sprintf "ERROR: directed assembly failed, call:\n  %s\n", &$pCall ('WhatRUCalling');
  }

  # tidy up
  if ($debug) {
    printf STDERR "%s. additional files:\n", &MySub;
    printf STDERR "  RNA sequences, modified (%d entr%s): $file{SeqOut}\n", $CtSeq, ($CtSeq == 1) ? 'y' : 'ies';
    print  STDERR "  fofn for directed assembly: $file{AssembFrag}/fofn\n";
    print  STDERR "  LOG of directed assembly: $file{AssembLog}\n";
  } else {
    unlink (glob "$file{AssembFrag}/*");
    rmdir ($file{AssembFrag});
    unlink (grep{-e $_} qw(foid EnterFa SeqGap SeqCurr SeqOut error AssembLog));
  }
}


# provide reverse reads for GAP4 database
#
# INTERFACE
# - argument 1:    GAP4 database
# - argument 2*:   contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -RcTgtSpecif   [STD]
#   -SlcCnum       [STD]
#   -SlcEnds       select contig end range
#   -SlcID         select for read IDs
#   -SlcLen        [STD]
#   -SlcRnum       [STD]
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
sub ProgAddRev {
  my ($PathProj,@SelectContig) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %slc;
  $slc{cnum}      = $ProgOpt{-SlcCnum};
  $slc{rnum}      = $ProgOpt{-SlcRnum};
  $slc{len}       = $ProgOpt{-SlcLen};
  $slc{id}        = $ProgOpt{-SlcID};
  $slc{Specif}    = $ProgOpt{-SlcSpecif};
  $slc{SpecifDlt} = $ProgOpt{-SlcSpecifDlt};
  $slc{ends}      = $ProgOpt{-SlcEnds};
  $slc{reap}      = 0;
  $slc{reap} and $slc{ReapOpt} = '-SlcReap=passed';

  print  "$ProgFile -$ProgMode\n";
  my %gap = %{ &GapNameFields($PathProj) };
  unless (-w $gap{full}) {
    die sprintf "ERROR: write access denied on GAP4 database %s", $gap{full};
  }
  printf "GAP4 database: %s%s.%s\n", $gap{dir}, $gap{name}, $gap{version};

  # OutStump, output files
  my %out;
  $out{base} = &PrepOstump();
  $out{HaveFoid} = $out{base} . '_have.foid';
  $out{GetFoid}  = $out{base} . '_get.foid';
  $out{GetLog}   = $out{base} . '_get.log';
  $out{GotFoid}  = $out{base} . '_got.foid';
  $out{temp} = $ProgParam{TmpManag}->Create();

  # prepare assembly data structure
  my $pCtgStruct = &ContigStructMeta ([ $gap{full} ],
    -cend  => 0,  # default: calculate cend binding for all reads
    -debug => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }
  printf "total contigs in GAP4 database: %d\n",int keys %{$$pCtgStruct{contig}};
  printf "total reads in GAP4 database: %d\n", int keys %{$$pCtgStruct{read}};

  ##############################################################################
  # selection process
  # work could also be done in &ContigStructMeta. But, we wouldn't have
  # control over the actual procedure and knowledge of the statistical counts
  # during the actual procedure. So, we do it here.
  my ($pData,%id,$call,$ret);

  # select contigs
  @{$$pData{contig}} = values %{$$pCtgStruct{contig}};
  if (@SelectContig) {  # don't change the order here!
    if (grep{ m/^#/ } @SelectContig) {
      $call = sprintf ("$CorePath{call}{GapContigIds} $gap{full} $out{temp} %s",
        join (' ', map{ s/^#.+$/'$&'/;$_ } @SelectContig));
      $ret = &GapSafeCall ($call);
      $debug and printf STDERR "%s. calling: $call\n", &MySub;
      if ($ret or ! ($slc{ctg2}=&LoadFoid($out{temp}) and @{$slc{ctg2}})) {
        die "ERROR: unable to get list of contig IDs (total)\n";
      }
      @SelectContig = @{$slc{ctg2}};
    }
    printf "applying contig specifier%s\n", (@SelectContig == 1) ? '':'s';
    @{$$pData{contig}} = map{ $$pCtgStruct{read}{$_}{contig} }
      grep{ exists($$pCtgStruct{read}{$_}) } @SelectContig;
    printf "  contigs passed: %d\n", int @{$$pData{contig}};
  }
  if ($slc{Specif}) {
    printf "applying specificity selector: %s, delta log(P) %s\n", $slc{Specif}, $slc{SpecifDlt};
    foreach my $pCtg (@{$$pData{contig}}) {
      &ContigSpecif ($pCtg, $slc{Specif}, -delta=>$slc{SpecifDlt},
        -RcTgtSpecif=>$ProgOpt{-RcTgtSpecif}) and
        push @{$$pData{ContigNew}}, $pCtg;
    };
    $$pData{contig} = $$pData{ContigNew};
    delete $$pData{ContigNew};
    printf "  contigs passed: %d\n", int @{$$pData{contig}};
  }
  if ($slc{cnum}) {
    print  "applying clone number selector: $slc{cnum}\n";
    @{$$pData{contig}} = grep{ int(keys %{$_->{clone}})>=$slc{cnum} } @{$$pData{contig}};
    printf "  contigs passed: %d\n", int @{$$pData{contig}};
  }
  if ($slc{rnum}) {
    print  "applying read number selector: $slc{rnum}\n";
    @{$$pData{contig}} = grep{ int(keys %{$_->{read}})>=$slc{rnum} } @{$$pData{contig}};
    printf "  contigs passed: %d\n", int @{$$pData{contig}};
  }
  if ($slc{len}) {
    print  "applying contig length selector: $slc{len}\n";
    @{$$pData{contig}} = grep{ $_->{length}>$slc{len} } @{$$pData{contig}};
    printf "  contigs passed: %d\n", int @{$$pData{contig}};
  }

  # translate contigs to reads
  @{$$pData{read}} = map{ values %{$_->{read}} } @{$$pData{contig}};
  printf "reads residing in selected contigs: %d\n", int @{$$pData{read}};

  # select reads
  if ($slc{id}) {
    print  "applying read ID selector: $slc{id}\n";
    @{$$pData{read}} = grep{ $_->{id}=~m/$slc{id}/o } @{$$pData{read}};
    printf "  selected reads: %d\n", int @{$$pData{read}};
  }
  if ($slc{ends}) {
    print  "applying contig end distance selector: $slc{ends}\n";
    @{$$pData{read}} = grep{ $_->{CEndDist}<=$slc{ends} } @{$$pData{read}};
    printf "  selected reads: %d\n", int @{$$pData{read}};
  }
  unless (@{$$pData{read}}) {
    print  STDERR "there's nothing left after selection!\n";
    $debug or unlink ($out{temp});
    exit 0;
  }
  @{$id{analyse}} = map{ $_->{id} } @{$$pData{read}};

  ##############################################################################
  # find reads for clones being analysed, do filtering

  # compile IDs of clones which are present in the database
  foreach my $ItID (@{$id{analyse}}) {
    my $pField = &ReadidToFields($ItID);
    unless ($pField) { next }
    $id{clone}{$$pField{cln}} = $$pField{cln};
  }
  printf "index look-up for clones: %d\n", int(values %{$id{clone}});

  # select files from read index
  $bTimer and $time = &Sum ((times)[0,2]);
  $ProgParam{store}{ReadIndex} = ReadWatch::ReadIndex->new();
  $ProgParam{store}{ReadIndex}->{switch}{-FullMatch} = 1;
  $bTimer and printf STDOUT "CPU time for loading read index:                %.3f\n", &Sum((times)[0,2])-$time;
  $bTimer and $time = &Sum ((times)[0,2]);
  $ProgParam{store}{ReadIndex}->AddTreeIndex('CloneStout');
  $bTimer and printf STDOUT "CPU time for expanding read index:              %.3f\n", &Sum((times)[0,2])-$time;
  $bTimer and $time = &Sum ((times)[0,2]);
  @{$id{get}} = $ProgParam{store}{ReadIndex}->Retrieve('exper', values %{$id{clone}});
  $bTimer and printf STDOUT "CPU time for retrieving from read index:        %.3f\n", &Sum((times)[0,2])-$time;
  printf "existing reads for clones being analysed: %d\n", int @{$id{get}};

  # filter off reads which are already present in the database
  $bTimer and $time = &Sum ((times)[0,2]);
  @{$id{IdxGet}}{@{$id{get}}} = (1) x int(@{$id{get}});
  foreach (keys %{$$pCtgStruct{read}}) { delete $id{IdxGet}{$_}; }
  $id{get} = [ keys %{$id{IdxGet}} ];
  printf "new reads: %d\n", int @{$id{get}};

  # filter off contaminated reads and reads to be neglected
  my $PathNeglect = $gap{dir} . $ProgParam{path}{AddIniStamp} . $gap{name}  . '_neglect.foid';
  if (-r $PathNeglect) { # always use this foid for filtering
    push @{$id{filter}}, @{ &LoadFoid($PathNeglect) };
  }
  if (@{$id{filter}}) {
    printf "applying read filter: %d entries\n", int @{$id{filter}};
    @{$id{IdxGet}}{@{$id{get}}} = (1) x int(@{$id{get}});
    foreach (@{$id{filter}}) { delete $id{IdxGet}{$_}; }
    $id{get} = [ keys %{$id{IdxGet}} ];
    printf "  remaining reads: %d\n", int @{$id{get}};
  } else {
    print  "no filtering\n";
  }
  $bTimer and printf STDOUT "CPU time for read list filtering:             %.3f\n", &Sum((times)[0,2])-$time;

  ##############################################################################
  # provide reads in GAP4 database directory

  $bTimer and $time = &Sum ((times)[0,2]);
  open (OUTFOID, ">$out{GetFoid}");
  print OUTFOID map{ "$_\n" } @{$id{get}};
  close OUTFOID;
  if ($ret = int (system (
    "$ProgParam{call}{PrjRequest} -log=$out{GetLog} -FullMatch $slc{ReapOpt} -OutPass=$out{GotFoid} $out{GetFoid} $gap{dir}"
    ) / 256)) {
    die "ERROR: read request call failed\n";
  }
  $bTimer and printf STDOUT "CPU time for providing reads in GAP4 database folder: %.3f\n", &Sum((times)[0,2])-$time;
  $id{got} = &LoadFoid ($out{GotFoid});
  printf "reads provided%s: %d\n",
     $slc{reap} ? ' (REAP-passed)' : '',
     int @{$id{got}};

  ##############################################################################

  # tidy up
  $debug or unlink ($out{temp});
}


# convert assembly log to list of contig IDs
#
# INTERFACE
# - argument 1:   path of assembly log file
# - argument 2:   path of GAP4 contig index file
#
# - global options:
#   -debug        [STD]
#   -SlcScore     apply match percentage threshold
#
# DESCRIPTION
# - procedure:
#   - read contig source index, contig table
#   - ...
#
sub ProgAssembleScreen {
  my ($PathAssembLog,$PathContigIndex) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;

  # load contig index
  my $pCtgIdx = &PlainToTable ($PathContigIndex,
    -TabType=>'HIH', -ColIdx=>1, -comments=>1, -debug=>$dbg2);
  unless ($pCtgIdx) {
    die sprintf "ERROR: unable to read filter file %s\n", $PathContigIndex||"''";
  }
  $debug and printf STDERR "%s. %d entries in contig index %s\n", &MySub,
    int(keys %$pCtgIdx), $PathContigIndex||"''";

  # parse assembly log file
  my (%match);
  my (@ContigList,%ContigMatchedIdx,$CtContig);
  unless (open (INASSEMB,$PathAssembLog)) {
    die sprintf "ERROR: unable to read assembly log file %s\n", $PathAssembLog||"''";
  }
  while (<INASSEMB>) {
    if (m/ Possible join between contig (\d+).+ and contig (\d+)/) {
      @match{'num1','num2'} = ($1,$2);
      $_ = <INASSEMB>;
      m/Percentage mismatch +([0-9\.]+)/;
      $match{percent} = $1;
      if ($ProgOpt{-SlcScore} and
          $ProgOpt{-SlcScore}<$match{percent}) { next }
      foreach $CtContig ('num1', 'num2') {
        unless ($ContigMatchedIdx{$match{$CtContig}}) {
          $debug and printf STDERR "%s. found match in line %d\n", &MySub, $.;
          push @ContigList, $match{$CtContig};
          $ContigMatchedIdx{$match{$CtContig}} = 1;
        }
      }
    } elsif ($debug and m/ Possible join/) {
      printf STDERR "%s. regexp match in line %d\n", &MySub, $.;
    }
  }
  close INASSEMB;

  # print contig IDs
  foreach (@ContigList) {
    printf "%s\n", $$pCtgIdx{$_}{contig_id};
  }
}


# perform internal assembly for a GAP4 database
# *** out of curation ***
#
# INTERFACE
# - argument 1: GAP4 database
#
# - global options:
#   -debug         [STD]
#   ...
#
sub ProgAssembleEnds {
  my ($PathProj) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $pAction = sub { return (! int (system (join(' ',@_)) / 256)); };
  my $OutStump = &PrepOstump();

  print  "$ProgFile -$ProgMode\n";
  my %gap = %{ &GapNameFields($PathProj) };
  unless (-w $gap{full}) {
    die sprintf "ERROR: unable to read/write GAP4 database %s", $gap{full};
  }
  printf "GAP4 database: %s%s.%s\n", $gap{dir}, $gap{name}, $gap{version};

  # save contigs ends in Experiment file format
  $ProgParam{store}{queue}{SeqIn} = SeqLab::SeqStreamIn->new($gap{full});
  $ProgParam{store}{queue}{SeqIn}->AddSwitch (
    -SlcEnds => $ProgOpt{-SlcEnds} || 100,
    -SlcLen  => $ProgOpt{-SlcLen},
    -pure    => 1,
    -debug   => $dbg2);
  my $PathExper = "$gap{dir}${OutStump}_seq";
  mkdir ($PathExper);
  my @fofn;
  while (my $pSeq = $ProgParam{store}{queue}{SeqIn}->GetNext()) {
    $$pSeq{id} =~ s/^$gap{name}\.$gap{version}\./fake./;
    $$pSeq{id} =~ s/_(\d)end$/.$1/;
    &WriteFile ("$PathExper/$$pSeq{id}", &SeqentryToExper($pSeq));
    push @fofn, $$pSeq{id};
  }
  open  (OUTFOFN, ">$PathExper/fofn");
  printf OUTFOFN "%s\n", join ("\n", @fofn);
  close  OUTFOFN;
  my $CtEntry = &wc_l ("$PathExper/fofn");
  printf "fake reads to be assembled: %d\n", $CtEntry;

  # perform shotgun assembly, analyse result
  my $call = "$CorePath{call}{GapAssembShotgun} $PathProj $PathExper/fofn 11.0 NO NO YES 'REPT SPSQ ENZ2' 50 >$gap{dir}${OutStump}_assemble.log 2>>$gap{dir}${OutStump}_error.log";
  $debug and printf "%s. calling process for shotgun assembly:\n  $call\n", &MySub;
  unless (&PhysicalAccessCtrl ($PathProj, -mode=>'func', -noLock=>1,
    -func => $pAction, -FuncArg=>[$call],
    -mail => { ProcStamp=>"$ProgFile -$ProgMode" },
    -log  => 1)) {
    die "ERROR: shotgun assembly call failed\n";
  }
  {
    my $buffer = &ReadFile ("$CorePath{call}{egrep} '^ *[0-9]* joins made' $gap{dir}${OutStump}_assemble.log |");
    $CtEntry = 0;
    while ($buffer =~ m/^ *(\d+) /gm) { $CtEntry += $1; }
    printf "made internal joins: %d\n", $CtEntry;
  }

  # move file of filenames
  foreach (glob "$PathExper/fofn*") {
    my $target = $_; $target =~ s|seq/||;
    &FileCopy ($_, $target);
  }

  # delete fake reads
  print  "delete fake reads\n";
  $call = "$CorePath{call}{GapReadDel} $PathProj $PathExper/fofn  >/dev/null 2>/dev/null";
  unless (&PhysicalAccessCtrl ($PathProj, -mode=>'func', -noLock=>1,
    -func => $pAction, -FuncArg=>[$call],
    -mail => { ProcStamp=>"$ProgFile -$ProgMode" },
    -log  => 1)) {
    die "ERROR: deletion of fake reads failed\n";
  }

  # tidy up
  print "done\n";
}


################################################################################
# analyse and report: basic properties and assembly topology
################################################################################


# list contigs/reads residing in the assembly data source
#
# INTERFACE
# - argument 1:    assembly data source
# - argument 2*:   either additional assembly data source(s) (like Arg1) or
#                  contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -RcTgtSpecif   [STD]
#   -SlcData       what type of data to be listed
#   -SlcCnum       [STD]
#   -SlcLen        [STD]
#   -SlcRnum       [STD]
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DEBUG, CHANGES, ADDITIONS
# - a combination of contig selector, -SlcData=clone and -SlcRange=[...] may
#   result in clones listed which fulfil the range criterion for contigs outside
#   the contig selection.
#
sub ProgListID {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  $debug and printf STDERR "%s. analysis of assembly data source %s %s\n", &MySub,
    $ArgSrc[0]||"''", (@ArgSrc>1) ? '...':'';
  my $SlcData = $ProgOpt{-SlcData} || 'contig';

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc], %ProgOpt,
    -debug => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }

  ##############################################################################
  # analysis
  if (0) { }

  # list clones
  elsif ($SlcData eq 'clone') {
    foreach my $pClone (values %{$$pCtgStruct{clone}}) {

      # select for range
      if ($ProgOpt{-SlcRange}) {
        foreach my $pRead (map{ @{$_||[]} }
          &DataTreeSlc ($pClone, [['^[a-z]$','regexp'],['^[1-9]$','regexp']])
        ) {
          my $poRange = Math::Range->new(
              map{ ($_>0) ? $_ : $_+$$pRead{contig}{length}+1 } @{$ProgOpt{-SlcRange}}
            );
          if ($poRange) {
            if ($poRange->overlaps(@{$$pRead{pos}}{'-1','1'})) {
              print "$$pClone{id}\n";
              last;
            }
          } else {
            printf STDERR "%s. WARNING: undefined range %s for contig %s, length %d\n", &MySub,
              $ProgOpt{-SlcRange}->string(), $$pRead{contig}{id}, $$pRead{contig}{length};
          }
        }
      }

      # simply output
      else {
        print "$$pClone{id}\n";
      }
    }
  }

  # list contigs
  elsif ($SlcData eq 'contig') {
    foreach my $pCtg (values %{$$pCtgStruct{contig}}) {
      print "$$pCtg{id}\n";
    }
  }

  # list reads
  elsif ($SlcData eq 'read') {
    foreach my $pRead (values %{$$pCtgStruct{read}}) {

      # select for range
      if ($ProgOpt{-SlcRange}) {
        my $poRange = Math::Range->new(
            map{ ($_>0) ? $_ : $_+$$pRead{contig}{length}+1 } @{$ProgOpt{-SlcRange}}
          );
        if ($poRange) {
          if ($poRange->overlaps(@{$$pRead{pos}}{'-1','1'})) {
            print "$$pRead{id}\n";
          }
        } else {
          printf STDERR "%s. WARNING: undefined range %s for contig %s, length %d\n", &MySub,
            $ProgOpt{-SlcRange}->string(), $$pRead{contig}{id}, $$pRead{contig}{length};
        }
      }

      # simply output
      else {
        print "$$pRead{id}\n";
      }
    }
  }

  # unknown data category
  else {
    die sprintf "ERROR: illegal data type selector %s\n", $SlcData||"''";
  }
}


# cat consensus sequences from a GAP4 database
#
# INTERFACE
# - argument 1:    GAP4 database
# - argument 2*:   contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -RcTgtSpecif   [STD]
#   -SlcCnum       [STD], does not work with -SlcData=scaffd
#   -SlcData       data type for which the sequence shall be output
#   -SlcID         in case of -SlcData=clone or -SlcData=read:
#                  select for clone ID
#   -SlcLen        [STD]
#   -SlcRnum       [STD], does not work with -SlcData=scaffd
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DESCRIPTION
# - the procedural concept of this function is much like &ProgAddRev
# - process logging must be done to STDERR, cause STDOUT is the default
#   destination of sequence output.
#
# DEBUG, CHANGES, ADDITIONS
# - Currently, the function only works with true GAP4 database sources.
#   A assembly data source, though, might also be non-database. But later on,
#   the retrieval of the actual sequences depends on the knowledge where
#   and how to get the sequences from.
#
sub ProgSeq {
  require SeqLab::SeqStreamOut;
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my $SlcData = $ProgOpt{-SlcData} || 'contig';
  if ($SlcData eq 'scaffd') {
    &ProgScaffdSeq (@_);
    return;
  }

  # arguments
  my %gap = %{ &GapNameFields($ArgSrc[0]) };
  if ($gap{exists}) {
    $debug and printf STDERR "%s. working on database %s\n", &MySub, $gap{full}||"''";
  } else {
    die sprintf "%s. unable to read GAP4 database %s\n", &MySub, $gap{full}||"''";
  }

  # prepare assembly data structure from complete assembly data source
  # selection is done below
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc], %ProgOpt,
    -pair   => int($SlcData eq 'clone'),
    -RelyOn => undef,
    -debug  => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  $bTimer and printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  unless (values %{$$pCtgStruct{contig}}) {
    printf STDERR "WARNING: no contigs found in assembly data source %s! (after selection)\n",
      $$pCtgStruct{root}{source}[0] || $ArgSrc[0];
    exit 0;
  }

  ##############################################################################
  # retrieve and output sequences
  my (%path,@IdAnalyse,$pSeq,$pSeqNew);
  my ($call,$ret);
  if (0) { }

  # contig consensus sequences
  # - selection according to contig specifiers is done via &ContigStructMeta
  elsif ($SlcData eq 'contig') {

    # list of contig IDs
    @IdAnalyse = map{ $_->{id} } values %{$$pCtgStruct{contig}};

    # call to get consensi
    $path{TmpCons} = $ProgParam{TmpManag}->Create();
    $call = join (' ', $CorePath{call}{GapExportCons}, '-f Experiment',
      $gap{full}, $path{TmpCons}, @IdAnalyse);
    if ($ret = &GapSafeCall($call)) {
      die "ERROR: GAP4 consensus call failed (code $ret):\n  $call\n";
    }
    if (-z $path{TmpCons}) {
      $debug and print  STDERR "ERROR: GAP4 consensus has zero size, call was:\n  $call\n";
    }
    &FileCopy ($path{TmpCons}, \*STDOUT);
  }

  # read sequences
  # - selection according to read specifiers is not possible!
  elsif ($SlcData eq 'read') {

    # list of read IDs
    @IdAnalyse = map{ $_->{id} } values %{$$pCtgStruct{read}};
    $debug and printf STDERR "reads residing in contigs: %d\n", int @IdAnalyse;
    if ($ProgOpt{-SlcID}) {
      @IdAnalyse = grep{ m/$ProgOpt{-SlcID}/o } @IdAnalyse;
      $debug and printf STDERR "selected readings: %d\n", int @IdAnalyse;
    }

    # export Experiment files from GAP4 database
    # - export as pre-assembled format
    # - copy single-file output to STDOUT
    $path{TmpExper} = $ProgParam{TmpManag}->Create();
    $call = join (' ', $CorePath{call}{GapExportPreass},
      $gap{full}, $path{TmpExper}, join(' ',@IdAnalyse), '>/dev/null');
    if ($ret = &GapSafeCall($call)) {
      die sprintf "%s. ERROR: call to export Experiment data failed (code %s):\n  %s\n", &MySub, $ret, $call;
    }
    foreach (map{@{$_||[]}} &LoadFoid("$path{TmpExper}/fofn")) {
      &FileCopy ("$path{TmpExper}/$_", \*STDOUT);
    }
  }

  # clone sequences
  # - selection according to clone specifiers is done here
  elsif ($SlcData eq 'clone') {
    my $spacer = $ProgOpt{-var}{spacer} || $ProgParam{default}{GapSpacer}{clone};

    # list of clones
    my @clone = values %{$$pCtgStruct{clone}};
    $debug and printf STDERR "total clones: %d\n", int @clone;
    if ($ProgOpt{-SlcID}) {
      @clone = grep{ $_->{id}=~m/$ProgOpt{-SlcID}/o } @clone;
      printf STDERR "selected clones: %d\n", int @clone;
    }
    if ($ProgOpt{-var}{SlcPair}) {
      @clone = grep{ ($_->{PairStatus}||0)>0 } @clone;
      $debug and printf STDERR "%ssequenced & consistent clones: %d\n",
        $ProgOpt{-SlcData} ? 'selected ':'', int @clone;
    }
    $debug and int(@clone) and printf STDERR "thereof contiguous clones: %d\n",
      $ProgOpt{-SlcData} ? 'selected ':'', int (grep{ $_->{PairStatus}>=1 } @clone);

    # start seq output queue
    my $pSeqOutQueue = SeqLab::SeqStreamOut->new(
      -file   => '-',
      -format => 'Experiment',
      -debug  => $dbg2);

    # retrieve & output sequences
    foreach my $pClone (@clone) {

      # readings consistently residing in one contig?
      my $bOneCtg = int($$pClone{PairStatus}>=1) || do{
        my %ctg = map{($_=>1)} map{@{$_||[]}} &DataTreeSlc ($pClone,[['^-?[01]$','regexp'],[0,'all'],['contig']]);
        (int(keys %ctg)==1);
      };

      # consistently residing in one contig
      if ($bOneCtg) {
        my ($pCtg) = map{@$_} &DataTreeSlc ($pClone,[['^-?[01]$','regexp'],[0,'all'],['contig']]);
        $pSeq = &ContigSeq ($pCtg, -debug=>$dbg2);
        $$pSeq{descr} = "contig $$pSeq{id}";
        unless ($pSeq and $$pSeq{sequence}) {
          printf STDERR "%s. ERROR in retrieval of contig sequence for clone %s\n", &MySub, $$pClone{id};
          next;
        }
        $pSeq = &SeqRange ($pSeq,
          &Min (map{@$_} &DataTreeSlc($pClone,[['^-?[01]$','regexp'],[0,'all'],['pos'],[0,'all']]) ),
          &Max (map{@$_} &DataTreeSlc($pClone,[['^-?[01]$','regexp'],[0,'all'],['pos'],[0,'all']]) ),
          );
        $$pSeq{id} = $$pClone{id};
        if ((&DataTreeSlc($$pClone{'1'},[[0,'all']],-unique=>1)||[])->[0]{orient} == -1) {
          $debug and printf STDERR "%s. complementing contig seq for clone %s\n", &MySub, $$pClone{id};
          $pSeq = &SeqRevcompl ($pSeq);
        }
        $pSeqOutQueue->Push($pSeq);
      }

      # fully sequenced, consistent pairing, but residing in two contigs
      elsif ($$pClone{PairStatus}>=0.5) {
        my $pSeqP = &ContigSeq ((&DataTreeSlc($$pClone{'1'},[[0,'all'],['contig']])||[])->[0], -debug=>$dbg2);
        $$pSeqP{descr} = "contig $$pSeqP{id}";
        my $pSeqN = &ContigSeq ((&DataTreeSlc($$pClone{'-1'},[[0,'all'],['contig']])||[])->[0], -debug=>$dbg2);
        $$pSeqN{descr} = "contig $$pSeqN{id}";
        unless ($pSeqP and $$pSeqP{sequence} and $pSeqN and $$pSeqN{sequence}) {
          printf STDERR "%s. ERROR in retrieval of contig sequence for clone %s\n", &MySub, $$pClone{id};
          next;
        }
        $$pSeqP{orient} = &DataTreeSlc($$pClone{'1'},[[0,'all']])->[0]{orient};
        $$pSeqN{orient} = &DataTreeSlc($$pClone{'-1'},[[0,'all']])->[0]{orient};
        $pSeqP = &SeqRange ($pSeqP,
          ($$pSeqP{orient}==1)  ? &Min (map{@$_} &DataTreeSlc($$pClone{'1'},[[0,'all'],['pos'],[0,'all']]) ) : 1,
          ($$pSeqP{orient}==-1) ? &Max (map{@$_} &DataTreeSlc($$pClone{'1'},[[0,'all'],['pos'],[0,'all']]) ) : -1,
          );
        $pSeqN = &SeqRange ($pSeqN,
          ($$pSeqN{orient}==1)  ? &Min (map{@$_} &DataTreeSlc($$pClone{'-1'},[[0,'all'],['pos'],[0,'all']]) ) : 1,
          ($$pSeqN{orient}==-1) ? &Max (map{@$_} &DataTreeSlc($$pClone{'-1'},[[0,'all'],['pos'],[0,'all']]) ) : -1,
          );
        $$pSeqP{id} = $$pSeqN{id} = $$pClone{id};
        if ($$pSeqP{orient}==-1) { $pSeqP = &SeqRevcompl($pSeqP) }
        if ($$pSeqN{orient}==1) {  $pSeqN = &SeqRevcompl($pSeqN) }
        $pSeqNew = &SeqConcat ($pSeqP, $pSeqN, -spacer=>$spacer);
        $$pSeqNew{descr} = '('. $$pSeq{descr} .') joint with ('. $$pSeq{descr} .')';
        $pSeqOutQueue->Push ($pSeqNew);
      }

      # neither of the above
      else {
        $debug and printf STDERR "%s. DEBUG: scattered clone %s\n", &MySub, $$pClone{id};
      }
    }
  }

  # unsupported data type
  else {
    die "ERROR: unsupported data type $SlcData\n";
  }

  ##############################################################################
  # tidy up

  if (! $debug) {
    if (-d $path{TmpExper}) {
      unlink glob("$path{TmpExper}/*");
      rmdir $path{TmpExper};
    }
    unlink (grep{-e $_} $path{TmpCons});
  }
}


# create index for contig set data
#
# INTERFACE
# - argument 1+:   assembly data sources.
#                  Alternatively, an internal handing over of contig set data
#                  can be done as:
#                  Arg1  'internal'
#                  Arg2  reference to contig/scaffold set data structure
#
# - global options:
#   -debug         [STD]
#   -OutStump      [STD]
#   -OutTab        same as -OutStump - we have multi-file tabular output.
#                  Default: $ProgParam{default}{GapIndex}
#   -RcTgtSpecif   [STD]
#   -SlcCnum       [STD], not implemented for -SlcData=scaffd
#   -SlcData       possible: contig=read, scaffd
#   -SlcLen        [STD]
#   -SlcRnum       [STD], not implemented for -SlcData=scaffd
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DESCRIPTION
# - index tables are intended to represent all the data needed to reconstruct
#   the exact topology of an assembly.
#
# DEBUG, CHANGES, ADDITIONS
# - databases to be indexed are specified by the script arguments. Arguments
#   are verified to be real GAP4 databases.
#
sub ProgIndex {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %file; $file{OutBase} = &PrepOstump();

  ##############################################################################
  # get contig set data structure
  my ($pCtgStruct, $pCtg, $pRead, $pRepEntry);
  my ($hOutRead, $hOutCtg, @ColumnRead, @ColumnContig);

  # got scaffold set data structure from calling code
  if ($ArgSrc[0] eq 'internal' and ref($ArgSrc[1]) eq 'HASH') {
    $pCtgStruct = $ArgSrc[1];
  }

  # get scaffold set data structure from assembly data source(s)
  if ($ProgOpt{-SlcData} eq 'scaffd') {
    $bTimer and $time = &Sum ((times)[0,2]);
    $pCtgStruct ||= &ScaffdStruct ([@ArgSrc], %ProgOpt,
      -SlcSingle => 1,
      -debug     => $dbg2);
    # error message was done in &SeqAlign::Assembly::ContigStruct or &SeqAlign::ScaffdAssemb::ScaffdStruct
    unless ($pCtgStruct) { exit 1 }
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ScaffdStruct
    $bTimer and printf STDERR "%s. CPU time for constructing scaffold set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $bTimer and $time = &Sum ((times)[0,2]);
    my $spacer = $ProgOpt{-var}{spacer} || $ProgParam{default}{GapSpacer}{scaffd};
    unless ($pCtgStruct = &ScaffdStructToContigStruct ($pCtgStruct,
      -spacer=>$spacer, -debug=>$dbg2)) {
      exit 1;
    }
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ScaffdStructToContigStruct
    $bTimer and printf STDERR "%s. CPU time for constructing contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  }

  # get contig set data structure from assembly data source(s)
  else {
    $bTimer and $time = &Sum ((times)[0,2]);
    $pCtgStruct ||= &ContigStructMeta ([@ArgSrc], %ProgOpt,
      -debug => $dbg2);
    # error message was done in &SeqAlign::Assembly::ContigStruct
    unless ($pCtgStruct) { exit 1 }
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
    $bTimer and printf STDERR "%s. CPU time for constructing contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  }

  # empty assembly data source
  unless (values %$pCtgStruct and values %{$$pCtgStruct{contig}}) {
    print  STDERR "WARNING: no contigs found\n";
    exit 0;
  }

  ##############################################################################
  # table headers

  # output files
  $file{OutRead} = $file{OutBase} .'Read.tab';
  $file{OutContig} = $file{OutBase} .'Contig.tab';
  $hOutRead = FileHandle->new($file{OutRead},'w');
  printf "writing file %s\n", $file{OutRead}||"''";
  $hOutCtg = FileHandle->new($file{OutContig},'w');
  printf "writing file %s\n", $file{OutContig}||"''";

  # header of read table
  $file{HeadPhrCall} = join (' ', $ProgFile, "-$ProgMode",
    (map{ "$_=$ProgOpt{$_}" } keys %ProgOpt),
    &ListMaxfirst(\@ArgSrc,3,-ElemExceed=>'...'));
  print  $hOutRead "# $ProgFstump -$ProgMode\n";
  print  $hOutRead "# global GAP4 topological index - read listing\n";
  print  $hOutRead "# call: $file{HeadPhrCall}\n";
  printf $hOutRead "# date/time: %s\n", &TimeStr();
  printf $hOutCtg "#\n# note:\n# position values follow all-day semantics\n";
  @ColumnRead = qw(read_id contig_source contig_id contig_idnum
                   read_offset read_length read_orient read_true);
  printf $hOutRead "#\n# column labels:\n# %s\n", join("\t",@ColumnRead);

  # header of contig table
  print  $hOutCtg "# $ProgFstump -$ProgMode\n";
  print  $hOutCtg "# global GAP4 topological index - contig listing\n";
  print  $hOutCtg "# call: $file{HeadPhrCall}\n";
  printf $hOutCtg "# date/time: %s\n", &TimeStr();
  @ColumnContig = qw(contig_id contig_idnum contig_source contig_length read_num);
  printf $hOutCtg "#\n# column labels:\n# %s\n", join("\t",@ColumnContig);

  ##############################################################################
  # output data to table

  # print data lines into read table
  foreach $pRead (values %{$$pCtgStruct{read}}) {
    $$pRead{id} =~ m/^fake/ and next;
    $pRepEntry = {
      read_id       => $$pRead{id},
      contig_source => $$pRead{contig}{source}{path},
      contig_id     => $$pRead{contig}{id},
      contig_idnum  => $$pRead{contig}{idnum},
      read_offset   => $$pRead{pos}{'-1'},
      read_length   => $$pRead{length},
      read_orient   => $$pRead{orient},
      read_true     => $$pRead{trace},
      };
    printf $hOutRead "%s\n", join("\t",@{$pRepEntry}{@ColumnRead});
  }

  # print data lines into contig table
  foreach $pCtg (values %{$$pCtgStruct{contig}}) {
    $pRepEntry = {
      contig_id     => $$pCtg{id},
      contig_idnum  => $$pCtg{idnum},
      contig_source => $$pCtg{source}{path},
      contig_length => $$pCtg{length},
      read_num      => int(values %{$$pCtg{read}}),
      };
    printf $hOutCtg "%s\n", join("\t",@{$pRepEntry}{@ColumnContig});
  }
}


# report clone topologies for list of readings
#
# INTERFACE
# - argument 1:  assembly data source
# - argument 2+: read IDs
#
# - global options:
#   -debug       [STD]
#   -OutReport   save showrelationship report to specified file
#   -OutStump    [STD]
#   -OutTab      same as -OutStump - we have multi-file tabular output,
#                default from &PrepOstump
#   -timer       print time performance protocol to STDOUT
#
sub ProgFollowRead {
  my ($PathSrc,@ArgRead) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  int(@ArgRead) or exit 1;

  ##############################################################################
  # get and format data

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([ $PathSrc ],
    -cend    => 0,  # calculate cend binding for all reads
    -pair    => 1,  # pair analysis always!
    %ProgOpt,
    -debug   => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf "%s. CPU time for constructing contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }

  ##############################################################################
  # table headers

  # output files
  my %file;
  $file{OutStump} = &PrepOstump();
  $file{OutTabRead} =  $file{OutStump} .'Read.tab';
  $file{OutTabClone} = $file{OutStump} .'Clone.tab';
  my $hOutRead = FileHandle->new($file{OutTabRead},'w');
  my $hOutCln = FileHandle->new($file{OutTabClone},'w');

  # header of read table
  $file{HeadPhrCall} = join (' ', $ProgFile, "-$ProgMode",
    (map{ "$_=$ProgOpt{$_}" } keys %ProgOpt), $PathSrc,
    &ListMaxfirst(\@ArgRead,3,-ElemExceed=>'...'));
  print  $hOutRead "# $ProgFstump -$ProgMode\n";
  print  $hOutRead "# follow read in contig set - read listing\n";
  print  $hOutRead "# call: $file{HeadPhrCall}\n";
  printf $hOutRead "# date/time: %s\n", &TimeStr();
  printf $hOutRead "# contig set data source: %s\n", $PathSrc||"''";
  printf $hOutRead "# query entries: %d\n", int(@ArgRead);
  my @ColumnRead = qw(read_id  read_relation
                   contig_id  contig_readnum  contig_end  cend_dist);
  printf $hOutRead "#\n# column labels:\n# %s\n", join("\t",@ColumnRead);

  # header of contig table
  print  $hOutCln "# $ProgFstump -$ProgMode\n";
  print  $hOutCln "# follow read in contig set - clone listing\n";
  print  $hOutCln "# call: $file{HeadPhrCall}\n";
  printf $hOutCln "# date/time: %s\n", &TimeStr();
  printf $hOutCln "# contig set data source: %s\n", $PathSrc||"''";
  printf $hOutCln "# query entries: %d\n", int(@ArgRead);
  my @ColumnClone = qw(clone_id  clone_present  read_present
                    consistent  consistency_err  clone_read_num
                    bridging  contig_ids);
  printf $hOutCln "#\n# column labels:\n# %s\n", join("\t",@ColumnClone);

  ##############################################################################
  # locate query reads, output to table

  # loop over read arguments
  my (%report,$pRepEntry);
  foreach my $ReadQuery (@ArgRead) {
    my $pField = &ReadidToFields ($ReadQuery);

    # loop over encountered reads for current clone
    my @ReadPick = map{ @{$_||[]} }
      &DataTreeSlc ($$pCtgStruct{clone}{$$pField{cln}},
      [['^-?[01]$','regexp'],[0,'all']], -debug=>$dbg2);
    foreach my $pRead (@ReadPick) {
      $$pRead{field} = &ReadidToFields ($$pRead{id});
      $$pRead{RepEntry} = {
        read_id        => $$pRead{id},
        read_relation  => ($$pRead{field}{prd} eq $$pField{prd}) ? (
          ($$pRead{field}{prm}==$$pField{prm} and $$pRead{field}{num}==$$pField{num}) ? 'query' : 'para'
          ) : '',
        contig_id      => $$pRead{contig}{id},
        contig_readnum => int (keys %{$$pRead{contig}{read}}),
        contig_end     => $$pRead{cend}{idnum},
        cend_dist      => $$pRead{CEndDist},
        };
      # read_relation for prd!=prd
      $$pRead{RepEntry}{read_relation} ||= $$pField{prd} ? (
        $$pRead{field}{prd} ? 'counter' : 'extra'
        ) : 'std';

    }
    foreach my $pRead (sort {
      $b->{RepEntry}{read_relation} cmp $a->{RepEntry}{read_relation} or
                   $a->{field}{num} <=> $b->{field}{num} }
      @ReadPick
    ) {
      $pRepEntry = $$pRead{RepEntry};
      printf $hOutRead "%s\n", join ("\t", @{$pRepEntry}{@ColumnRead});
    }

    # single-line report for clone
    { # control block
      $pRepEntry = { clone_id => $$pField{cln} };
      $$pRepEntry{clone_present} = (@ReadPick ? 1:0) or last;
      $$pRepEntry{read_present} = ($$pCtgStruct{read}{$ReadQuery} ? 1:0);
      $$pRepEntry{clone_read_num} = int @ReadPick;
      $$pRepEntry{consistency_err} = '';
      if ( (map{ @{$_||[]} } &DataTreeSlc (\@ReadPick,
        [[0,'all'],['contig']], -unique=>1, -debug=>$dbg2) ) >= 3
      ) {
        $$pRepEntry{consistency_err} = 'ContigDisperse';
      }
      $$pRepEntry{consistent} = ($$pRepEntry{consistency_err} ? 0 : 1) or last;
      $$pRepEntry{bridging} = ( (map{ @{$_||[]} } &DataTreeSlc (\@ReadPick,
        [[0,'all'],['contig']], -unique=>1, -debug=>$dbg2) )
        == 2) ? 1 : 0;
      $$pRepEntry{contig_ids} = join (',',
        map{ @{$_||[]} } &DataTreeSlc (\@ReadPick,
        [[0,'all'],['contig'],['id']], -unique=>1, -debug=>$dbg2) );
    }
    push @{$report{clone}}, $pRepEntry;
    printf $hOutCln "%s\n", join ("\t", @{$pRepEntry}{@ColumnClone});
  }

  $bTimer and printf "%s. CPU time for analysis & report: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # final work

  # clone report summary
  print  $hOutCln "# SUMMARY\n";
  $report{found} = int (grep{ $_->{read_present} } @{$report{clone}});
  printf $hOutCln "# query reads found: %d (%.2f)\n",
    $report{found}, $report{found} / int(@ArgRead);
  $report{bridge} = int (grep{ $_->{bridging} } @{$report{clone}});
  printf $hOutCln "# query reads bridging: %d (%.2f)\n",
    $report{bridge}, $report{bridge} / int(@ArgRead);
  unless ($ProgOpt{-SlcCnum}>1 or $ProgOpt{-SlcRnum}>1) {
    print $hOutCln "#   note: joined contigs may be singlets, use switches -SlcCnum or -SlcRnum\n",
  }
}


# report read coverage for contigs
#
# INTERFACE
# - argument 1:  assembly data source
# - argument 2*: contig specifier(s) for selection
#
# - global options:
#   -debug       print debug protocol to STDOUT
#   -OutReport   save showrelationship report to specified file
#   -OutStump    [STD]
#   -SlcID       select for read IDs
#   -SlcCnum     [STD]
#   -SlcRnum     [STD]
#   -timer       print time performance protocol to STDOUT
#
sub ProgCoverage {
  my (@ArgSrc) = @_;
  my ($debug, $dbg2, $bTimer, $time, $SlcID);
  my ($pCtgStruct, $pCtg, %file, $hOutTab);
  my (%plot, $pGraph);
  my ($i);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $bTimer = $ProgOpt{-timer};

  $SlcID = $ProgOpt{-SlcID} || '.';
  $file{OutStump} = &PrepOstump();
  $debug and printf "%s. assembly data source: %s\n", &MySub, $ArgSrc[0]||"''";

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  $pCtgStruct = &ContigStructMeta ([@ArgSrc], %ProgOpt,
    # we don't use -SlcCnum here by default, cause its performance will depend
    # on the read/clone naming syntax
    -SlcID   => undef,
    -SlcRnum => $ProgOpt{-SlcRnum} || 2,
    -debug   => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf "%s. CPU time for constructing contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }

  # start tabular output
  $file{table} = "$file{OutStump}.tab";
  $hOutTab = &GetWriteHandle ($file{table}, -filestm=>'coverage data to file', -exit=>1);
  print  $hOutTab "# $ProgFile -$ProgMode\n";
  printf $hOutTab "# call: %s\n", join (' ', $ProgFile, "-$ProgMode",
    (map{ "$_=$ProgOpt{$_}" } keys %ProgOpt),
    &ListMaxfirst(\@ArgSrc,3,-ElemExceed=>'...'));
  printf $hOutTab "# date/time: %s\n", &TimeStr();
  print  $hOutTab "#\n";
  print  $hOutTab "# column labels:\n";
  printf $hOutTab "# contig\tstretch_off\tstretch_end\tstretch_len\tcoverage\n";
  print  $hOutTab "#\n";

  ##############################################################################
  # analysis

  $plot{ImgFlag} = defined($ProgOpt{-OutImg})
    || $ProgOpt{-OutImgRelhigh} || $ProgOpt{-OutImgWidth}
    || $ProgOpt{-WinSize};

  # loop over contigs
  foreach $pCtg (values %{$$pCtgStruct{contig}}) {

    # get coverage plot from sequence data
    # do not sample the actual coverage values, but sample the local changes
    #   of seq coverage ("differential plot")
    $plot{differential} = [
      map{ ([$_->{pos}{'-1'},1], [$_->{pos}{'1'}+1,-1]); }
      grep{ $_->{id} =~ m/$SlcID/o }  # optionally, apply seq ID filter
      values %{$$pCtg{read}} ];
    $debug and printf "%s. %d differential events\n", &MySub, int @{$plot{differential}};
    $plot{diffuniq} = [[0,0]];
    # change "differential plot" to unique position values => @{$plot{diffuniq}}
    # change "differential values" to actual coverage values
    foreach (sort{ $a->[0]<=>$b->[0] } @{$plot{differential}}) {
      my $PosCurr = $plot{diffuniq}[-1][0];
      if ($_->[0] == $PosCurr) {
        $plot{diffuniq}[-1][1] += $_->[1];
      } else {
        $_->[1] += $plot{diffuniq}[-1][1];
        push @{$plot{diffuniq}}, $_;
      }
    }
    delete $plot{differential};

    # output stretches of constant coverage, tabular
    printf $hOutTab "# contig %s\n", $$pCtg{id};
    my $pCovPrev = $plot{diffuniq}[1];
    for ($i=2; $i<@{$plot{diffuniq}}; $i++) {
      printf $hOutTab "%s\t%d\t%d\t%d\t%d\n", $$pCtg{id},
        $plot{diffuniq}[$i-1][0], $plot{diffuniq}[$i][0]-1,
        $plot{diffuniq}[$i][0]-$plot{diffuniq}[$i-1][0],
        $plot{diffuniq}[$i-1][1];
    }

    # create image for contig
    if ($plot{ImgFlag}) {
      my $hOutDat;

      # create 2D plot, holding actual coverage coordinates
      $plot{Exact} = [[0,0]];
      for ($i=1; $i<@{$plot{diffuniq}}; $i++) {
        push @{$plot{Exact}},
          [$plot{diffuniq}[$i][0]-0.9, $plot{diffuniq}[$i-1][1]],
          [$plot{diffuniq}[$i][0]-0.1, $plot{diffuniq}[$i][1]];
      }
      $plot{object} = Math::Plot2D->new($plot{Exact});

      # determine image parameters
      $plot{ImgWidth} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
      $plot{RelHigh} = $ProgOpt{-OutImgRelhigh} || $ProgParam{default}{OutImgRelhigh}{coverage};

      # smoothen 2D plot
      $plot{ImgStep} = ($plot{object}->Xmax() - $plot{object}->Xmin()) / $plot{ImgWidth} / 2;
      $debug and printf "%s. smooth re-plot, step size: %f\n", &MySub, $plot{ImgStep};
      $plot{SmoothWinSize} = $ProgOpt{-WinSize} || 100;
      $plot{Smooth} = $plot{object}->SmoothPlot ($plot{ImgStep},
        -window=>$plot{SmoothWinSize}, -debug=>$debug);
      # meanwhile debug
      if ($debug) {
        printf "%s. %d differential loci\n", &MySub, int @{$plot{diffuniq}};
        printf "%s. writing extra tabular files %s_%s_(raw|exact|smooth).tab\n", &MySub,
          $file{OutStump}, $$pCtg{id};
        &WriteFile (sprintf ("%s_%s_raw.tab", $file{OutStump}, $$pCtg{id}),
          join ('', map{ $_->[0] ."\t". $_->[1] ."\n" }@{$plot{diffuniq}}));
        &WriteFile (sprintf ("%s_%s_exact.tab", $file{OutStump}, $$pCtg{id}),
          join ('', map{ $_->[0] ."\t". $_->[1] ."\n" }@{$plot{Exact}}));
        &WriteFile (sprintf ("%s_%s_smooth.tab", $file{OutStump}, $$pCtg{id}),
          join ('', map{ $_->[0] ."\t". $_->[1] ."\n" }@{$plot{Smooth}}));
      }
      delete $plot{object};

      foreach my $ItImg ('Exact', 'Smooth') {

        # plot graph
        $pGraph = &DataClone ( {
          BgTranspar => $ProgOpt{-OutImgTransp},
          plot => [
            { DimPixel  => { x=>$plot{ImgWidth} },
              HeightRel => $plot{RelHigh},
              DataType  => 'AA',
              data      => $plot{$ItImg},
              DataRange => { x=>[0,$$pCtg{length}], y=>[0] },
              ReprType  => 'line',
              ReprColor => 'black',
            },
            ],
          scale => [
            { PlotNum  => 0,
              location => 'bottom',
              color    => 'black',
            },
            { PlotNum  => 0,
              location => 'left',
              color    => 'black',
            },
            { PlotNum  => 0,
              location => 'right',
              color    => 'black',
            },
            ],
          }, -debug=>$dbg2);

        # save plot graph data
        $file{$ItImg}{data} = sprintf ("%s_%s_%s_img.dat",
          $file{OutStump}, $$pCtg{id}, $ItImg);
        if ($hOutDat = &GetWriteHandle($file{$ItImg}{data})) {
          &DataPrint ($pGraph, -handle=>$hOutDat, -debug=>$dbg2);
        }
        # create image
        $file{$ItImg}{img} = sprintf "%s_%s_%s.png",
          $file{OutStump}, $$pCtg{id}, $ItImg;
        if (&Graph ($pGraph, -save=>$file{$ItImg}{img}, -debug=>$dbg2)) {
          printf "writing image file %s\n", $file{$ItImg}{img};
        } else {
          printf STDERR "%s. ERROR: failed to write graph image (cumulative length distribution)\n", &MySub;
        }

        if ($bTimer) {
          printf "%s. CPU time for coverage calc./drawing for contig: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
          $time = &Sum ((times)[0,2]);
        }
      }
    }  # end if ($plot{ImgFlag})
  }

  # finish tabular output
  $debug and printf "written tabular file %s\n", $file{table};
}


# list minimal tiling set of reads or clones
#
# INTERFACE
# - argument 1:  GAP4 database
# - argument 2*: contig specifier(s) for selection
#
# - global options:
#   -debug       [STD]
#   -SlcData     switch to listing of clones rather than reads
#
# DEBUG, CHANGES, ADDITIONS
# - Theoretically, the data source may be abstract assembly data source, not only
#   a GAP4 database. But currently, we rely on Stadeb scripting to get the
#   information.
# - implement -SlcCnum, -SlcRnum etc.
#
sub ProgMinimal {
  my ($ArgProj, @contig) = @_;
  my (%gap, $debug, $dbg2);
  my ($contig, %call, %path, %foid);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  %gap = %{ &GapNameFields($ArgProj) };
  $contig = join (' ', @contig);

  # create AlnK project
  $path{script} = $ProgParam{TmpManag}->Create();
  $call{list} = <<END_CALLMINIM;
set contiglist "$contig"
if { "\$contiglist" == "" } {
  set contiglist [CreateAllContigList \$io]
}
set mylist [minimal_coverage -io \$io -contigs \$contiglist]
foreach ElemCurr \$mylist {
  puts \$ElemCurr
}
END_CALLMINIM
  &WriteFile ($path{script}, $call{list});
  $foid{plain} = &GapSafeRead ("$CorePath{call}{GapExec} $gap{full} $path{script}");

  # convert raw list into list of clones?
  # keep order
  if ($ProgOpt{-SlcData} eq 'clone') {
    @{$foid{raw}} = grep{ $_ } split (/\n/, $foid{plain});
    foreach (@{$foid{raw}}) {
      $foid{clone} = $_;
      $foid{clone} =~ s/\..+$//;
      $foid{index}{$foid{clone}} or push @{$foid{fin}}, $foid{clone};
      $foid{index}{$foid{clone}} = 1;
    }
    print map{"$_\n"} @{$foid{fin}};

  # list reads only
  } else {
    print $foid{plain};
  }

  # tidy up
  $debug or unlink ($path{script});
}


################################################################################
# analyse and report scaffolds
################################################################################


# provide reverse reads for GAP4 database
#
# INTERFACE
# - argument 1:  GAP4 database
# - argument 2*: contig specifier(s) for selection
#
# - global options:
#   -debug       [STD]
#   -RcCloneLen  [STD]
#   -SlcCnum     [STD]
#  (-SlcEnds     select contig end range
#   -SlcID       select for read IDs
#   -SlcLen      [STD]
#   -SlcRnum     [STD]
#   -timer       [STD]
#
sub ProgEndRead {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc],
    -pair  => 1,  # pair analysis always!
    %ProgOpt,
    -cend  => 0,  # calculate cend binding for all reads
    -debug => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = &Sum ((times)[0,2]);
  }

  ##############################################################################
  # analysis
  my ($pClone,@site,$pSite);

  # loop over clones
  foreach $pClone (values %{$$pCtgStruct{clone}}) {
    if ($debug) {
      printf STDERR "%s. analysing clone %s:\n", &MySub, $$pClone{id};
      printf STDERR "  sequencing status: %d\n", $$pClone{SeqStatus};
      printf STDERR "  dispersed for single clone ends: %d\n", int(@{$$pClone{DispsPrd}});
      printf STDERR "  dispersed pair of clone ends: %d\n", $$pClone{DispsClnEnd};
      printf STDERR "  dispersed readings: %d\n", $$pClone{DispsRead};
    }
    if ($$pClone{SeqStatus} and ! $$pClone{DispsRead}) { next }
    my %CloneSite=();

    # analyse clone (- get name fields?)
    my $pField = &ReadidToFields($$pClone{id}.'.s1',-debug=>$dbg2);

    # loop over readings
    # - determine contig end -> remind in index
    # - filter interesting sites: short distance from contig end
    foreach my $pRead (map{values %$_} $$pClone{'1'}, $$pClone{'-1'}) {
      my $pCend = $$pRead{cend};

      # create/access site entry
      $pSite = $CloneSite{$$pCend{id}} ||= {
        clone  => $pClone,
        cend   => $pCend,
        contig => $$pCend{contig},
        # later come:
        #   CEndDist
        #   ContinMax
        #   ContinProb
        #   ...
        };
      push @{$$pSite{read}}, $pRead;
      if ($$pRead{CEndDist} and $$pRead{CEndDist}>$$pSite{CEndDist}) {
        $$pSite{CEndDist} = $$pRead{CEndDist};
        $$pSite{ContinMax} =
          (&CloneLenCutoff($$pField{lib},-rc=>$ProgOpt{-RcCloneLen},-default=>1) || 0)
          - $$pSite{CEndDist};
        if ($$pSite{ContinMax} > 0) {
          my $pCLEstim = &CloneLenEstim ($$pField{lib}||'aa', -rc=>$ProgOpt{-RcCloneLen}, -default=>1);
          if ($pCLEstim and $$pCLEstim{s}) {
            $$pSite{ContinProb} = &GaussQuant (($$pCLEstim{mean}-$$pSite{CEndDist})/$$pCLEstim{s});
              # $$pSite{CEndDist} is the least insert size of the clone
            if ($$pSite{CEndDist} > $$pCLEstim{mean}) {
              $$pSite{ContinProb} = 1 - $$pSite{ContinProb};
            }
          } else {
            printf STDERR "%s. ERROR: no insert size estimates for clone %s\n", &MySub, $$pClone{id};
          }
        } else {
          $$pSite{ContinProb} = 0;
        }
      }
    }

    # have all reads consistent spacial arrangement in relation to read strandedness?
    # this may be swapped from &ProgPairDiag
    # ...

    # sample interesting sites
    foreach $pSite (grep{ $_->{ContinProb} } values %CloneSite) {
      push @site, $pSite;
      if (int(@site)%10 == 0 and $debug) {
        printf STDERR "%s. selected site: clone %s, contig end %s, contin. probability %d\n", &MySub,
          $$pClone{id}, $$pSite{cend}{id}, $$pSite{ContinProb};
      }
    }
  }

  ##############################################################################
  # prepare report

  # table header
  my $hOutTab;
  if ($ProgOpt{-OutTab}) {
    $hOutTab = FileHandle->new($ProgOpt{-OutTab},'w');
  } else {
    $hOutTab = \*STDOUT;
  }
  print  $hOutTab "# $ProgFile -$ProgMode\n";
  print  $hOutTab "# list of possibly continuing clones and associated contigends\n";
  print  $hOutTab "# \n";
  printf $hOutTab "# date/time: %s\n", &TimeStr();
  printf $hOutTab "# clone length rc: %s\n", &CloneLenRcFile();
  printf $hOutTab "# assembly data source: %s%s\n", $ArgSrc[0]||"''", (@ArgSrc>1) ? ' ...':'';
  printf $hOutTab "# total contigs: %d\n", int keys %{$$pCtgStruct{contig}};
  printf $hOutTab "# total readings: %d\n", int keys %{$$pCtgStruct{read}};
  print  $hOutTab "# \n";
  print  $hOutTab "# column labels:\n";
  print  $hOutTab "# clone\tCloneComplete\tCloneDispsCtg\treads"
    ."\tcontig\tCtgEnd\tCtgEndDist\tContinProb\n";

  # list of clones per contig end
  foreach $pSite (sort {
     $a->{contig}{id} cmp $b->{contig}{id} or
    $a->{cend}{idnum} cmp $b->{cend}{idnum} or
     $b->{ContinProb} <=> $a->{ContinProb} or
      $a->{clone}{id} cmp $b->{clone}{id}
  } @site) {
    $pClone = $$pSite{clone};
    printf "%s\t%d\t%d\t%s\t%s\t%s\t%d\t%s\n",
      $$pClone{id}, $$pClone{SeqStatus}, $$pClone{DispsRead},
      join(',',map{$_->{id}}@{$$pSite{read}}),
      $$pSite{contig}{id}, $$pSite{cend}{idnum}, $$pSite{CEndDist},
      $$pSite{ContinProb};
  }
  $hOutTab->close();
}


# report instances of paired readings corresponding to clone
#
# INTERFACE
# - argument 1:    assembly data source
# - argument 2*:   either additional assembly data source(s) (like Arg1) or
#                  contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -OutReport     save showrelationship report to specified file
#   -OutTab        output list of paired clones to file
#   -RcTgtSpecif   [STD]
#   -SlcCnum       [STD], this is not recommended
#   -SlcLen        [STD], this is not recommended
#   -SlcSpecif     [STD], this is not recommended
#   -SlcRnum       [STD], this is not recommended
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#   -verbose       print extended protocol to STDERR
#
# DESCRIPTION
# - ReadWatcher.pl -CloneLen depends on this function!!!
#
# DEBUG, CHANGES, ADDITIONS
# - i would like to drop this program mode. However, ReadWatcher.pl -CloneLen
#   depends on it!
#
sub ProgPairReport {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  $debug and printf STDERR "%s. analysis of assembly data source %s%s\n", &MySub,
    $ArgSrc[0]||"''", (@ArgSrc>1) ? ' ...':'';

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc],
    -pair  => 1,  # pair analysis always!
    %ProgOpt,
    -cend  => 0,  # calculate cend binding for all reads
    -debug => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  $bTimer and printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # analysis
  my ($pClone,$pRead);
  my (%CloneToContigs,@unpair,@pair,$PlainList);

  # loop over contigs
  # at contig ends: sample short reads, reads at clone ends
  ProgPairReportClone:
  foreach $pClone (values %{$$pCtgStruct{clone}}) {
    if ($$pClone{SeqStatus} != 1) { next }

    # analyse clone - get name fields
    $debug and printf STDERR "%s. analysing clone %s\n", &MySub, $$pClone{id};
    $$pClone{field} ||= &ReadidToFields ($$pClone{id}.'.s1', -debug=>$dbg2);

    # clone consistently residing in one contig?
    foreach $pRead (values %{$$pClone{'1'}}, values %{$$pClone{'-1'}}) {
      unless (defined($pRead) and ref($pRead) eq 'HASH') {
        printf STDERR "%s. WARNING: undefined read sub-structure, anchored in clone %s\n", &MySub, $$pClone{id};
        next;
      }
      $CloneToContigs{$$pClone{id}}{$$pRead{contig}{idnum}} = $$pRead{contig};
    }
    if (keys(%{$CloneToContigs{$$pClone{id}}}) > 1) {
      $$pClone{DiagnMethod} = 'contig-disperse';
      push @unpair, $pClone;
      next;
    } else {
      ($$pClone{contig}) = values %{$CloneToContigs{$$pClone{id}}};
    }

    # derive strandedness of clone
    foreach my $ItEnd (qw(1 -1)) {
      foreach $pRead (values %{$$pClone{$ItEnd}}) {
        $$pClone{orient} ||= $ItEnd * $$pRead{orient};

        # all reads of clone showing consistent strandedness?
        if ($$pRead{orient}*$ItEnd != $$pClone{orient}) {
          $$pClone{DiagnMethod} = 'strandedness';
          push @unpair, $pClone;
          next ProgPairReportClone;
        }
      }
    }

    # start and end positions of clone
    # positions refer to the original contig situation, irrespective of read/clone orient.
    foreach $pRead (values %{$$pClone{'1'}}, values %{$$pClone{'-1'}}) {
      $$pClone{pos}{'-1'} ||= $$pRead{pos}{'-1'};
      $$pClone{pos}{'-1'}   = &Min ($$pClone{pos}{'-1'}, $$pRead{pos}{'-1'});
      $$pClone{pos}{'1'} ||= $$pRead{pos}{'1'};
      $$pClone{pos}{'1'}   = &Max ($$pClone{pos}{'1'}, $$pRead{pos}{'1'});
    }

    # have all reads consistent spacial arrangement in relation to read strandedness?
    foreach $pRead (values %{$$pClone{'1'}}) {
      if (
        ($$pClone{orient}>0 and $$pRead{pos}{'-1'}-150 > $$pClone{pos}{'-1'}) or
        ($$pClone{orient}>0 and $$pRead{pos}{'1'} -150 > $$pClone{pos}{'1'}) or
        ($$pClone{orient}<0 and $$pRead{pos}{'1'} +150 < $$pClone{pos}{'1'}) or
        ($$pClone{orient}<0 and $$pRead{pos}{'-1'}+150 < $$pClone{pos}{'-1'})
      ) {
        $$pClone{DiagnMethod} = 'divergence';
          # further information, like actual read offset/end and orientation may
          # be got from output of Gap.pl -index
        push @unpair, $pClone;
        next ProgPairReportClone;
      }
    }

    # reads consistently paired, determine length of clone
    $$pClone{length} = $$pClone{pos}{'1'} - $$pClone{pos}{'-1'} + 1;
    push @pair, $pClone;
  }

  ##############################################################################
  # prepare report

  # consistent clones
  foreach $pClone (sort {
    $a->{field}{lib} cmp $b->{field}{lib} or
    $a->{field}{plt} <=> $b->{field}{plt} or
    $a->{field}{coo} cmp $b->{field}{coo}
  } @pair) {
    $PlainList .= sprintf ("%s\t%s\t%s\t%d\n", $$pClone{id},
      $$pClone{contig}{source}{path}, $$pClone{contig}{id},
      $$pClone{length});
  }
  print  "# $ProgFile -$ProgMode\n";
  printf "# date/time: %s\n", &TimeStr();
  printf "# assembly data source: %s%s\n", $ArgSrc[0]||"''", (@ArgSrc>1) ? ' ...':'';
  printf "# number of contigs: %d\n", int keys %{$$pCtgStruct{contig}};
  print  "#\n";
  print  "# CONSISTENT CLONES, CLONE LENGTHS\n";
  print  "#\n";
  print  "# column labels:\n";
  print  "# clone\tassembly\tcontig\tclone_len\n";
  print $PlainList;
  if ($ProgOpt{-OutTab}) {
    open  (OUTTAB, ">$ProgOpt{-OutTab}");
    print  OUTTAB "# $ProgFile -$ProgMode\n";
    printf OUTTAB "# date/time: %s\n", &TimeStr();
    printf OUTTAB "# assembly data source: %s%s\n", $ArgSrc[0]||"''", (@ArgSrc>1) ? ' ...':'';
    printf OUTTAB "# number of contigs: %d\n", int keys %{$$pCtgStruct{contig}};
    print  OUTTAB "# \n";
    print  OUTTAB "# column labels:\n";
    print  OUTTAB "# clone\tassembly\tcontig\tclone_len\n";
    print  OUTTAB $PlainList;
    close  OUTTAB;
  }

  # inconsistent clones
  print  "#\n";
  print  "# CLONES SHOWING INCONSISTENCIES\n";
  print  "#\n";
  print  "# legend of inconsistency categories:\n";
  print  "# \"contig-disperse\"  readings from same clone reside on different contigs.\n";
  print  "#                    These cases do not necessarily reflect inconsistencies.\n";
  print  "# \"strandedness\"     readings primed from contrary ends of the clone\n";
  print  "#                    show the same orientation\n";
  print  "# \"divergence\"       readings primed from contrary ends of the clone\n";
  print  "#                    are leading off from each other\n";
  print  "#\n";
  print  "# column labels:\n";
  print  "# clone\tdiagn_method\tcontigs\n";
  foreach $pClone (sort {
    $a->{field}{lib} cmp $b->{field}{lib} or
    $a->{field}{plt} <=> $b->{field}{plt} or
    $a->{field}{coo} cmp $b->{field}{coo}
    } @unpair
  ) {
    my %contig;
    foreach $pRead (values %{$$pClone{'1'}}, values %{$$pClone{'-1'}}) {
      $contig{$$pRead{contig}{id}} = 1;
    }
    printf "%s\t%s\t%s\n", $$pClone{id},
      $$pClone{DiagnMethod}, join(',',keys(%contig));
  }
}


# calculate and report scaffold map
#
# INTERFACE
# - argument 1:    assembly data source
# - argument 2*:   either additional assembly data source(s) (like Arg1) or
#                  contig specifier(s) for selection
#
# - global options:
#   -debug         print debug protocol to STDOUT
#   -export        export specified number of scaffolds into separate
#                  databases
#   -OutImg        save image representations of statistics etc.
#   -OutImgWidth   output image pixel width
#   -OutReport     save showrelationship report to specified file
#   -OutStump      [STD]
#   -RcCloneLen    [STD]
#   -RcTgtSpecif   [STD]
#   -SlcCnum       no effect, overwritten by call of &ContigStructMeta
#   -SlcLen        [STD], taking effect on computed scaffolds
#   -SlcEnds       no effect
#   -SlcLen        [STD], taking effect on computed scaffolds
#   -SlcRnum       no effect
#   -SlcSpecif     [STD], taking effect on computed scaffolds
#   -SlcSpecifDlt  [STD], taking effect on computed scaffolds
#   -timer         print time-performance protocol to STDOUT
#   -verbose       print extended protocol to STDOUT
#
# DESCRIPTION
# - for description of scaffold set data structure see
#   &SeqAlign::ScaffdAssemb::ScaffdStruct
#
sub ProgScaffdMap {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %file; $file{OutStump} = &PrepOstump();

  # get scaffold set data structure
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ScaffdStruct ([@ArgSrc],
    %ProgOpt,  # including -debug
    -OutStump  => $file{OutStump},
     defined($ProgOpt{-SlcCnum}) ?
   (-SlcCnum   => $ProgOpt{-SlcCnum}) : (),
    -SlcRnum   => undef,  # -SlcRnum would take effect in &ContigStruct
    -SlcSingle => 1,  # singlet contigs, not singlet reads!
    );
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ScaffdStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct or &SeqAlign::ScaffdAssemb::ScaffdStruct
  $bTimer and printf "%s. CPU time for retrieving scaffold set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # list of clones
  my ($pRead,@column,$pClone);

  # report header
  $file{RptClone} = "$file{OutStump}_Clone.tab";
  my $hOutTab = &GetWriteHandle ($file{RptClone}, -exit=>1);
  print  $hOutTab "$$pCtgStruct{report}{theader}";
  print  $hOutTab "# clone list\n";
  print  $hOutTab "#\n";
  print  $hOutTab "$$pCtgStruct{report}{tdatabase}";
  print  $hOutTab "#\n";
  printf $hOutTab "# column labels:\n# %s\n",
    join ("\t", @column=qw(id length end_reads custom_reads error error_custom
      contig_disp_prmend contig_prmend contig_extra));

  # loop over clones
  foreach $pClone (values %{$$pCtgStruct{clone}}) {
    if ($$pClone{SeqStatus} < 1) { next }

    # contigs that harbor end-primer-derived readings
    my %ctg = map{($_=>$_)} map{@{$_||[]}} &DataTreeSlc($pClone,
      [['\b1$','regexp'],[0,'all'],['contig']], -unique=>1);
    # fill first report fields
    my %report = (
      id => $$pClone{id},
      end_reads => join (' ', map{@{$_||[]}} &DataTreeSlc($pClone,
        [['\b1$','regexp'],[0,'all'],['id']], -unique=>1) ),
      custom_reads => join (' ', map{@{$_||[]}} &DataTreeSlc($pClone,
        [['0'],[0,'all'],['id']], -unique=>1) ),
      error => int(@{$$pClone{DispsPrd}||[]}) ? $$pClone{PairError}:'',
      error_custom => int(@{$$pClone{DispsPlusOther}||[]}) ?
        ('extra contigs: '.join(' ',@{$$pClone{DispsPlusOther}||[]})) : '',
      contig_disp_prmend => $$pClone{DispsClnEnd},
      contig_prmend => join (' ', map{$_->{id}||''} values %ctg),
      );

    # determine clone length (single contig)
    ProgScaffdMapCLen: {
    if (!$report{error} and int(values(%ctg))==1) {

      # derive strandedness of clone, check consistency
      foreach my $ItEnd (qw(1 -1)) {
        foreach $pRead (values %{$$pClone{$ItEnd}}) {
          $$pClone{orient} ||= $ItEnd * $$pRead{orient};
          if ($$pRead{orient}*$ItEnd != $$pClone{orient}) {
            $report{error} = 'end read orientation in single contig';
            last ProgScaffdMapCLen;
          }
        }
      }

      # start/end/length values of clone on contig
      # positions refer to the original contig situation, irrespective of
      #   read/clone orientation
      foreach $pRead (values(%{$$pClone{'1'}}), values(%{$$pClone{'-1'}})) {
        $$pClone{pos}{'-1'} ||= $$pRead{pos}{'-1'};
        $$pClone{pos}{'-1'}   = &Min ($$pClone{pos}{'-1'}, $$pRead{pos}{'-1'});
        $$pClone{pos}{'1'} ||= $$pRead{pos}{'1'};
        $$pClone{pos}{'1'}   = &Max ($$pClone{pos}{'1'}, $$pRead{pos}{'1'});
      }
      $$pClone{length} = $$pClone{pos}{'1'} - $$pClone{pos}{'-1'} + 1;
      $$pClone{lcutoff} = &CloneLenCutoff ($$pClone{field}{lib}||'fake',
        -rc=>$ProgOpt{-RcCloneLen}, -default=>1, -debug=>$dbg2) || 0;
      if ($$pClone{lcutoff} and $$pClone{length}>$$pClone{lcutoff}) {
        $report{error} = "clone length in single contig, > cutoff $$pClone{lcutoff}";
        last ProgScaffdMapCLen;
      }

      # do custom-primed reads have consistent spacial arrangement in relation
      #   to end-derived reads?
      if (values %{$$pClone{'0'}} and !$report{error_custom}) {
        my $TopolToler = 150;
        my $TopolRange = Math::Range->new([$$pClone{pos}{'-1'}-150, $$pClone{pos}{'1'}+150]);
        foreach $pRead (values %{$$pClone{'0'}}) {
          if (!($TopolRange->covers($$pRead{pos}{'-1'}) and
                $TopolRange->covers($$pRead{pos}{'1'}))
          ) {
            $report{error_custom} = 'topology of custom-primed reads';
            last ProgScaffdMapCLen;
          }
        }
      }

    } } # end label "ProgScaffdMapCLen"
    $report{length} ||= $$pClone{length};

    # determine least clone length of dispersed clone
    if (! $report{error} and int(values(%ctg))==2) {
      $$pClone{length} =
        &Max (map{ $_->{CEndDist} } values %{$$pClone{'1'}}) +
        &Max (map{ $_->{CEndDist} } values %{$$pClone{'-1'}});
      $$pClone{lcutoff} = &CloneLenCutoff ($$pClone{field}{lib}||'fake',
        -rc=>$ProgOpt{-RcCloneLen}, -default=>1, -debug=>$dbg2) || 0;
      if ($$pClone{lcutoff} and $$pClone{length}>$$pClone{lcutoff}) {
        $report{error} = "clone length in two contigs, > cutoff $$pClone{lcutoff}";
      }
      $$pClone{length} = ">$$pClone{length}";
    }
    $report{length} ||= $$pClone{length};

    # output entry
    print  $hOutTab join("\t",@report{@column}), "\n";
  }
  #if ((getlogin()||getpwuid($<)) eq 'szafrans') { exit }

  ##############################################################################
  # scaffold list, statistical report (incl. optional image)
  my ($pScaffd);

  # table header
  $bTimer and $time = &Sum ((times)[0,2]);
  $file{RptScaffd} = "$file{OutStump}_Scaffd.tab";
  $hOutTab = &GetWriteHandle ($file{RptScaffd}, -exit=>1);
  print  $hOutTab "$$pCtgStruct{report}{theader}";
  print  $hOutTab "# scaffold list\n";
  print  $hOutTab "#\n";
  print  $hOutTab "$$pCtgStruct{report}{tdatabase}";
  print  $hOutTab "#\n";
  printf $hOutTab "# column labels:\n# %s\n",
    join ("\t", 'id', 'CtContigMember', 'CtContigAll', 'ContigLenMember');
  foreach $pScaffd (sort{ &CendIdSort($a->{cend}[0]{id},$b->{cend}[0]{id}) }
    values %{$$pCtgStruct{scaffd}}
  ) {
    printf $hOutTab "%s\t%d\t%d\t%d\n", $$pScaffd{id},
      $$pScaffd{CtContig}{member}, $$pScaffd{CtContig}{all},
      $$pScaffd{length};
  }

  # image showing cumulative scaffold length
  if (defined($ProgOpt{-OutImg})) {
    my ($pPlotRaw, $poPlot, @PlotCumullen, $integral, %img, %graph, $hOutDat);

    # calculate entry cumulation dependent on length
    $pPlotRaw = map{${$_||[]}[3]} scalar &PlainToTable ($file{RptScaffd},
      -TabType=>'AC', -comments=>1);
    if ($debug) {
      printf "%s. %d length entries in scaffold report\n", &MySub, int @$pPlotRaw;
      printf "  first 10 entries: %s\n", join (', ', @{$pPlotRaw}[0 .. &Min(9,$#$pPlotRaw)]);
    }
    $poPlot = Math::Plot2D->new(&DistribEmpir (
      [ map{ [log($_)/log(10),$_ ] } @$pPlotRaw ],
      -debug=>$dbg2), -debug=>$dbg2);
    $img{width} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
    $img{RelHigh} = $ProgOpt{-OutImgRelhigh} || $ProgParam{default}{OutImgRelhigh}{ScaffdLength};
    $img{StepNum} = $img{width} * 2;
    $img{StepSize} = ($poPlot->Xmax()-$poPlot->Xmin()) / $img{StepNum};
    $img{xCurr} = $img{xLast} = $poPlot->Xmin();
    while ($img{xCurr} <= $poPlot->Xmax()) {
      $integral += $poPlot->Integral ($img{xLast}, $img{xCurr});
      push @PlotCumullen, [ $img{xCurr}, $integral ];
      $img{xLast} = $img{xCurr};
      $img{xCurr} += $img{StepSize};
    }
    foreach (@PlotCumullen) { $_->[1] = $integral - $_->[1]; }

    # plot graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
        { DimPixel  => { x=>$img{width} },
          HeightRel => $img{RelHigh},
          DataType  => 'AA',
          data      => \@PlotCumullen,
          ReprType  => 'line',
          ReprColor => 'red',
        },
        ],
      scale => [
        { PlotNum  => 0,
          location => 'bottom',
          color    => 'black',
        },
        { PlotNum  => 0,
          location => 'top',
          color    => 'black',
        },
        { PlotNum  => 0,
          location => 'left',
          color    => 'black',
        },
        { PlotNum  => 0,
          location => 'right',
          color    => 'black',
        },
        ],
      );

    # save plot graph data
    $file{ImgLenCumulData} = "$file{OutStump}_ScaffdLenCumul_img.dat";
    if ($hOutDat = &GetWriteHandle($file{ImgLenCumulData})) {
      &DataPrint (\%graph, -handle=>$hOutDat, -debug=>$dbg2);
    }
    # create image
    $file{ImgLenCumul} = "$file{OutStump}_ScaffdLenCumul.png";
    if (&Graph (\%graph, -save=>$file{ImgLenCumul}, -debug=>$dbg2)) {
      printf "saving file %s\n", $file{ImgLenCumul};
    } else {
      print  STDERR "ERROR: unable to create graph image (cumulative length distribution)\n";
    }

    $bTimer and printf "%s. CPU time for scaffold lengths statistics report: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  }

  ########################################################################
  # report of bridging clones

  # output file
  $file{BridgeClone} = "$file{OutStump}_BridgeClone.foid";
  $hOutTab = &GetWriteHandle ($file{BridgeClone}, -exit=>1);
  foreach my $pBridge (map{@{$_||[]}}
    &DataTreeSlc ($$pCtgStruct{bridge}, [[0,'all'],[0,'all']], -unique=>1)
  ) {
    $$pBridge{isWinner} or next;
    print  $hOutTab map{"$_->{id}\n"} map{@{$_||[]}} $$pBridge{clone};
  }

  ##############################################################################
  # HTML output of scaffold map

  # table header
  $bTimer and $time = &Sum ((times)[0,2]);
  my ${LineFeed} = "<BR>\n";
  my ${space} = '&nbsp;';
  $file{RptMap} = "$file{OutStump}_map.html";
  my $hOutHtml = &GetWriteHandle ($file{RptMap}, -exit=>1);

  # header, database information
  printf $hOutHtml "<HTML><HEAD><TITLE>scaffold map %s%s - %s</TITLE></HEAD>\n",
    &PathSplit($ArgSrc[0])->{name}, (@ArgSrc>1) ? ' ...':'',
    &TimeStr (-format=>'CompactComp');
  print  $hOutHtml "<BODY>";
  print  $hOutHtml "<PRE>\n$$pCtgStruct{report}{header}</PRE>\n";

  print  $hOutHtml "${LineFeed}${LineFeed}";
  print  $hOutHtml "CONTIG DATA SOURCE${LineFeed}";
  printf $hOutHtml "<PRE>$$pCtgStruct{report}{database}</PRE>";

  # scaffold map I
  print  $hOutHtml "${LineFeed}${LineFeed}";
  print  $hOutHtml "PSEUDO-CONTIG MAP I. - Join Via Winning Bridges${LineFeed}<PRE>";
  print  $hOutHtml $$pCtgStruct{report}{map1}{BridgeStatist};
  print  $hOutHtml "\n";
  print  $hOutHtml $$pCtgStruct{report}{map1}{CendStatist};
  printf $hOutHtml "  conflicting at same end: %d\n", int @{$$pCtgStruct{report}{map1}{CendConfl}};
  print  $hOutHtml "\n";
  printf $hOutHtml "scaffolds:\n";
  printf $hOutHtml "  non-singlets (holding contigs): $$pCtgStruct{report}{map1}{scaffd} ($$pCtgStruct{report}{map1}{ScaffdMember})\n";
  printf $hOutHtml "  singlets: $$pCtgStruct{report}{map1}{singlet}\n";
  print  $hOutHtml "</PRE>";

  # scaffold map II
  print  $hOutHtml "${LineFeed}${LineFeed}";
  print  $hOutHtml "PSEUDO-CONTIG MAP II. - Resolve Staggered References${LineFeed}";
  printf $hOutHtml "<PRE>$$pCtgStruct{report}{map2}{CendStatist}";
  printf $hOutHtml "  conflicting at same end: %d\n", int @{$$pCtgStruct{report}{map2}{CendConfl}};
  print  $hOutHtml "\n";
  printf $hOutHtml "scaffolds:\n";
  printf $hOutHtml "  non-singlets (holding contigs): $$pCtgStruct{report}{map2}{scaffd} ($$pCtgStruct{report}{map2}{ScaffdMember})\n";
  printf $hOutHtml "  singlets: $$pCtgStruct{report}{map2}{singlet}\n";
  print  $hOutHtml "</PRE>";
  if (%{$$pCtgStruct{scaffd}}) {
    foreach $pScaffd (sort{ &CendIdSort($a->{cend}[0]{id},$b->{cend}[0]{id}) }
      grep{ $_->{CtContig}{member}>1 or $_->{CtContig}{satellite} or $_->{CtContig}{conflict} }
      values %{$$pCtgStruct{scaffd}}
    ) {
      print  $hOutHtml "${LineFeed}${LineFeed}", &ScaffdPlain($pScaffd,-html=>1);
    }
  } else {
    print  $hOutHtml "*** NO PSEUDO-CONTIGS ***${LineFeed}";
  }

  # end document
  print  $hOutHtml "</BODY></HTML>";
  $bTimer and printf "%s. CPU time for HTML output of scaffold map: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # export scaffolds
  if ($ProgOpt{-export}) {
    printf "%s. starting scaffold export\n", &MySub;
    my (@ScaffdExport, $CtExport, @ScaffdExportContig, $sCall, $pCall);

    # sorted list of scaffolds
    # export pre-work
    @ScaffdExport = sort {
             $b->{length} <=> $a->{length} or
      $b->{CtContig}{all} <=> $a->{CtContig}{all} or
                       $a cmp $b;
      } grep{ $_->{CtContig}{all}>=2 } values %{$$pCtgStruct{scaffd}};
    $file{RptExport} = "$file{OutStump}_export.log";

    # export scaffolds
    for ($CtExport=0; $CtExport<$ProgOpt{-export}; $CtExport++) {

      # scaffold, contributing contigs (incl. conflicts, satellites)
      $pScaffd = $ScaffdExport[$CtExport];
      @ScaffdExportContig = map{@{$_||[]}} &ScaffdContigs ($pScaffd, -join=>1);

      # manage file paths
      # manage multiple GAP4 database sources for member contigs: *** implement me ***
      $file{source} = $ArgSrc[0];
      $file{TgtFull} = &PathUnique (-name=>sprintf("%s_psi#.0",&PathSplit($file{source})->{nameroot}),
        -dir=>&PathSplit($file{source})->{dir}, -CtStart=>1);
      $file{TgtNum} = ($file{TgtFull} =~ m/psi(\d+)/) ? $1 : $CtExport;
      $debug and printf STDERR "%s. target file for contig export: %s\n", &MySub,
        $file{TgtFull}||"''";

      # log export
      if ($hOutTab = &GetWriteHandle($file{RptExport},-access=>'a',-filestm=>'export log file')) {
        print  $hOutTab "\n", ('#' x 80), "\n\n";
        printf $hOutTab "scaffold: #%s\n", $$pScaffd{id};
        printf $hOutTab "target database: %s\n", $file{TgtFull};
        printf $hOutTab "contig number: %d\n", int @ScaffdExportContig;
        printf $hOutTab "total contig size: %d\n",
          &Sum (map{$_->{length}} @ScaffdExportContig);
        printf $hOutTab "contigs by number: %s\n",
          join (', ', map{'#'.$_->{idnum}} @ScaffdExportContig);
        printf $hOutTab "contigs by readings: %s\n",
          join (', ', map{$_->{id}} @ScaffdExportContig);
        print  $hOutTab "\n";
        print  $hOutTab &ScaffdPlain ($pScaffd);
        print  $hOutTab "\n";
      }

      # export, log DB info
      $pCall = &CallExtClosure ($CorePath{call}{GapContigMvSafe}, "-log=$file{RptExport}",
        $file{source}, $file{TgtFull}, map{ $_->{id} } @ScaffdExportContig);
      unless (&$pCall()) {
        printf STDERR "%s. ERROR: problems in moving scaffold to separate database, call was:\n  %s\n", &MySub,
          &$pCall ('WhatRUCalling');
        next;
      }
      $sCall = join (' ', $CorePath{call}{GapDbInfo}, $file{TgtFull}, ">>$file{RptExport}");
      &GapSafeCall ($sCall);

      # find reverse reads
      $file{stamp}  = &TimeStr (-format=>'CompactComp');
      $file{stamp} .= sprintf ('_AddRev_psi%d', $file{TgtNum});
      $pCall = &CallExtClosure ($CorePath{call}{GapAddRev}, '-SlcEnds=4500',
        "-OutStump=$file{stamp}", "-log=$file{RptExport}", $file{TgtFull});
      &$pCall() or print STDERR "ERROR: call to add reverse reads failed\n";
      $pCall = &CallExtClosure ($CorePath{call}{GapAssembNew}, $file{TgtFull},
        "$file{stamp}_got.foid", '>/dev/null');
      unless (&PhysicalAccessCtrl ($file{TgtFull}, -mode=>'func', -noLock=>1,
        -func => $pCall,
        -mail => { ProcStamp=>"$ProgFile -$ProgMode" },
        -log  => 1)) {
        printf STDERR "%s. ERROR: assembly of reverse reads failed, call was:\n  %s\n", &MySub,
          &$pCall ('WhatRUCalling');
        next;
      }
    }
  }
}


# list scaffolds
#
# INTERFACE
# - argument 1:    assembly data source
# - argument 2*:   either additional assembly data source(s) (like Arg1) or
#                  contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -OutReport     save showrelationship report to specified file
#   -OutTab        output file for scaffold list, default: STDOUT
#   -RcTgtSpecif   use this rc file for target specificity library
#   -SlcEnds       define contig end range for pair analysis (use is not
#                  recommended)
#   -SlcLen        [STD], sum of member lengths
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DESCRIPTION
# - for description of scaffold set data structure see
#   &SeqAlign::ScaffdAssemb::ScaffdStruct
#
sub ProgScaffdList {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  # get scaffold set data structure
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ScaffdStruct ([@ArgSrc], %ProgOpt,
    -SlcSingle => 1,
    -SrcSpecif => 1,  # always report source specifity data
    );
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ScaffdStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct or &SeqAlign::ScaffdAssemb::ScaffdStruct

  # sort scaffolds
  my @ScaffdReport = sort {
              $b->{length} <=> $a->{length} or
    $b->{CtContig}{member} <=> $a->{CtContig}{member} or
       $b->{CtContig}{all} <=> $a->{CtContig}{all} or
    $a cmp $b; } values %{$$pCtgStruct{scaffd}};
  $bTimer and printf "%s. CPU time for retrieving scaffold set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  my (@ScaffdReport, $pCtg);

  ##############################################################################
  # report

  # output destination
  my $hOutTab = $ProgOpt{-OutTab} ?
    FileHandle->new($ProgOpt{-OutTab},'w') : \*STDOUT;

  # report header
  print  $hOutTab "$$pCtgStruct{report}{theader}";
  print  $hOutTab "# scaffold list\n";
  print  $hOutTab "#\n";
  print  $hOutTab "$$pCtgStruct{report}{tdatabase}";
  if ($ProgOpt{-SlcLen}) {
  printf $hOutTab "# scaffold length selector: %s\n", $ProgOpt{-SlcLen};
  }
  if ($ProgOpt{-SlcSpecif}) {
  printf $hOutTab "# source specificity selector: %s\n", $ProgOpt{-SlcSpecif};
  }
  my @column = qw(scaffd_id  member_num  length  member_maxlen  members  Chr1 Chr2 Chr3 Chr45 Chr6);
  printf $hOutTab "#\n# column labels:\n# %s\n", join("\t",@column);

  # loop over scaffolds
  foreach my $pScaffd (@ScaffdReport) {
    ($pCtg) = sort{ $b->{length}<=>$a->{length} } map{@{$_||[]}}
      scalar &ScaffdContigs($pScaffd);
    my %report = (
      scaffd_id     => $$pScaffd{id},
      member_num    => $$pScaffd{CtContig}{member},
      length        => $$pScaffd{length},
      member_maxlen => $$pCtg{id},
      members       => join (' ', map{$_->{id}} @{$$pScaffd{member}}),
      );
    @report{qw(Chr1 Chr2 Chr3 Chr45 Chr6)} = @{$$pScaffd{SrcSpecif}}{qw(Chr1 Chr2 Chr3 Chr45 Chr6)};
    print  $hOutTab join("\t",@report{@column}), "\n";
  }
}


# output scaffold sequences
#
# INTERFACE
# - argument 1:    GAP4 database
# - argument 2*:   either additional assembly data source(s) (like Arg1) or
#                  contig specifier(s) for selection
#
# - global options:
#   -debug         [STD]
#   -OutReport     save showrelationship report to specified file
#   -OutReport     [STD]
#   -RcCloneLen    [STD]
#   -RcTgtSpecif   [STD]
#   -SlcLen        [STD], sum of member lengths
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DESCRIPTION
# - for description of scaffold set data structure see
#   &SeqAlign::ScaffdAssemb::ScaffdStruct
#
sub ProgScaffdSeq {
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $spacer = $ProgOpt{-var}{spacer} || $ProgParam{default}{GapSpacer}{scaffd};
  my $bTimer = $ProgOpt{-timer}; my $time;

  # get scaffold set data structure
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ScaffdStruct ([@ArgSrc], %ProgOpt,
    -SlcSingle => 1,
    -SrcSpecif => 1,  # always include source specificity data
    );
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ScaffdStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct or &SeqAlign::ScaffdAssemb::ScaffdStruct

  # sort scaffolds
  my @ScaffdReport = sort {
              $b->{length} <=> $a->{length} or
    $b->{CtContig}{member} <=> $a->{CtContig}{member} or
       $b->{CtContig}{all} <=> $a->{CtContig}{all} or
    $a cmp $b; } values %{$$pCtgStruct{scaffd}};
  $bTimer and printf "%s. CPU time for retrieving scaffold set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # compile scaffold sequences

  # loop over scaffolds
  foreach my $pScaffd (@ScaffdReport) {

    # concatenate contig sequences
    my $pSeq = &ContigSeq ($$pScaffd{cend}[0]{contig});
    my $pSeqNext;
    delete $$pSeq{EN};
    if ($$pScaffd{cend}[0]{idnum} > 0) {
      $pSeq = &SeqRevcompl ($pSeq, -debug=>$dbg2);
    }
    foreach my $pCend (@{ &ScaffdCendLocat ($$pScaffd{cend}[0])->{cend}{'-1'}{'1'} }) {
      unless ($pSeqNext = &ContigSeq ($$pCend{contig})) {
        printf STDERR "ERROR: unable to retrieve sequence for contig %s\n", $$pCend{contig}||"''";
        next;
      }
      if ($$pCend{idnum} > 0) {
        $pSeqNext = &SeqRevcompl ($pSeqNext, -debug=>$dbg2);
      }
      $pSeq = &SeqConcat ($pSeq, $pSeqNext, -spacer=>$spacer, -debug=>$dbg2);
    }

    # finish sequence data structure
    $$pSeq{id} = $$pScaffd{id};
    chop $$pSeq{id};
    $$pSeq{descr} = 'scaffold, source specificity: ' .
      join (', ', map{"$_ $$pScaffd{SrcSpecif}{$_}"}
        sort keys %{$$pScaffd{SrcSpecif}});
    printf &SeqentryToExper ($pSeq);
  }
}


# map contigs of assembly data input according to mapped reads contained
# in the assembly
#
# INTERFACE
# - argument 1:    map data file, table with column order:
#                    MarkerID, MapGroup, MapPos
#                  A MapPos value of -1 tells us that the marker is
#                  unmapped.
# - argument 2+:   assembly data sources
#
# - global options:
#   -debug         [STD]
#   -OutStump      [STD]
#   -OutTab        same as -OutStump - we have multi-file tabular output.
#                  Default: $ProgParam{default}{GapIndex}
#   -RcTgtSpecif   [STD], takes effect in &ContigStruct
#   -SlcCnum       [STD], not implemented for -SlcData=scaffd
#   -SlcData       possible: contig=read, scaffd
#   -SlcLen        [STD]
#   -SlcRnum       [STD], not implemented for -SlcData=scaffd
#   -SlcSpecif     [STD], takes effect in &ContigStruct
#   -SlcSpecifDlt  [STD], takes effect in &ContigStruct
#   -timer         [STD]
#
# DEBUG, CHANGES, ADDITIONS
# - important verification step: warn for contigs which have high disagreement
#   in marker/map positions
#
sub ProgMapbyRead {
  my ($PathMap,@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %out;
  $out{base} = &PrepOstump();
  $out{method} = $ProgMode;
  unless (@ArgSrc) {
    die sprintf "%s. ERROR: need a minimum of 2 args: <MapFile> <ContigSource>\n", &MySub;
  }

  ##############################################################################
  # get input data

  # get contig set data structure
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc], %ProgOpt,
    -cend  => 0,  # calculate cend binding for all reads
    -debug => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  $bTimer and printf STDERR "%s. CPU time for constructing contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  # empty assembly data source
  unless (values %{$$pCtgStruct{contig}}) {
    print  STDERR "no contigs found\n";
    exit 0;
  }

  # read mapping data
  # - delete markers which have MapPos==-1 (not mapped)
  my $pMapTab = &PlainToTable ($PathMap, -TabType=>'HIA',
    -ColLabel=>['Marker ID', 'Linkage Group', 'Marker Pos'],
    -comments=>1, -debug=>$dbg2);
  unless ($pMapTab) {
    die sprintf "ERROR: no list entries in map data file %s\n", $PathMap||"''";
  }
  $debug and printf STDERR "%s. %d data line%s in input file %s\n", &MySub,
    int(keys %$pMapTab), (int(keys(%$pMapTab))==1) ? '':'s', $PathMap||"''";
  foreach (keys %$pMapTab) { if($$pMapTab{$_}[1]==-1) { delete $$pMapTab{$_}; } }
  foreach (values %$pMapTab) { $#$_=4 }

  ##############################################################################
  # pre-calculate map/seq dimensions
  #
  # - internal data structure %MapDat
  #   CtMarker::$GrpID  number of mapped & assembled markers, for particular
  #                     group
  #   CtMarkerSum       total number of mapped & assembled markers
  #   DimMap::$GrpID    &Max (@MapPos) - &Min (@MapPos)
  #   DimMapSum         total map size (unit scale)
  #   DimSeqSum         total length of mapped contigs (nt scale)
  #   MarkerDistSmp     reference to array of marker distance samples (one
  #                     sample per group), each sample being a pair (array) of:
  #                     [0]  mean marker distance between pair of markers
  #                     [1]  size of distance sample
  #                     This is used as input to &SampleMetricsWgt
  #   MarkerDist        result of marker distance sample analysis (done by
  #                     &SampleMetricsWeighted). This is a hash reference.
  my (%MapDat,$ItGrp,@SlcData);
  my ($pCtg,$pCtgPrev,@GrpCtg,$pRead);

  # enter map positions into read data structures
  foreach $pRead (values %{$$pCtgStruct{read}}) {
    if (exists($$pMapTab{$$pRead{id}}) and $$pMapTab{$$pRead{id}}) {
      $$pRead{map} = {
        group => $$pMapTab{$$pRead{id}}[0],
        pos   => $$pMapTab{$$pRead{id}}[1],
        };
      $MapDat{CtMarkerSum} ++;
      push @{$$pRead{contig}{MapRead}}, $pRead;
    }
  }
  unless ($MapDat{CtMarkerSum}) {
    printf STDERR "no mapped readings found - exiting\n";
    exit;
  }
  $debug and printf STDERR "%s. %d mapped and assembled marker%s\n", &MySub,
    $MapDat{CtMarkerSum}, ($MapDat{CtMarkerSum}==1) ? '':'s';

  # size of map ranges for each group
  foreach $ItGrp (map{ @{$_||[]} }
    &DataTreeSlc ($pMapTab, [[0,'all'],[0]], -unique=>1)
  ) {
    $MapDat{CtMarker}{$ItGrp} = int (grep{ $_->[0] eq $ItGrp } values %$pMapTab);
    $MapDat{CtMarker}{$ItGrp} or next;
    $MapDat{DimMap}{$ItGrp} =
      &Max (map{ $_->[1] } grep{ $_->[0] eq $ItGrp } values %$pMapTab) -
      &Min (map{ $_->[1] } grep{ $_->[0] eq $ItGrp } values %$pMapTab);
    push @{$MapDat{MarkerDistSmp}}, [
      $MapDat{DimMap}{$ItGrp} / (($MapDat{CtMarker}{$ItGrp}-1) || 0.001),
      (($MapDat{CtMarker}{$ItGrp}-1) || 0.001),
      ];
    $debug and printf STDERR "%s. map group %s: %d marker%s, spanning %s map units\n", &MySub,
      $ItGrp, $MapDat{CtMarker}{$ItGrp}, ($MapDat{CtMarker}{$ItGrp} == 1) ? '':'s', $MapDat{DimMap}{$ItGrp};
  }

  # total seq size of mapped contigs
  foreach $pCtg (grep{ exists($_->{MapRead}) and @{$_->{MapRead}} }
          values %{$$pCtgStruct{contig}}) {
    $MapDat{DimSeqSum} += $$pCtg{length};
  }

  # total map size
  # - calculate mean of marker distance
  # - calculate map size as sum of observed map sizes per group and additional
  #   mean of marker distance
  $MapDat{MarkerDist} = &SampleMetricsWgt ($MapDat{MarkerDistSmp});
  unless ($MapDat{MarkerDist}{mean} and $MapDat{MarkerDist}{mean}) {
    die sprintf "%s. ERROR: no estimate of mean marker distance on map available\n", &MySub;
  }
  $MapDat{DimMapSum} = &Sum (values %{$MapDat{DimMap}})
    + int (values %{$MapDat{DimMap}}) * $MapDat{MarkerDist}{mean};
  unless ($MapDat{DimMapSum}) {
    die sprintf "%s. ERROR: unable to calc. total of map unit ranges, possibly singlet-only map\n", &MySub;
  }

  # map/seq conversion factors
  $MapDat{DimSbM} = $MapDat{DimSeqSum} / $MapDat{DimMapSum};
  $MapDat{DimMbS} = $MapDat{DimMapSum} / $MapDat{DimSeqSum};
  if ($debug) {
    printf STDERR "%s. total ranges: map %s, seq %s\n", &MySub,
      $MapDat{DimMapSum}, $MapDat{DimSeqSum};
    printf STDERR "%s. conversion factors for map/seq: s->m %s, m->s %s\n", &MySub,
      $MapDat{DimMbS}, $MapDat{DimSbM};
  }
  $debug and &DataPrint (\%MapDat, -handle=>\*STDERR);

  ##############################################################################
  # calculate map
  # - for single-group-mapped contigs:
  #   - determine centre of gravity for map/seq position correspondence
  #   - determine likely orientation (regression method)
  #   - determine likely contig orientation on map by spacial optimisation
  #     *** not implemented ***
  #   - construct preliminary map of contig positions (GFF output)
  #
  # - additions to internal data structure %MapDat
  #   CtgMapped         reference to array of mapped contigs
  #   CtgNonmapped      reference to array of non-mapped contigs
  #   GFF               preliminary map of contig positions in data
  #                     format close to GFF feature format.
  #                     structure hold only one map group at a time.

  # mapped and non-mapped markers
  @{$MapDat{CtgNonmapped}} = grep{ !exists($_->{MapRead}) or !int(@{$_->{MapRead}}) }
    values %{$$pCtgStruct{contig}};
  @{$MapDat{CtgMapped}} = grep{ exists($_->{MapRead}) and int(@{$_->{MapRead}}) }
    values %{$$pCtgStruct{contig}};

  # map single-marker-group-holding contigs
  foreach $pCtg (@{$MapDat{CtgMapped}}) {
    # determine map group
    @SlcData = map{ @{$_||[]} } &DataTreeSlc ($$pCtg{MapRead},
      [[0,'all'],['map'],['group']], -unique=>1);
    if (int(@SlcData) > 1) {
      printf STDERR "WARNING: contig %s is part of several map groups: %s\n",
        $$pCtg{id}, join (', ', @SlcData);
      next;
    }
    $$pCtg{MapAnchor}{group} = $SlcData[0];

    # determine centre of gravity for map/seq position correspondence
    $$pCtg{MapAnchor}{PosMap} = &Mean (map{ $_->{map}{pos} } @{$$pCtg{MapRead}});
    $$pCtg{MapAnchor}{PosSeq} = int &Mean (map{ ($_->{pos}{'-1'} + $_->{pos}{'1'}) / 2; } @{$$pCtg{MapRead}});

    # determine likely contig orientation on map by linear regression
    # default (errors?) => 1
    if (@{$$pCtg{MapRead}} > 1) {
      @SlcData = map {
        [ $_->{map}{pos}*$MapDat{DimSbM}, &Mean($_->{pos}{'-1'},$_->{pos}{'1'}) ]
        } @{$$pCtg{MapRead}};
      $debug and printf STDERR "%s. linear regression:\n%s", &MySub,
        join ('', map{ "$_->[0]  $_->[1]\n" } @SlcData);
      $$pCtg{MapAnchor}{orient} = &Sign ((&RegressLin(\@SlcData,-debug=>$dbg2)||{})->{ay});
    }

    # meanwhile debug
    $debug and printf STDERR "%s. anchored contig %s at map %s(%.3f) / seq %d, orientation %s, %d marker%s\n", &MySub,
      $$pCtg{id}, $$pCtg{MapAnchor}{group},
      $$pCtg{MapAnchor}{PosMap}, $$pCtg{MapAnchor}{PosSeq}, &SignChar ($$pCtg{MapAnchor}{orient}),
      int(@{$$pCtg{MapRead}}), (@{$$pCtg{MapRead}}==1)?'':'s';
  }  # end loop single-group-mapped contigs

  # determine likely contig orientation on map by spacial optimisation
  # - this applies to single-marker contigs
  foreach $pCtg (grep{ @{$_->{MapRead}}==1 } @{$MapDat{CtgMapped}}) {

    # sorry, still don't know how to do this
    # ...

    # default orientation: plus
    $$pCtg{MapAnchor}{orient} = 1;
  }  # end loop single-marker contigs

  # construct presentable map of contig positions
  # construct GFF-formatted feature table. Contigs may still overlap in this
  #   map version.
  # loop over map groups
  foreach $ItGrp (map{ @{$_||[]} } &DataTreeSlc($pMapTab,[[0,'all'],[0]],-unique=>1)) {
    delete $MapDat{GFF};

    # loop over contigs mapped into group
    @GrpCtg = sort{ $a->{MapAnchor}{PosMap}<=>$b->{MapAnchor}{PosMap} }
              grep{ exists($_->{MapAnchor}) and $_->{MapAnchor}{group} eq $ItGrp }
              @{$MapDat{CtgMapped}};
    foreach $pCtg (@GrpCtg) {
      push @{$MapDat{GFF}}, {
        method => $out{method}.'Seq',
        type   => 'contig',
        offset => int ($$pCtg{MapAnchor}{PosMap} * $MapDat{DimSbM}) -
          (($$pCtg{MapAnchor}{orient} >= 0) ? $$pCtg{MapAnchor}{PosSeq} : $$pCtg{length} - $$pCtg{MapAnchor}{PosSeq}),
        end    => int ($$pCtg{MapAnchor}{PosMap} * $MapDat{DimSbM}) +
          (($$pCtg{MapAnchor}{orient} >= 0) ? $$pCtg{length} - $$pCtg{MapAnchor}{PosSeq} : $$pCtg{MapAnchor}{PosSeq}),
        group  => (($$pCtg{MapAnchor}{orient} >= 0) ? '':'-'). $$pCtg{id},
        };
      foreach $pRead (@{$$pCtg{MapRead}}) {
        push @{$MapDat{GFF}}, {
          method  => $out{method}.'Seq',
          type    => 'MarkerSeq',
          offset  => int ($$pCtg{MapAnchor}{PosMap} * $MapDat{DimSbM}) +
            $$pCtg{MapAnchor}{orient} * (int (&Mean ($$pRead{pos}{'-1'}, $$pRead{pos}{'1'})) - $$pCtg{MapAnchor}{PosSeq}),
          group   => $$pRead{id},
          comment => $$pCtg{id},
          };
        $MapDat{GFF}[-1]{end} = $MapDat{GFF}[-1]{offset};
        push @{$MapDat{GFF}}, {
          method => $out{method}.'Map',
          type   => 'MarkerMap',
          offset => int ($$pRead{map}{pos} * $MapDat{DimSbM}),
          group  => $$pRead{id},
          };
        $MapDat{GFF}[-1]{end} = $MapDat{GFF}[-1]{offset};
      }
    }
    $out{GFF} = $out{base} . "Pre_$ItGrp.gff";
    &WriteFile ($out{GFF},
      &SeqentryToFFmt({id=>$ItGrp,annot=>$MapDat{GFF}},-format=>'GFF',-debug=>$dbg2));
  }

  ##############################################################################
  # create scaffolds
  # - for single-group-mapped contigs
  #   - create scaffold data structures
  # - for multi-group-mapped contigs:
  #   - join scaffolds
  #   - join map groups[?]
  #   - join scaffolds
  #   - calculate global map positions in a map containg all groups[?]
  # - output scaffolds as contig index tables

  # create scaffold data structures corresponding to map groups
  foreach $ItGrp (map{ @{$_||[]} } &DataTreeSlc($pMapTab,[[0,'all'],[0]],-unique=>1)) {
    # loop over contigs mapped into group
    @GrpCtg = sort{ $a->{MapAnchor}{PosMap}<=>$b->{MapAnchor}{PosMap} }
              grep{ exists($_->{MapAnchor}) and $_->{MapAnchor}{group} eq $ItGrp }
              @{$MapDat{CtgMapped}};
    $pCtgPrev = shift @GrpCtg;
    foreach $pCtg (@GrpCtg) {

      # join contigs
      $debug and printf STDERR "%s. joining contigs: %s%s / %s%s\n", &MySub,
        &SignChar($$pCtgPrev{MapAnchor}{orient}), $$pCtgPrev{id}||"''", &SignChar($$pCtg{MapAnchor}{orient}), $$pCtg{id}||"''";
      unless (&ScaffdJoin (
        $$pCtgPrev{end}{$$pCtgPrev{MapAnchor}{orient}},
        $$pCtg{end}{$$pCtg{MapAnchor}{orient}*-1},
        -debug=>$dbg2)
      ) {
        printf STDERR "%s. ERROR: unable to join contigs: %s%s / %s%s\n", &MySub,
          &SignChar($$pCtgPrev{MapAnchor}{orient}), $$pCtgPrev{id}||"''",
          &SignChar($$pCtg{MapAnchor}{orient}), $$pCtg{id}||"''";
      }

      # go on to next pair of contigs
      $pCtgPrev = $pCtg;
    }
  }
  # force singlet contigs into scaffold data structures
  foreach $pCtg (grep{ !exists($_->{scaffd}) or !$_->{scaffd} }
          values %{$$pCtgStruct{contig}}) {
    $debug and printf STDERR "%s. creating scaffold for singlet contig %s\n", &MySub, $$pCtg{id}||"''";
    &ScaffdSingle ($pCtg);
  }

  # map multi-marker-group-holding contigs
  # - we get information how to arrange mapped groups in relation to each other
  # - join scaffolds, join map groups
  foreach $pCtg (@{$MapDat{CtgMapped}}) {

    # contig in multiple map groups
    @SlcData = map{ @{$_||[]} }
      &DataTreeSlc ($$pCtg{MapRead}, [[0,'all'],['map'],['group']], -unique=>1);
    if (int(@SlcData) <= 1) { next }
    printf STDERR "%s. WARNING: contig %s maps into more than one map group: %s\n", &MySub,
      $$pCtg{id}||"''", join (', ', @SlcData);

    # are markers on contig corresponding to group borders?
    # ...

    # sorry, still don't know what to do here
    # ...
  }

  # warn for contigs which have high disagreement in marker/map positions

  ##############################################################################
  # output data

  # do output elsewhere
  $ProgOpt{-SlcData} = 'scaffd';
  $ProgOpt{-var}{spacer} ||= 100;
  &ProgIndex ('internal', $pCtgStruct);  # exit done there
}


################################################################################
# analyse and report: advanced sequencing
################################################################################


# guess chances of special sequencing strategies for a GAP4 database
#
# INTERFACE
# - argument 1:    GAP4 database
# - argument 2*:   contig specifier(s) for selection
#
# - global options
#   -debug         print debug protocol to STDOUT / STDERR (in SUBs)
#   -RcCloneLen    *** not implemented ***
#   -RcTgtSpecif   [STD], takes effect in &ContigStruct
#   -SlcID         select for read IDs
#   -SlcLen        [STD]
#   -SlcCnum       [STD]
#   -SlcRnum       [STD]
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
# DESCRIPTION
# - The routine uses a contig end range corrensponding to a confidence interval
#   of ~0.95 for clone length expectancy.
#
sub ProgSeqExtra {
  my $LenConfidCutoff = 0.95;
  my $MinLenLonggel = 350;
  my $MinAddExpect  = 20;
  my ($PathProj, @contig) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  $debug and printf "%s. analysis of database %s\n", &MySub, $PathProj||"''";

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([$PathProj],
    -cend      => $ProgOpt{-SlcEnds} || 0,  # default: calculate cend binding for all reads
    -pair      => 1,  # pair analysis always!
    -RelyOn    => $ProgOpt{-SlcID},
    -SlcContig => \@contig,
    %ProgOpt,
    # we don't use -SlcCnum here by default, cause its performance will depend
    # on the read/clone naming syntax
    -SlcRnum   => $ProgOpt{-SlcRnum} || 2,
    -debug     => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  if ($bTimer) {
    printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
    $time = (times)[0];
  }

  ##############################################################################
  # filter reads in contig end zones
  # use length cutoff parameter in ReadWatch::Library package
  my ($pCtg,$pRead,$pField,%report);

  foreach $pCtg (values %{$$pCtgStruct{contig}}) {
    unless ($$pCtg{effend}{'-1'}) { next }
    $report{CtContig} ++;

    # sample read if contig end distance < clone length cutoff parameter
    foreach $pRead (values %{$$pCtg{read}}) {
      unless (exists($$pRead{CEndDist}) and $$pRead{CEndDist}) { next }
      $$pRead{NameField} = $pField = &ReadidToFields ($$pRead{id});
      $$pField{lib} or next;
      if ($$pRead{CEndDist} > (&CloneLenCutoff($$pField{lib},-default=>1) * $LenConfidCutoff)) { next }
      push @{$report{EdgeRead}}, $pRead;
    }
  }
  $debug and printf "%s. found %d interesting reads at contig ends\n", &MySub,
    int @{$report{EdgeRead}};

  ##############################################################################
  # filter out reads having distal clone end in sight

  if (@{$report{EdgeRead}}) {

    # export Experiment files from GAP4 database
    my $TmpDirExper = $ProgParam{TmpManag}->Create();
    my $pCall = &CallExtClosure ($CorePath{call}{GapExportPreass},
      $PathProj, $TmpDirExper, join(' ',@{$report{EdgeRead}}), '>/dev/null');
    unless (&$pCall()) {
      printf STDERR "%s. ERROR: call to export reads failed:\n  %s\n", &MySub,
        &$pCall('WhatRUCalling');
    }

    # loop over interesting reads
    foreach $pRead (@{$report{EdgeRead}}) {

      # load sequence, calculate maximum clone length
      my $pSeq;
      unless ($pSeq = &{$SeqFFmtGet{Experiment}{FuncEntry}} ("$TmpDirExper/$$pRead{id}", -debug=>$dbg2)) {
        printf STDERR "%s. ERROR: unable to retrive Experiment data for read $$pRead{id}\n", &MySub;
      }
      $$pSeq{SR} ||= $$pSeq{QL}
        + &CloneLenCutoff ($$pRead{NameField}{lib}, -default=>1);
      $$pRead{CloneEndAdd} = &Max (0, $$pSeq{SR}-$$pSeq{QR});

      # sort read into method categories
      # - missing counter read (preliminary list of candidates)
      # - short sequence / end distance => long gel
      if ($$pRead{length}+$$pRead{CloneEndAdd}-$$pRead{CEndDist} > $MinAddExpect) {
        push @{$report{CounterMiss}}, $pRead;
      }
      else { next }
      if ($$pRead{CEndDist} < $MinLenLonggel) {
        push @{$report{LongGel}}, $pRead;
      }
    }

    # tidy up or debug
    if ($debug) {
      printf "%s. interesting reads found:\n", &MySub;
      printf "  missing counter-reading: %d\n", int @{$report{CounterMiss}};
      printf "  suggest long gel: %d\n", int @{$report{LongGel}};
    } else {
      unlink (glob "$TmpDirExper/*");
      rmdir $TmpDirExper;
    }
  }

  # no interesting reads found - that's mysterious!
  elsif ($debug) {
    printf "%s. no interesting reads found - probably error\n", &MySub;
    exit 1;
  }

  ##############################################################################
  # final analysis for interesting clones missing the counter-reading

  @{$report{CounterMiss}} = sort {
    $a->{contig}{idnum} <=> $b->{contig}{idnum} or
      $a->{cend}{idnum} <=> $b->{cend}{idnum}
    } @{$report{CounterMiss}};
  for (my $i=0; $i<@{$report{CounterMiss}}; $i++) {
    $pRead = $report{CounterMiss}[$i];
    $pField = $$pRead{NameField};
    my $pCtgClone = $$pRead{contig}{clone}{$$pField{cln}};
    unless (defined($pCtgClone) and %$pCtgClone) {
      printf STDERR "%s. ERROR: read %s present in contig %d but corresponding clone %s is not\n", &MySub,
        $$pRead{id}, $$pRead{contig}{idnum}, $$pField{cln};
    }

    # counter-read in same contig => throw it out
    if ($$pCtgClone{$ReadidField{prdAnti}{$$pField{prd}}}) {
      splice @{$report{CounterMiss}}, $i, 1;
      $report{CtMissSame} ++;
      $i --; next;
    }

    # counter-read in another contig => bridging clone
    my $pGlobClone = $$pCtgStruct{clone}{$$pField{cln}};
    if (map{@$_} &DataTreeSlc($$pGlobClone{$ReadidField{prdAnti}{$$pField{prd}}},
        [[0,'all'],['id']]) ) {
      splice @{$report{CounterMiss}}, $i, 1;
      $report{BridgeClone}{$$pField{cln}} = $pGlobClone;
      $report{CtMissBridge} ++;
      $i --; next;
    }
  }
  $bTimer and printf STDERR "%s. CPU time for analysis: %.3f s\n", &MySub, (times)[0]-$time;

  ##############################################################################
  # print report

  printf "date/time: %s\n", &TimeStr();
  printf "GAP4 database: %s\n", $PathProj||"''";
  printf "%scontigs: %d\n", int(@contig) ? 'selected ':'', int keys %{$$pCtgStruct{contig}};
  printf "analysed contigs: %d\n", $report{CtContig};
  printf "clones: %d\n", int keys %{$$pCtgStruct{clone}};
  printf "reads: %d\n", int keys %{$$pCtgStruct{read}};
  printf "clone length rc: %s\n", &CloneLenRcFile();

  print  "\n\n";
  print  "CONTIG-BRIDGING CLONES\n";
  print  "call $ProgFile -PairReport\n";

  print  "\n\n";
  print  "SUGGESTIONS FOR LONG READS\n";
  printf "- end of insert is less than %d bp far from contig end\n", $MinLenLonggel;
  print  "\n";
  if (@{$report{LongGel}}) {
    print  "new_read\tcontig\tcontig_end\tcontig_end_dist\tread_len\tclone_end_add\n";
    foreach $pRead (sort {
      $a->{contig}{idnum} <=> $b->{contig}{idnum} or
        $a->{cend}{idnum} <=> $b->{cend}{idnum} or
           $a->{CEndDist} <=> $b->{CEndDist}
    } @{$report{LongGel}}) {
      printf "%s\t%s\t%s\t%d\t%d\t%d\n",
        $ProgParam{store}{ReadIndex}->FirstExtra ($$pRead{NameField}{cln}.'.'.$$pRead{NameField}{prm}),
        $$pRead{contig}{id} .',#'. $$pRead{contig}{idnum},
        $SyntaxTranslDrc{num2word}{$$pRead{cend}{idnum}}, $$pRead{CEndDist},
        $$pRead{length}, $$pRead{CloneEndAdd};
    }
  } else {
    print "*** NONE ***\n";
  }

  print  "\n\n";
  print  "MISSING COUNTER-READS OF END CLONES\n";
  printf "- clone has an expectancy to yield a read beyond the contig end\n";
  print  "- some statistics:\n";
  printf "  found counter-reading in same contig: %d\n", $report{CtMissSame};
  printf "  missing counter-reading (listed below): %d\n", int @{$report{CounterMiss}};
  print  "\n";
  if (@{$report{CounterMiss}}) {
    print  "new_read\tcontig\tcontig_end\tcontig_end_dist\tclone_end_add\n";
    foreach $pRead (sort {
      $a->{contig}{idnum} <=> $b->{contig}{idnum} or
        $a->{cend}{idnum} <=> $b->{cend}{idnum} or
           $a->{CEndDist} <=> $b->{CEndDist}
      } @{$report{CounterMiss}}) {
      printf "%s\t%s\t%s\t%d\t%d\n",
        $ProgParam{store}{ReadIndex}->FirstExtra($$pRead{NameField}{cln}
          .'.'.  $ReadidField{prd2prm}{GSCJ}{$ReadidField{prdAnti}{$$pRead{NameField}{prd}}}),
        $$pRead{contig}{id} .',#'. $$pRead{contig}{idnum},
        $SyntaxTranslDrc{num2word}{$$pRead{cend}{idnum}}, $$pRead{CEndDist},
        $$pRead{CloneEndAdd};
    }
  } else {
    print "*** NONE ***\n";
  }
}


# find clones / walking primers at contig ends
# *** building place ***
#
# INTERFACE
# - argument 1:  GAP4 database
# - argument 2*: contig specifier(s) for selection
#
# DEBUG, CHANGES, ADDITIONS
# - use new features of &ContigStruct
#
sub ProgSeqWalk {
  my ($PathProj,@contig) = @_;
  my $debug = $ProgOpt{-debug};
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %gap = %{ &GapNameFields($PathProj) };

  ##############################################################################
  # write and execute temporary script

  $bTimer and $time = &Sum ((times)[0,2]);

  # write Tcl script
  my $PathScript = $ProgParam{TmpManag}->Create();
  open (OUTSCRIPT, ">$PathScript");
  print OUTSCRIPT <<END_SCRIPT;
set contigs [CreateAllContigList \$io]

set osp_defs [keylget gap_defs OSP]
keylset osp_defs prod_gc_low   0.40
keylset osp_defs prod_gc_high  0.55
keylset osp_defs min_prim_len  15
keylset osp_defs max_prim_len  24
keylset osp_defs prim_gc_low   0.20
keylset osp_defs prim_gc_high  0.70
keylset osp_defs prim_tm_low   54
keylset osp_defs prim_tm_high  62

find_primers               \\
  -io           \$io       \\
  -contigs      \#1442
END_SCRIPT
  close OUTSCRIPT;
#find_primers               \\
#  -io           \$io       \\
#  -contigs      \$contigs  \\
#  -search_from  20         \\
#  -search_to    200        \\
#  -params       \$osp_defs

  # execute Tcl script
  my $ret = int (system ("$CorePath{call}{GapExec} $gap{full} $PathScript") / 256);

  # tidy up
  $debug or unlink $PathScript;
  $bTimer and printf "%s. CPU time for finding oligos: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
}


################################################################################
# analyse and report: miscellaneous
################################################################################


# calculate target specificity of contigs
#
# INTERFACE
# - argument 1:    assembly data source
# - argument 2*:   contig specifier(s) for selection
#
# - global options:
#   -debug         print debug protocol to STDOUT / STDERR (in SUBs)
#   -RcTgtSpecif   [STD]
#   -SlcCnum       [STD]
#   -SlcLen        [STD]
#   -SlcRnum       [STD]
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#   -timer         [STD]
#
sub ProgSrcSpecif {
  my ($PathSrc, @contig) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  $debug and printf "%s. analysis of database %s\n", &MySub, $PathSrc||"''";

  # get contig set data structure from assembly data source
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([$PathSrc], %ProgOpt,
    -SlcContig => \@contig,
    -debug     => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  $bTimer and printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  ##############################################################################
  # calculate and report

  # start report header
  unless (&TgtspecifRc (-rc=>$ProgOpt{-RcTgtSpecif})) {
    die sprintf "%s. ERROR: no valid target specificity rc file (%s)\n", &MySub,
      $ProgOpt{-RcTgtSpecif} || $ReadWatch::Library::LibGlob{TgtSpecif}{default}{PathRc};
  }
  my @column = ('contig', 'length', 'num_seqs', 'num_reads',
    &TgtspecifTgtRegard ('genome', -rc=>$ProgOpt{-RcTgtSpecif}));
  print  "# $ProgFile -$ProgMode\n";
  print  "# source probabilities log(P())\n";
  printf "# date/time: %s\n", &TimeStr();
  printf "# assembly data source: %s\n", $PathSrc;
  printf "# %scontigs: %d\n", (@contig or $ProgOpt{-SlcCnum} or $ProgOpt{-SlcLen} or $ProgOpt{-SlcRnum}) ? 'selected ' : '',
    int keys %{$$pCtgStruct{contig}};
  printf "# target specificity resource file: %s\n", &TgtspecifRcPath(); 
  printf "#\n# column labels:\n# %s\n", join("\t",@column);
  foreach my $pCtg (values %{$$pCtgStruct{contig}}) {
    my @read = keys %{$$pCtg{read}};

    # calculate report entry
    my $pCalc = &TgtspecifTgtProbab (\@read, 'Read', -target=>'genome',
      -rc=>$ProgOpt{-RcTgtSpecif});
    $$pCalc{contig} = $$pCtg{id};
    $$pCalc{length} = $$pCtg{length};
    $$pCalc{num_seqs} = int keys %{$$pCtg{read}};
    $$pCalc{num_reads} = &Sum ( map{ $_? values(%$_):() }
      &TgtspecifLibgrpNumEval (\@read, 'Read', -rc=>$ProgOpt{-RcTgtSpecif})
      );

    # output report line
    printf "%s\n", join ("\t", @{$pCalc}{@column});
  }
}


################################################################################
# accuracy and annotation
################################################################################


# determine distribution of sequence (consensus) quality for a GAP4 database
#
# INTERFACE
# - argument 1:    GAP4 database
# - argument 2*:   contig specifier(s) for selection
#
# - global options
#   -debug         print debug protocol to STDOUT / STDERR (in SUBs)
#   -RcTgtSpecif   [STD]
#   -SlcLen        [STD]
#   -SlcCnum       [STD]
#   -SlcRnum       [STD]
#   -SlcSpecif     [STD]
#   -SlcSpecifDlt  [STD]
#
# DESCRIPTION
# - the procedural concept of this function is much like &ProgSeq (and
#   &ProgAddRev).
# - process logging must be done to STDERR, cause STDOUT is the default
#   destination of sequence output.
#
# DEBUG, CHANGES, ADDITIONS
# - Currently, the function only works with true GAP4 database sources.
#   A assembly data source, though, might also be non-database. But later on,
#   the retrieval of the actual sequences depends on the knowledge where
#   and how to get the sequences from.
#
sub ProgSeqQual {
  require "SeqLab/SeqStreamIn.pm";
  my (@ArgSrc) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  # arguments
  my %gap = %{ &GapNameFields($ArgSrc[0]) };
  if ($gap{exists}) {
    $debug and printf STDERR "%s. working on database %s\n", &MySub, $gap{full}||"''";
  } else {
    die sprintf "%s. unable to read GAP4 database %s\n", &MySub, $gap{full}||"''";
  }

  # prepare assembly data structure from complete assembly data source
  # selection is done below
  $bTimer and $time = &Sum ((times)[0,2]);
  my $pCtgStruct = &ContigStructMeta ([@ArgSrc], %ProgOpt,
    -pair   => undef,
    -RelyOn => undef,
    -debug  => $dbg2);
    # summary debug of contig set data structure possible in &SeqAlign::Assembly::ContigStruct
  $pCtgStruct or exit 1;
    # error message was done in &SeqAlign::Assembly::ContigStruct
  $bTimer and printf STDERR "%s. CPU time for retrieving contig set data structure: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
  unless (values %{$$pCtgStruct{contig}}) {
    printf STDERR "WARNING: no contigs found in assembly data source %s! (after selection)\n",
      $$pCtgStruct{root}{source}[0] || $ArgSrc[0];
    exit 0;
  }

  # contig consensus sequences
  # - selection according to contig specifiers is done via &ContigStructMeta
  my %path;
  # list of contig IDs
  my @IdAnalyse = map{ $_->{id} } values %{$$pCtgStruct{contig}};

  # call to get consensi
  $path{TmpCons} = $ProgParam{TmpManag}->Create();
  my $call = join (' ', $CorePath{call}{GapExportCons}, '-f Experiment',
    $gap{full}, $path{TmpCons}, @IdAnalyse);
  if (my $ret = &GapSafeCall($call)) {
    die "ERROR: GAP4 consensus call failed (code $ret):\n  $call\n";
  }
  if (-z $path{TmpCons}) {
    $debug and print  STDERR "ERROR: GAP4 consensus has zero size, call was:\n  $call\n";
  }
  my $pSeqQueueIn = SeqLab::SeqStreamIn->new($path{TmpCons});

  # analyse consensus sequences
  my ($SeqCt,$iSeqLen,$SeqGapCt,%QualIdx);
  while (my $pSeq = $pSeqQueueIn->GetNext()) {
    $SeqCt ++;
    my (%SeqGap, $SeqPos);
    $iSeqLen += length ($$pSeq{sequence});
    while ($$pSeq{sequence} =~ m/[^a-z]/ig) {
      $SeqGap{pos($$pSeq{sequence})-1} = 1;
    }
    $SeqGapCt += int (keys(%SeqGap));
    foreach my $line (map{ chomp;$_; }@{$$pSeq{AV}}) {
      foreach (grep{ length($_) and $_ ne 'AV' }split(/\s+/,$line)) {
        if ($SeqGap{$SeqPos++}) { next }
        $QualIdx{$_} ++;
      }
    }
  }

  # summarise analysis, report
  {
    my $QualSum = &Sum (values %QualIdx);  # corresponds to ungapped sequence
    printf "GAP4 database: %s\n", $gap{full};
    printf "contigs: %d\n", $SeqCt;
    printf "total consensus length (incl. gaps): %d\n", $iSeqLen;
    printf "total consensus length (no gaps): %d\n", $QualSum;
    printf "date/time: %s\n", &TimeStr();
    print  "cumulative fraction of bases fulfilling quality thresholds:\n";
    my $valcum;
    for (my $val=0; $val<=100; ++$val) {
      $valcum += $QualIdx{$val}||0;
      if ($val and !(($val+1)%10) and $val<40) {
        printf "  < %3d : %6.2f %%\n", $val+1, $valcum/$QualSum*100;
      }
      if ($val and !($val%10) and ($val<=40 or $val==100)) {
        printf "  <=%3d : %6.2f %%\n", $val, $valcum/$QualSum*100;
      }
    }
  }

  # tidy up
  $debug or unlink ($path{TmpCons});
}


# for a batch of Experiment files: adjust accuracy values
#
# INTERFACE
# - argument 1*: Experiment files, default: files listed in ./fofn
#
# - global options:
#   -debug       [STD]
#
# DESCRIPTION
# - The target Experiment files typically represent a directed assembly
#   that has been generated by GAP4's export function.
#
sub ProgQualAdjust {
  require "SeqLab/SeqStreamIn.pm";
  require "SeqLab/SeqStreamOut.pm";
  my $QualMaxEdit = defined($ProgOpt{-QualAdjust}[0]) ?
    $ProgOpt{-QualAdjust}[0] : $ProgParam{default}{QualAdjust}{edit};
  my %QualMax = (N=>2, '-'=>2, map{ ($_=>$QualMaxEdit) } qw(a c g t));
  my $SmbQualLookup = join ('', sort(keys %QualMax));
  my $QualExtern = defined($ProgOpt{-QualAdjust}[1]) ?
    $ProgOpt{-QualAdjust}[1] : $ProgParam{default}{QualAdjust}{extern};
  my (@ExperArg) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $pProcLog = {};
  my $bFromFofn = 0;
  unless (@ExperArg and -r './fofn') {
    @ExperArg = @{ &LoadFoid ('./fofn') };
    $bFromFofn = 1;
  }
  print  "$ProgFile -$ProgMode\n";
  printf "sequences: %d%s\n", int(@ExperArg), $bFromFofn?' found in ./fofn':'';
  @ExperArg or exit 0;

  ##############################################################################
  # filter for extern sequences
  printf "adjust accuracy values in extern sequences to %d\n", $QualExtern;

  # get data structure
  my @ExperArgDo;
  { my (@ExperPort);
    my @ExperArgCln = @ExperArg;
    my %ExperIdx = map{ ($_=>1) } @ExperArg;
    while (@ExperPort = splice(@ExperArgCln,0,&Min(5000,int(@ExperArgCln)))) {
      my @ExperFlt = map{ m/(.+)\s*\n/ } &ReadFile ("$CorePath{call}{egrep} -l '^(LN|LT)' ".
        join(' ',@ExperPort) . ' |');
      foreach (@ExperFlt) { delete $ExperIdx{$_} }
    }
    @ExperArgDo = keys %ExperIdx;
  }

  ##############################################################################
  # inspect/change Experiment files
  my ($pSeqQueueIn,$pSeq,$iSeqLen,$pSeqQueueOut);

  if (@ExperArgDo) {

    # initialise sequence output queue
    $pSeqQueueOut = SeqLab::SeqStreamOut->new(
       -file   => 'rewrite',
        $dbg2 ?
      (-debug  => $dbg2) : ());

    # loop over input sequences
    $pSeqQueueIn = SeqLab::SeqStreamIn->new(@ExperArgDo);
    $pSeqQueueIn->AddSwitch(-debug=>$dbg2);
    while ($pSeq = $pSeqQueueIn->GetNext()) {
      if ($$pSeq{SrcFmt} ne 'Experiment') {
        printf STDERR "ERROR: sequence %s is not in Experiment file format, but is %s\n",
          $$pSeq{id}||"''", $$pSeq{SrcFmt}||"''";
        next;
      }
      $$pProcLog{CtSeq} ++;
      my (@AV, $CtChg);

      # change AV lines to array of values
      foreach my $AvLine (@{$$pSeq{AV}}) {
        push @AV, grep{ $_ ne '' } split(/ +/,$AvLine);
      }

      # change AV field
      $iSeqLen = length ($$pSeq{sequence});
      for (my $i=0; $i<$iSeqLen; $i++) {
        if ($AV[$i] != $QualExtern) {
          $AV[$i] = $QualExtern;
          $CtChg ++;
        }
      }

      # changes? => rewrite sequence
      if ($CtChg) {
        $$pProcLog{CtChgSeq} ++;
        $$pProcLog{CtChgN} += $CtChg;

        # get AV lines from array of values
        delete $$pSeq{AV};
        for (my $i=0; $i<$iSeqLen; $i+=20) {
          push @{$$pSeq{AV}}, ($i ? '     ' : '') .
            join ('', map{ sprintf('%d ',$_) }
              splice (@AV, 0, &Min($iSeqLen-$i,20)));
        }

        # re-write sequence to file
        unless ($pSeqQueueOut->Push ($pSeq)) {
          printf STDERR "%s. ERROR: unable to rewrite sequence %s\n", &MySub, $$pSeq{id}||"''";
        }
      }
    }

    # report changes
    printf "  sequences inspected: %d\n", $$pProcLog{CtSeq}||0;
    printf "  sequences changed: %d\n", $$pProcLog{CtChgSeq}||0;
    printf "  total changes: %d\n", $$pProcLog{CtChgN}||0;

  } else {
    print  "  no Experiment files entered for change\n"
  }

  ##############################################################################
  # inspect/change edited Experiment files

  printf "adjust accuracy values of sequence edits (N=%d, a/c/g/t<=%d)\n",
    $QualMax{N}, $QualMax{a};
  undef $pProcLog;

  # initialise sequence output queue
  $pSeqQueueOut = SeqLab::SeqStreamOut->new(
     -file   => 'rewrite',
     $dbg2 ? (-debug  => $dbg2):());

  # loop over input sequences
  $pSeqQueueIn = SeqLab::SeqStreamIn->new(@ExperArg);
  $pSeqQueueIn->AddSwitch (-debug => $debug ? $debug-1:undef);
  while ($pSeq = $pSeqQueueIn->GetNext()) {
    if ($$pSeq{SrcFmt} ne 'Experiment') {
      printf STDERR "ERROR: sequence %s is not in Experiment file format, but is %s\n",
        $$pSeq{id}||"''", $$pSeq{SrcFmt}||"''";
      next;
    }
    $$pProcLog{CtSeq} ++;
    my (@AV, $CtChg);

    # change AV lines to array of values
    foreach my $AvLine (@{$$pSeq{AV}}) {
      push @AV, grep{ $_ ne '' } split(/ +/,$AvLine);
    }

    # search for sequence symbols that origin from edits
    # - apply regexp /[$SmbQualLookup]/ in a case-sensitive manner
    # - for hits: adjust quality value in @AV
    while ($$pSeq{sequence} =~ m/[$SmbQualLookup]/go) {
      $debug and printf STDERR "  hit: %s at %d, quality %d\n",
        $&, length($`)+1, $AV[length($`)];
      if ($AV[length($`)] > $QualMax{$&}) {
        $debug and printf STDERR "    adjusting\n",
          $&, length($`)+1, $AV[length($`)];
        $AV[length($`)] = $QualMax{$&};
        $CtChg ++;
      }
    }

    # changes? => rewrite sequence
    if ($CtChg) {
      $$pProcLog{CtChgSeq} ++;
      $$pProcLog{CtChgN} += $CtChg;

      # get AV lines from array of values
      delete $$pSeq{AV};
      $iSeqLen = length ($$pSeq{sequence});
      for (my $i=0; $i<$iSeqLen; $i+=20) {
        push @{$$pSeq{AV}}, ($i ? '     ' : '') .
          join ('', map{ sprintf "%d ", $_; }
            splice (@AV, 0, &Min($iSeqLen-$i,20)));
      }

      # re-write sequence to file
      unless ($pSeqQueueOut->Push($pSeq)) {
        printf STDERR "%s. ERROR: unable to rewrite sequence %s\n", &MySub, $$pSeq{id}||"''";
      }
    }
  }

  # report changes
  printf "  sequences inspected: %d\n", $$pProcLog{CtSeq}||0;
  printf "  sequences changed: %d\n", $$pProcLog{CtChgSeq}||0;
  printf "  total changes: %d\n", $$pProcLog{CtChgN}||0;
}
# $Id: Gap.pl,v 1.42 2007/12/30 12:12:31 szafrans Exp $
