#! /usr/local/bin/perl
################################################################################
#
#  GSCJ ReadWatch Concept
#  Command Suite
#
#  copyright (c)
#    Fritz Lipmann Institute Jena, Genome Analysis Group, 2013
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 1999-2000,2002-2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - See function &usage for description of command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments
#   &usage
#   &AddSwitch
#
# - library analysis
#   &ProgCloneLen
#   &ProgTgtSpecifCalib
#
# - read pool diagnosis/action
#   &ProgBlastRept
#   &ProgPairDiag
#   &ProgReadRename
#
# - miscellaneous
#   &ProgXmlTrace
#   &ProgClusterStat
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - GscjGap.pl -ReadRename doesn't work at the moment
#   have a look on call 'Gap.pl -index'
#
# - look also for notes in the header of each function block
#
################################################################################


# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
use strict; #use warnings;  # OK 20040813
use Cwd qw(realpath);
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
  my ($mypath) = realpath(__FILE__)=~m/(^.*)\//;
  push @INC, $mypath;
}
use MainLib::StrRegexp qw(&TimeStr);
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline qw(&GetoptsNArgs &GetWriteHandle);
use MainLib::File;
use MainLib::FileTmp;
use MainLib::Misc;
use Math::kCalc;
use Math::PlotImg;  # this is depending on library GD
use database::DbPlain;
use SeqLab::SeqFormat;
use SeqAlign::Gap;
use ReadWatch::Read;
use ReadWatch::Library;
use ReadWatch::PlateObj;
use ReadWatch::ReadIndex;
use ReadWatch::Cluster;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{call}{BlastMatch}  = $CorePath{call}{BlastWrap}      . ' -all2all -db=dicty.reads -param=SimCluster -ThreshID=0.918 -ValS=280';
$ProgParam{call}{BlastRept}   = $CorePath{call}{BlastWrap}      . ' -all2all -db=dicty.mask  -param=SimCluster -ThreshID=0.918 -ValS=280 -MaskPoly=11';
$ProgParam{call}{ReadProvide} = $CorePath{call}{GapReadProvide} . ' -timer -v';
# $ProgParam{call}{ReadProvide} = $CorePath{call}{GapReadProvide} . ' -v -tag';
$ProgParam{call}{Mask1}       = $CorePath{call}{BlastWrap}      . ' -MaskSeq -db=dicty.mask  -param=DictyMask -ThreshId=0.885';
$ProgParam{call}{Mask2}       = $CorePath{call}{BlastWrap}      . ' -MaskSeq -db=dicty.trna  -param=DictyMask -ThreshId=0.900 -MaskPoly=11';
$ProgParam{call}{StatSample}  = $CorePath{call}{PerlScript} .'/'. 'Statist.pl -sample';
# we need the following if we want to do changes
$ProgParam{default}{OutImgWidth} = 640;
$ProgParam{default}{ProgMode} = 'BlastRept';

# working desk
$ProgParam{store} = undef;


# manage I/O #####################################

# organiz I/O handles
&Unbuffer();

# organiz tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches, default subprogram
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
@ProgArg = &GetoptsNArgs();
$ProgMode ||= $ProgParam{default}{ProgMode};

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log} ne '1')? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}


# work flow manifold #############################

# chain to program mode without input argument(s)
if (0) { }
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }
elsif ($ProgMode =~ m/^CloneLen$/i) {
  &ProgCloneLen();
  exit 0;
}
elsif ($ProgMode =~ m/^TgtSpecifCalib$/i) {
  &ProgTgtSpecifCalib (@ProgArg);
  exit 0;
}

# ensure input argument(s)
unless (@ProgArg) {
  print STDERR "ERROR: input arguments missing\n";
  exit 1;
}
my ($arg);

# chain to program mode (with input argument(s))
if (0) { }
elsif ($ProgMode =~ m/^BlastRept$/i) {
  &ProgBlastRept (@ProgArg);
}
elsif ($ProgMode =~ m/^ClusterStat$/i) {
  &ProgClusterStat (@ProgArg);
}
elsif ($ProgMode =~ m/^PairDiag$/i) {
  foreach $arg (@ProgArg) {
    &ProgPairDiag ($arg);
  }
}
elsif ($ProgMode =~ m/^ReadRename$/i) {
  &ProgReadRename (@ProgArg);
}
elsif ($ProgMode =~ m/^XmlTrace$/i) {
  &ProgXmlTrace (@ProgArg);
}
else {
  print STDERR "ERROR: unknown program mode or switch '$ProgMode'\n";
  exit 1;
}

# exit script successfully
# cf. END blocks!
exit 0;



################################################################################
# usage help, command line arguments, basic I/O
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 $ProgFile is part of the realisation of the Dictyostelium ReadWatch
 concept. Keep tight control over the flood of read data!

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...] <Arg1> [<Arg2> ...]

Arguments
---------
 See function descriptions.

path arguments:
 Path "-" resolves to STDIN or STDOUT, depending on the context.

ModeSwitch (case-insensitive)
-----------------------------
<none>            default ModeSwitch -$ProgParam{default}{ProgMode} if program arguments are given.
                  Otherwise like ModeSwitch -help.
-BlastRept        BLAST all reads of a plate against contamination/repeat
                  sequences. Returns tabular output in the fashion of
                  Blast.pl -all2all
                  Arg1+       plate identifier(s)
-CloneLen         perform clone length analysis and write to rc file
                  -RcCloneLen=S
                              custom clone length rc file
-ClusterStat      calculate statistics for AlnK sequence cluster
                  Arg1        cluster specifier ("project path,name") or
                              cluster library entry (set of cluster projects
                              as defined in
			      $CorePath{ReadWatch}{home}/$CorePath{ReadWatch}{ClusterRc}).
-h(elp)           output command line syntax description and exit
-PairDiag         calculate probability of pairing status for a bunch (gel)
                  of reads
                  Arg1        read identifying gel
-PairSeek         look out for pairing partners for a bunch (gel) of reads
                  that are assumed to be misnamed.
                  *** implement me ***
-ReadRename       rename reads in all GAP4 databases, Experiment files etc.
                  Arg1        file containing list of current read names
                  Arg2        file containing list of new read names
                  Arg3+       multiple pairs of source/target name files
                              (Arg1,Arg2) are possible
-TgtSpecifCalib   calibrate target specificity measures for clone libraries
                  using mapped contigs
                  -RcTgtSpecif
                              custom target specificity rc file
-XmlTrace         format Experiment format sequence file to XML trace file format.
                  Output is done to STDOUT.
                  Arg1+       file of filenames

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

-debug(=N)        print debug protocol to STDERR (sometimes STDOUT). Keep
                  temporary files.
                  N  debug depth value
-log(=S)          redirect STDOUT and STDERR to LOG file
                  S  log file path, default path worked out automatically from
                     built-in directives.
-OutImgTransp     turn an image's background transparent
-OutImgWidth=N    define pixel width of an output image (plot data field),
                  default: $ProgParam{default}{OutImgWidth}
-OutStump=S       path stump for multi-file output
-rc               ReadWatch rc file, default built-in, annotated example:
                  $CorePath{call}{MeInstDoc}/$CorePath{ReadWatch}{ReadWatchXmpl}
-RcCloneLen=S     rc file for clone length data, default:
                  $ReadWatch::Library::LibGlob{CloneLen}{default}{PathRc}
                  annotated example:
                  $CorePath{call}{MeInstDoc}/$CorePath{ReadWatch}{CloneLenXmpl}
-RcTgtSpecif=S    rc file for target specificity data, default:
                  $ReadWatch::Library::LibGlob{TgtSpecif}{default}{PathRc}
                  annotated example:
                  $CorePath{call}{MeInstDoc}/$CorePath{ReadWatch}{TgtSpecifXmpl}
-timer            print time performance protocol to STDOUT/STDERR.
-update           update all underlying ReadWatch data

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.
END_USAGE
  print "\n";
  exit 0;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global data:
#   $ProgMode
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my $switch = shift;

  # optional switches
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^log(=(.+))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^OutImgTransp(ar)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgTransp} = 1;
    return;
  }
  if ($switch =~ m/^OutImgWidth=(\d+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgWidth} = $1;
    return;
  }
  if ($switch =~ m/^OutSt[au]mp=(.+)$/i) {
    $ProgOpt{-OutStump} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^RcCloneLen=(.+)$/i) {
    $ProgOpt{-RcCloneLen} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^RcTgtSpecif=(.+)$/i) {
    $ProgOpt{-RcTgtSpecif} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^timer$/i) {
    $ProgOpt{-timer} = 1;
    return;
  }
  if ($switch =~ m/^update(=(\d+))?$/i) {
    $ProgOpt{-update} = $2 || 1;
    return;
  }

  # program mode switches
  if (defined $ProgMode) {
    printf STDERR "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
    exit 1;
  }
  else {
    $ProgMode = $switch;
    return;
  }
}


################################################################################
# library analysis
################################################################################


# perform clone length analysis, and write to rc file
#
# INTERFACE
# - global options:
#   -debug       print debug protocol to STDOUT
#
sub ProgCloneLen {
  my ($debug, $dbg2, $FileRc, $WorkingDir);
  my ($pRc, @DataSrc, $call);
  my ($PlainHtml, $time, $pGrp, %graph, $CtLib, $pPlot);
  my (%file, $hOut, $ret);

  # function constants
  my %FileTpl = (
    GrpImg      => 'LibGroup_%s_cumul.png',
    GrpImgDat   => 'LibGroup_%s_cumul_img.dat',
    LibDisp     => 'Lib_%s_disp',
    LibCumulDat => 'Lib_%s_disp_cumfreq_img.dat',
    LibSlc      => 'Lib_%s_select',
    LibSlcStat  => 'Lib_%s_select_sample.dat',
    SrcReport   => 'EmpirData_%s.txt',
    SrcTable    => 'EmpirData_%s.tab',
    );

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # read rc, get started
  unless ($pRc = &CloneLenRc(-rc=>$ProgOpt{-RcCloneLen},-debug=>$debug)) {
    printf STDERR "ERROR: unable to read rc file %s\n",
      $ProgOpt{-RcCloneLen} || &CloneLenRcFile();
    exit 1;
  }
  $FileRc = &CloneLenRcFile();
  printf "running with rc file %s\n", $FileRc;

  # choose working/output directory
  $file{stamp} = $ProgOpt{-OutStump} || &PathChgSuffix(&CloneLenRcFile(),'',-last=>1);
  $WorkingDir = &PathSplit($file{stamp})->{dir};
  $file{stamp} = &PathSplit($file{stamp})->{name};
  $WorkingDir ||= &PathSplit($FileRc)->{dir};
  unless (chdir $WorkingDir) {
    print  STDERR "ERROR: unable to change to working directory $WorkingDir: $!\n";
    exit 1;
  }
  if ($debug) {
    require Cwd; Cwd->import();
    printf STDERR "%s. changed to directory %s\n", &MySub, &cwd();
  }
  $file{html} = $file{stamp} . 'Report.html';

  ######################################################################
  # get data from contig data sources
  # - you may comment this passage off

  # invoke clone length reports for all info sources (GAP4 projects)
  # source may be either:
  # - rc-globally defined: %{$$pRc{DataSource}} or
  # - library-associated: @{$$pRc{library}{$lib}{DataSource}}
  #   which may contain references to the global source definitions, hash field
  #   syntax (LibEntry => $DataSourceLibLabel)
  @DataSrc = grep{ exists($_->{AssemblySource}) and $_->{AssemblySource} }
    values (%{$$pRc{DataSource}});
  push @DataSrc, grep{ !$_->{LibEntry} } map{@{$_||[]}}
    &DataTreeSlc ($$pRc{library}, [[0,'all'],['DataSource'],[0,'all']], -unique=>1);
  foreach my $pDataSrc (sort{ $a->{id} cmp $b->{id} } @DataSrc) {
    $file{SrcReport} = $file{stamp} . sprintf($FileTpl{SrcReport},$$pDataSrc{id});
    $file{SrcTable} = $file{stamp} . sprintf($FileTpl{SrcTable},$$pDataSrc{id});
    $call = join (' ', $CorePath{call}{GapPairReport}, $$pDataSrc{AssemblySource},
      "-OutTab=$file{SrcTable}", ">$file{SrcReport}");
    if ($debug) {
      printf "%s. invoking clone length report: %s\n", &MySub, $call;
    } else {
      printf "invoking clone length report: %s\n", $$pDataSrc{id};
    }
    if ($ret = &GapSafeCall($call)) {
      printf STDERR "ERROR in call %s (ret code: %d)\n", $call, $ret;
      exit 1;
    }
  }
  $debug and printf STDERR "%s. passed data retrieval\n", &MySub;

  ######################################################################
  # calculate library statistics
  # - you may comment this passage off

  # loop over libraries
  foreach my $pLib (sort{ $a->{id} cmp $b->{id} }
    grep{ $_->{id} ne 'default' and $_->{DataSource} } values %{$$pRc{library}}
  ) {
    my $pTable;  # needs to get initialized with every new library

    # sample clones of current library from available reports
    # loop over source entries in library entry
    unless (exists($$pLib{DataSource}) and @{$$pLib{DataSource}}) {
      printf STDERR "WARNING: no data source for library %s\n", $$pLib{id};
      next;
    }
    printf "sampling data for library %s\n", $$pLib{id};
    foreach my $pDataSrcOrig (@{$$pLib{DataSource}}) {
      my $pDataSrc = $$pDataSrcOrig{LibEntry} ?
        $$pRc{DataSource}{$$pDataSrcOrig{LibEntry}} : $pDataSrcOrig;

      # load clone length table
      $file{SrcTable} = $file{stamp} . sprintf($FileTpl{SrcTable},$$pDataSrc{id});
      my $pTabPrim = &PlainToTable ($file{SrcTable}, -TabType=>'AH', -comments=>1);
      my @TabAdd = map { $_->{clone_len} }
        grep { $_->{clone}=~m/^$$pLib{id}/ } @$pTabPrim;
      $debug and printf STDERR "  %d total entries in %s, added %d\n",
        int(@$pTabPrim), $file{SrcTable}, int(@TabAdd);
      push @$pTable, @TabAdd;
    }

    # save sample data using different length cutoff values
    # - for display
    # - for statistical calculation: CutoffSelect is typically larger
    #   than CutoffValid in order to avoid scew of library's mean measure
    @$pTable = sort { $a<=>$b } @$pTable;
    my $CutoffDisp;
    foreach my $pLibGrp (@{$$pRc{LibraryGroup}}) {
      if (grep { $_ eq $$pLib{id} } @{$$pLibGrp{library}}) {
        $CutoffDisp = $$pLibGrp{DispRangeLen}[1];
        last;
      }
    }
    $CutoffDisp = &Max ($CutoffDisp, $$pLib{CutoffValid}, $$pLib{CutoffSelect});
    while ($$pTable[-1] > $CutoffDisp) { pop @$pTable; }
    $file{LibDisp} = $file{stamp} . sprintf($FileTpl{LibDisp},$$pLib{id});
    &WriteFile ($file{LibDisp}, join ('', map { "$_\n" } @$pTable));
    while ($$pTable[-1] > $$pLib{CutoffSelect}) { pop @$pTable; }
    $file{LibSlc} = $file{stamp} . sprintf($FileTpl{LibSlc},$$pLib{id});
    &WriteFile ($file{LibSlc}, join ('', map { "$_\n" } @$pTable));

    # invoke calculation of sample statistics
    $ret   = system ("$ProgParam{call}{StatSample} -OutImg $file{LibDisp}");
    $ret ||= system ("$ProgParam{call}{StatSample} -OutImg $file{LibSlc}");
    if ($ret) {
      print  STDERR "ERROR in call $ProgParam{call}{StatSample}\n";
      exit 1;
    }

    # load Gauss parameters
    $file{LibSlcStat} = $file{stamp} . sprintf($FileTpl{LibSlcStat},$$pLib{id});
    $$pLib{GaussParam} = (&DataRead($file{LibSlcStat})||{})->{metrics};
  }

  # save rc file back to disk
  if ($hOut = &GetWriteHandle($FileRc,-filestm=>'rc file')) {
    &DataPrint ($pRc, -handle=>$hOut, -NoAddr=>1);
  }

  ######################################################################
  # prepare group report

  # start up HTML document
  $time = &TimeStr();
  $PlainHtml = <<ProgCloneLen_HTML_INTRO_END;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
  <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
  <META HTTP-EQUIV="Author" CONTENT="Karol Szafranski">
  <META HTTP-EQUIV="Author" CONTENT="ReadWatcher.pl">
  <TITLE>Clone Length Distributions</TITLE>
</HEAD>
<BODY TEXT="#000000" BGCOLOR="#FFFFFF">
time: $time<BR>
<BR>

<P><H2>Clone Length Distributions</H2>

<P>Clone length estimates are be based upon analysis of
fwd/rev-sequenced clones matching to relatively large known
sequences (e.g. extrachromosomal elements: mtDNA, rDNA), or larger
consistent assemblies.</P>

<P>The following figures present the statistical distribution
of observed clone lengths for each of the sequenced DNA libraries.
In detail, the plots show the cumulation of relative frequencies
(y axis) against rising clone lengths (x axis).</P>

<BR><BR>


ProgCloneLen_HTML_INTRO_END

  # loop over library groups
  foreach $pGrp (@{$$pRc{LibraryGroup}}) {

    # start up graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [ ],
      scale => [
          { PlotNum  => 0,
            location => 'bottom',
            color    => 'black',
          },
          { PlotNum  => 0,
            location => 'top',
            color    => 'black',
          },
          { PlotNum  => 0,
            location => 'left',
            color    => 'black',
          },
          { PlotNum  => 0,
            location => 'right',
            color    => 'black',
          },
        ],
      );
    $file{GrpImg} = $file{stamp} . sprintf($FileTpl{GrpImg},$$pGrp{id});
    $file{GrpImgDat} = $file{stamp} . sprintf($FileTpl{GrpImgDat},$$pGrp{id});

    # enter image link to HTML doc
    $PlainHtml .= <<ProgCloneLen_HTMLIMG_TABOFF;
<P><H3>Library Group ${$pGrp}{id}</H3>

<IMG SRC="$file{GrpImg}">

<P><TABLE BORDER=0 CELLPADDING=3><TR>
ProgCloneLen_HTMLIMG_TABOFF

    # loop over libraries
    foreach ($CtLib=0; $CtLib<@{$$pGrp{library}}; $CtLib++) {
      my $pLib = $$pRc{library}{$$pGrp{library}[$CtLib]};
      my $pPlotRef;
      $file{LibCumulDat} = $file{stamp} . sprintf($FileTpl{LibCumulDat},$$pLib{id});
      unless (-e $file{LibCumulDat}) {
        printf STDERR "WARNING: no cumulative clone length data for library %s\n", $$pLib{id}||"''";
        next;
      }
      unless ($pPlotRef = (&DataRead($file{LibCumulDat})||{})->{plot}[0]) {
        printf STDERR "ERROR: unable to read plot image data from file %s\n",
          $file{LibCumulDat}||"''";
        next;
      }
      $debug and printf STDERR "%s. entering cumulative plot for library %s, file %s\n", &MySub,
        $$pLib{id}, $file{LibCumulDat};
      push @{$graph{plot}}, {
        DataType      => $$pPlotRef{DataType},
        data          => $$pPlotRef{data},
        DataRangeRef  => 0,
        ReprType      => 'line',
        ReprSizePixel => 3,
        ReprColor     => $$pRc{ColorFlute}[$CtLib],
        };
      $PlainHtml .= sprintf ("<TD BGCOLOR='%s'>\&nbsp;\&nbsp;\&nbsp;</TD> <TD>%s (n=%d)</TD>",
        $$pRc{ColorFlute}[$CtLib], $$pLib{id}, $$pLib{GaussParam}{n});
      $PlainHtml .= " <TD>\&nbsp;\&nbsp;\&nbsp;</TD>\n",
    }

    # finish graph
    $pPlot = $graph{plot}[0];
    delete $$pPlot{DataRangeRef};
    $$pPlot{DimPixel}{x} = $ProgOpt{ImgWidth}||$ProgParam{default}{OutImgWidth};
    $$pPlot{HeightRel} = 0.30;
    $$pPlot{DataRange} = {
      x => $$pGrp{DispRangeLen},
      y => [ 0, 1 ]
      };
    if ($hOut = &GetWriteHandle($file{GrpImgDat})) {
      &DataPrint (\%graph, -handle=>$hOut, -debug=>$dbg2);
    }
    if (&Graph (\%graph, -save=>$file{GrpImg}, -debug=>$dbg2)) {
      printf "writing file %s\n", $file{GrpImg};
    } else {
      printf STDERR "ERROR: unable to write file %s\n", $file{GrpImg}||"''";
    }

    # finish library
    $PlainHtml .= <<ProgCloneLen_HTMLIMG_TABEND;
</TR></TABLE></P>

<BR><BR>



ProgCloneLen_HTMLIMG_TABEND
  }

  # end HTML document
  $PlainHtml .= "</BODY></HTML>\n";
  $hOut = &GetWriteHandle ($file{html}, -exit=>1);
  print $hOut $PlainHtml;
}


# calibrate target specificity measures for clone libraries
#
# INTERFACE
# - global options:
#   -debug        print debug protocol to STDERR
#   -RcTgtSpecif  use this rc file for target specificity library
#   -update       update all underlying ReadWatch data
#
sub ProgTgtSpecifCalib {
  my ($debug, $dbg2, @update, @RcTgtSpecif);
  my ($pRc, %file, $call);
  my (%CtgDat, $pCtg, $pSeq);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  @update = exists ($ProgOpt{-update}) ?
    (-update=>$ProgOpt{-update}) : ();
  @RcTgtSpecif = exists ($ProgOpt{-RcTgtSpecif}) ?
    (-rc=>$ProgOpt{-RcTgtSpecif}) : ();

  # analysis on array of contigs
  $pRc = &TgtspecifRc (@update, @RcTgtSpecif);
  $file{readlst} = $ProgParam{TmpManag}->Create(-touch=>1);
  $file{seq} = $ProgParam{TmpManag}->Create(-touch=>1);
  foreach $pCtg (@{$$pRc{calibrate}}) {
    $CtgDat{$$pCtg{id}}{id} = $$pCtg{id};
    $CtgDat{$$pCtg{id}}{id_read} = $$pCtg{contig};
    $CtgDat{$$pCtg{id}}{target} = $$pCtg{target};

    # get list of reads
    $call = join (' ', $CorePath{call}{GapContigReads}, $$pCtg{db},
      $file{readlst}, $$pCtg{contig});
    if (&GapSafeCall ($call)) {
      printf STDERR "ERROR: don't get read list from contig '%s'\n", $$pCtg{id};
      print  STDERR "call: $call\n";
      exit 1;
    }
    @{$CtgDat{$$pCtg{id}}{loid}} = @{ &PlainToTable ($file{readlst}, -TabType=>'A1') };
    $CtgDat{$$pCtg{id}}{NumSeq} = int @{$CtgDat{$$pCtg{id}}{loid}};

    # get consensus length
    $call = join (' ', $CorePath{call}{GapExportCons}, '-f fastA', '-m REPT', '-s',
      $$pCtg{db}, $file{seq}, $$pCtg{contig});
    if (&GapSafeCall($call) or ! ($pSeq = map{ ${$_||[]}[0] } &SeqarrFromFFmt($file{seq}))) {
      printf STDERR "ERROR: don't get consensus sequence from contig %s\n", $$pCtg{id}||"''";
      print  STDERR "  call: $call\n";
      exit 1;
    }
    $CtgDat{$$pCtg{id}}{length} = length ($$pSeq{sequence}) - $$pCtg{mask};
    # mask long repeat ranges (annotation, label 'REPT')
    # *** implement me ***
    # currently, we regard precalculated value of repeat content

    # calculate specificity
    $CtgDat{$$pCtg{id}}{TgtSpecif} = &TgtspecifTgtProbab (
      $CtgDat{$$pCtg{id}}{loid}, 'Read', -target=>'genome', @RcTgtSpecif);
  }

  # analysis on array of sequencing targets
  # - loop over targets that are covered by calibration contigs
  # - sample attributes of target-specific contig pool => %CalibDat
  my %RegardLib = map{($_=>1)} TgtspecifLibgrpRegard('probab'); 
  my %CalibDat;
  foreach my $ItTgt (map{@{$_||[]}} &DataTreeSlc(\%CtgDat,[['','all'],['target']],-unique=>1)) {
    $CalibDat{$ItTgt}{NumCtg} =
      int (grep { $_->{target} eq $ItTgt } values %CtgDat);
    @{$CalibDat{$ItTgt}{loid}} = map { @{$_->{loid}} }
      grep { $_->{target} eq $ItTgt } values %CtgDat;
    $CalibDat{$ItTgt}{NumSeqSum} = int @{$CalibDat{$ItTgt}{loid}};
    $CalibDat{$ItTgt}{length} = &Sum (
      map { $_->{length} }
      grep { $_->{target} eq $ItTgt }
      values %CtgDat);
    unless ($CalibDat{$ItTgt}{length}) {
      printf STDERR "WARNING: calibration probes miss effective length, target %s\n", $ItTgt;
      next;
    }
    $CalibDat{$ItTgt}{LenRel} = $CalibDat{$ItTgt}{length} /
      ($$pRc{target}{$ItTgt}{NumNt} || $CalibDat{$ItTgt}{length});
    $CalibDat{$ItTgt}{NumRead} = &TgtspecifLibgrpNumEval (
      $CalibDat{$ItTgt}{loid}, 'Read', @RcTgtSpecif);
    $CalibDat{$ItTgt}{NumReadSum} = &Sum (values %{$CalibDat{$ItTgt}{NumRead}});
    foreach (grep{ exists($RegardLib{$_}) } keys %{$CalibDat{$ItTgt}{NumRead}}) {
      if (! $$pRc{LibGroup}{$_}{NumRead}) { next }
      $CalibDat{$ItTgt}{LibSpecif}{$_} = $CalibDat{$ItTgt}{NumRead}{$_}
        / $CalibDat{$ItTgt}{LenRel} / $$pRc{LibGroup}{$_}{NumRead};
    }
    delete $CalibDat{$ItTgt}{loid};
    foreach (grep { $_->{target} eq $ItTgt } values %CtgDat) {
      delete $_->{loid};
    }
  }

  # report analysis
  print  "\nprimary analysis on array of contigs:\n";
  &DataPrint (\%CtgDat);
  print  "\nanalysis on array of sequencing targets:\n";
  &DataPrint (\%CalibDat);

  # tidy up
  $debug or unlink ($file{readlst}, $file{seq});
}


################################################################################
# read pool diagnosis/action
################################################################################


# perform BLAST for diagnosis of paired readings
#
# INTERFACE
# - argument 1+: plate identifier(s)
#
# - global options
#   -debug      print debug protocol to STDOUT
#   -timer      print time performance protocol to STDOUT
#
sub ProgBlastRept {
  my (@ArgPlate) = @_;
  my ($debug, $dbg2, $time);
  my ($PlateId, $pPlate, @ExperList, $ret);

  # function parameters
  $debug = $ProgOpt{-debug} || 1;
  $dbg2  = $debug ? $debug-1 : undef;

  # loop over specified plate identifiers
  foreach $PlateId (@ArgPlate) {

    # create plate data object
    $debug and printf STDERR "%s. analysis of plate %s\n", &MySub, $PlateId||"''";
    $time = (times)[0];
    if ($pPlate = ReadWatch::PlateObj->new($PlateId)) {
      printf "%s. created ReadWatch::PlateObj object for plate ID %s, CPU time %.3f\n", &MySub,
        $pPlate->{id}||"''", (times)[0]-$time;
    } else {
      printf "%s. ERROR: failed to create ReadWatch::PlateObj object for plate ID %s\n", &MySub,
        $PlateId||"''";
      next;
    }
    $pPlate->{switch}{-debug} = 1;

    # validate existence of plate
    if ($pPlate->Exists()) {
      printf "%s. referenced plate %s exists, %d reads, CPU time %.3f\n", &MySub,
        $pPlate->{id}||"''", int $pPlate->ExperList(), (times)[0]-$time;
    } else {
      printf "%s. ERROR: referenced plate %s doesn't exist\n", &MySub, $PlateId||"''";
      next;
    }

    # list of Experiment files
    $time = (times)[0];
    if (@ExperList = $pPlate->ExperList()) {
      printf "%s. %d Experiment files according to plate %s, CPU time %.3f\n", &MySub,
        int @ExperList, $PlateId||"''", (times)[0]-$time;
    } else {
      printf "%s. ERROR: no Experiment files found for plate %s\n", &MySub, $PlateId||"''";
      next;
    }

    # do BLAST against repeats
    $time = &Sum ((times)[0,2]);
    if ($ret = $pPlate->BlastReptDo()) {
      printf "%s. BLAST against repeat database successful, CPU time %.3f\n", &MySub,
        &Sum((times)[0,2])-$time;
    } else {
      printf "%s. BLAST against repeat database unsuccessful\n", &MySub;
      next;
    }
  }

}


# prepare report suggesting possible (re-)pairing for reads on a read gel
#
# INTERFACE
# - argument 1: read identifier
#
sub ProgPairDiag {
  my ($ReadArg) = @_;
  my ($debug, $dbg2);
  my ($pReadField, %path, $line);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # define read target, print report header
  $pReadField = &ReadidToFields ($ReadArg);
  printf "# read name argument: %s\n", $ReadArg||"''";
  printf "# plate section: target %s, library %s, plate %s, plate section %s"
    .", primer %s, primer direction %d\n",
    $$pReadField{tgt} ||"''",
    $$pReadField{lib0}||"''",
    $$pReadField{plt} ||"''",
    $$pReadField{sec} ||"''",
    $$pReadField{prm} ||"''",
    $$pReadField{prd},
    ;

  # build filename patterns and file structures
  $path{temp} = $ProgParam{TmpManag}->Create(-touch=>1);
  $path{ReadSection} = $$pReadField{lib}.$$pReadField{plt}.
    $ReadidField{SectionToSuffix}{$$pReadField{sec}} .
    '.'. $$pReadField{prm};
  $path{DirBase}  = $ProgParam{folder}{pairing} . $path{ReadSection};
#  $path{DirLog}   = $path{DirBase} . '_log/';
  $path{DirExper} = $path{DirBase} . '_exper/';
  foreach ($path{DirExper}) {
#  foreach ($path{DirLog}, $path{DirExper}) {
    if (-e $_ or -d $_) {
      open (INPROC, "rm -fR $_ |");
      while (defined ($line=<INPROC>)) { print $line; }
      close INPROC;
    }
    if ($debug) {
      printf "%s. creating path $_\n", &MySub;
    }
    mkdir ($_);
  }

  # request sequences
  $path{RequestLog}  = $path{DirExper} .'get.log';
  $path{RequestGot}  = $path{DirExper} .'got.foid';
  $path{RequestFoid} = $path{DirExper} .'get.foid';
  open (OUTFOID, ">$path{RequestFoid}");
  print OUTFOID "", $$pReadField{lib}.$$pReadField{plt}.
    $ReadidField{SectionToRegexp}{$$pReadField{sec}}.
    '.'. $$pReadField{prm} . '\d',
    "\n";
  close OUTFOID;
  system ("$ProgParam{call}{ReadProvide} -log=$path{RequestLog} -OutPass=$path{RequestGot} $path{RequestFoid} $path{DirExper}");

  # build fastA file of masked sequences
  $path{SeqMasked} = $ProgParam{folder}{pairing} . $path{ReadSection} .
    '_masked.fa';
  chdir $path{DirExper};
  open (INSEQ, "$ProgParam{call}{Mask1} -fofn=$path{RequestGot} |");
  &FileCopy (\*INSEQ, $path{temp});
  close INSEQ;
  open (INSEQ, "$ProgParam{call}{Mask2} $path{temp} |");
  &FileCopy (\*INSEQ, $path{SeqMasked});
  close INSEQ;

  # do BLAST against repeats / available reads
  $path{BlastRepeat} = $ProgParam{folder}{pairing} . $path{ReadSection} .
    '_repeat.bll';
  open (INREPORT, "$ProgParam{call}{BlastRept} -fofn=$path{RequestGot} |");
  &FileCopy (\*INREPORT, $path{BlastRepeat});
  close INREPORT;
  $path{BlastAll} = $ProgParam{folder}{pairing} . $path{ReadSection} .
    '_all.bll';
  open (INREPORT, "$ProgParam{call}{BlastMatch} $path{SeqMasked} |");
  &FileCopy (\*INREPORT, $path{BlastAll});
  close INREPORT;

  # tidy up
  $debug or unlink $path{temp};
}


# rename reads in complete GSCJ read data / assembly environment
#
# INTERFACE
# - argument 1: file of source names
# - argument 2: file of target names
# - argument (3+): multiple pairs of source/target name lists are possible.
#
# DESCRIPTION
# - The user has to account for a list of source names that is non-redundant.
#
# DEBUG, CHANGES, ADDITIONS
# - implement &MainLib::FileAccAsync::PhysicalAccessCtrl
#
sub ProgReadRename {
  my $NameFake = 'JCXa01a01.s1';
  my ($debug, $dbg2, $PathEdited, $PathTmp);
  my ($FileSrc, $FileTgt, @NameSrc, @NameTgt);
  my ($pGapList, @IsInProject);
  my (%fail, @chain, %job, @request);
  my ($entry, $area, $CtI, $ret, $buffer, @del, $bWait);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $PathEdited = $CorePath{ReadWatch}{ExperEdited};

  # list of rename pairs
  while (($FileSrc, $FileTgt) = splice (@_, 0, 2)) {
    push @NameSrc, @{ &PlainToTable ($FileSrc, -TabType=>'A1') };
    push @NameTgt, @{ &PlainToTable ($FileTgt, -TabType=>'A1') };
  }
  if (@NameSrc != @NameTgt) {
    printf STDERR "ERROR: different number of source/target readnames (src %d, tgt %d)\n",
      int @NameSrc, int @NameTgt;
    exit 1;
  } elsif ($debug) {
    printf STDERR "%s. number of read names: src %d, tgt %d\n", &MySub,
      int @NameSrc, int @NameTgt;
  }

  ########################################################
  # create/update read index

  # specified database => create read list
  if (my $project = $ProgOpt{-GapDB}) {
  }

  # Dictyostelium project =>
  # update Dictyostelium GAP4 read index, read index
  else {
    print  "updating GAP4 read index\n";
    system "$CorePath{call}{GapIndex} > /dev/null";
    $pGapList = &PlainToTable ($ProgParam{default}{GapIndex}.'Read.tab', -comments=>1, -debug=>$dbg2);
    $debug and printf STDERR "%s. got %d index entries from file %s\n", &MySub,
      int @$pGapList, $ProgParam{default}{GapIndex}.'Read.tab';
    unless (defined $pGapList) {
      printf STDERR "ERROR: no entries in GAP4 index file %s\n",
        $ProgParam{default}{GapIndex}.'Read.tab';
      exit 1;
    }
  }

  ########################################################
  # locate reads to be renamed

  # for each read name pair that actually exists in the particular area
  # sort pair into area for renaming action
  for ($CtI=0; $CtI<@NameSrc; $CtI++) {

    # enter read to GAP4 database renaming area(s)
    if (@IsInProject = grep { $$_[0] eq $NameSrc[$CtI]; } @$pGapList) {
      foreach $entry (@IsInProject) {
        $job{$$entry[1]}{SrcTgt}{$NameSrc[$CtI]} = $NameTgt[$CtI];
        $job{$$entry[1]}{TgtSrc}{$NameTgt[$CtI]} = $NameSrc[$CtI];
        unless ($job{$$entry[1]}{folder}) {
          unless ($$entry[1] =~ m|[^/]+$|) {
            printf STDERR "%s. ERROR: no folder derived from database name %s\n", &MySub, $$entry[1];
            exit 1;
          }
          $job{$$entry[1]}{folder} = $`;
        }
      }
    } else {
      push @{$fail{NFgap}}, $NameSrc[$CtI];
    }

    # enter read to 'edited folder' renaming area
    if (-e $PathEdited.$NameSrc[$CtI]) {
      $job{edited}{SrcTgt}{$NameSrc[$CtI]} = $NameTgt[$CtI];
      $job{edited}{TgtSrc}{$NameTgt[$CtI]} = $NameSrc[$CtI];
    } else {
      push @{$fail{NFedited}}, $NameSrc[$CtI];
    }
  }
  if ($debug) {
    if (@{$fail{NFgap}}) {
      printf "\nNOT FOUND IN ANY GAP4 PROJECT\n%s\n", join ("\n", @{$fail{NFgap}});
    }
    if (@{$fail{NFedited}}) {
      printf "\nNOT FOUND IN EDITED FOLDER\n%s\n",   join ("\n", @{$fail{NFedited}});
    }
  }

  ########################################################
  # create to-do list

  # for each renaming area fish out chains of renaming actions
  # check chains for cases of renaming circles (e.g. pairwise name exchanges
  #   in the simplest case)
  foreach $area (keys %job) {

    # while entries in area -> create new chain
    while ( $chain[0]{source} = (keys %{$job{$area}{SrcTgt}})[0] ) {
      $chain[0]{target} = $job{$area}{SrcTgt}{$chain[0]{source}};
      delete $job{$area}{SrcTgt}{$chain[0]{source}};
      delete $job{$area}{TgtSrc}{$chain[0]{target}};

      # any entry with the last target label as the source label?
      # => append to renaming chain
      while ($job{$area}{SrcTgt}{$chain[$#chain]{target}}) {
        push @chain, {
          source => $chain[$#chain]{target},
          target => $job{$area}{SrcTgt}{$chain[$#chain]{target}},
          };
        delete $job{$area}{SrcTgt}{$chain[$#chain]{source}};
        delete $job{$area}{TgtSrc}{$chain[$#chain]{target}};

        # renaming chain forming a circle?
        if ($chain[$#chain]{target} eq $chain[0]{source}) {
          push @chain, { %{$chain[0]} };
          $chain[0]{source} = $chain[$#chain]{target} = $NameFake;
          last;
        }
      }

      # enter chain in the chronological to-do list
      while (defined ($entry = pop @chain)) {
        push @{$job{$area}{do}}, $entry;
      }
    }
  }

  ########################################################
  # do the job

  # do the job for the edited folder
  if (@{$job{edited}{do}}) {
    printf "\nrenaming in edited folder\n";
    foreach $entry (@{$job{edited}{do}}) {
      printf "%s -> %s ", $$entry{source}, $$entry{target};

      # conflict => delete source and target
      if (-e $PathEdited.$$entry{target}) {
        printf "- conflict, both files deleted\n";
        push @del, $PathEdited.$$entry{source}, $PathEdited.$$entry{target};
      }

      # rename file, change ID entries
      elsif (-e $PathEdited.$$entry{source}) {
        $buffer = &ReadFile ($PathEdited.$$entry{source});
        if ($buffer =~ m/$$entry{source}/) {
          $buffer =~ s/$$entry{source}/$$entry{target}/ge;
          open (OUTEXP, '>'.$PathEdited.$$entry{target});
          print OUTEXP $buffer;
          close OUTEXP;
          print  "- done\n";
        } else {
          unless ($debug) {
            print  "- error, source file deleted\n";
          }
        }
        unlink $PathEdited.$$entry{source};
      }

      # no source file?
      else {
        print  "- source not found\n";
      }
    }
  }
  delete $job{edited};

  # do the job for each GAP4 database
  # do the job stored in the to-do list
  foreach $area (keys %job) {
    printf "\nrenaming in GAP4 database %s\n", $area;
    undef @request;
    foreach $entry (@{$job{$area}{do}}) {

      # wait for database to be accessible
      # replace by ReadWatch::Read::PhysicalAccessCtrl
      while (-e $area.'.BUSY' or -e $area.'.BUSY') {
        unless ($bWait) {
          printf "time %s - waiting for ongoing editing process\n", &TimeStr();
          $bWait = 1;
        }
        sleep 5;
      }
      if ($bWait) {
        printf "time %s - waiting done\n", &TimeStr();
        undef $bWait;
      }

      # rename in GAP4 database
      printf "%s -> %s ", $$entry{source}, $$entry{target};
      $PathTmp = $ProgParam{TmpManag}->Create();
      $ret = int (system ("$CorePath{call}{GapReadRename} $area '$$entry{source}' '$$entry{target}' > $PathTmp") / 256);
      push @del, $PathTmp;
      printf "- %s\n", $ret ? 'error' : 'done';
      if ($ret) {
        $buffer = &ReadFile ($PathTmp);
        $buffer =~ s/^.+$/  $&/gm;
        printf $buffer;
      }

      # put SCF on refresh list
      unlink $job{$area}{folder} . $$entry{source} .'SCF',
             $job{$area}{folder} . $$entry{target} .'SCF';
      push @request, $$entry{target};
    }

    # request entries on SCF-refresh list
    print "refreshing SCF links\n";
    $PathTmp = $ProgParam{TmpManag}->Create();
    &WriteFile ($PathTmp, join("\n",@request));
    system "$ProgParam{call}{ProjRequest} $PathTmp $job{$area}{folder} > /dev/null & ";
    push @del, $PathTmp;
  }

  # do the job for all presentation files
  # ...

  # tidy up
  unlink @del;
  print "\n";
}


################################################################################
# miscellaneous
################################################################################


# format Experiment format sequence file to XML trace file format
#
# INTERFACE
# - argument 1+: files of filenames referring the sequence source files
#                in Experiment file format.
#
# - options:
#   -debug       [STD]
#   -space       indent XML statements by x spaces at the beginning of the line
#
# - return val:  plain XML trace file format
#
sub ProgXmlTrace {
  my (%TranslEnd);
  my (@FofnSrc) = @_;
  my ($debug, $dbg2, $space);
  my ($ItSrc, $pSeqQueue, $pSeq);
  my (%XmlEntry, $XmlPlain, $pIdField, $pClenEstim);
  my (@FieldSlc, $ItEnd, $ItFld);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $space = '';
  @FofnSrc or return;

  # load fofns to input queue, loop over source sequences
  require SeqLab::SeqStreamIn;
  $pSeqQueue = SeqLab::SeqStreamIn->new();
  $pSeqQueue->AddSwitch (-debug=>$dbg2);
  foreach $ItSrc (@FofnSrc) {
    $pSeqQueue->Push (@{ &PlainToTable($ItSrc,-TabType=>'A1')||[] });
  }
  unless ($pSeqQueue->{PathSrc} and @{$pSeqQueue->{PathSrc}} > @FofnSrc) {
    printf STDERR "%s. ERROR: input queue initialization failed\n", &MySub;
    exit 1;
  }
  $debug and printf STDERR "%s. input queue keeps %d input files\n", &MySub,
    int @{$pSeqQueue->{PathSrc}};

  ##############################################################################
  
  # loop over input sequences
  while ($pSeq = $pSeqQueue->GetNext()) {
    undef %XmlEntry;
    undef $XmlPlain;
    $debug and printf STDERR "%s. current sequence: %s\n", &MySub, $$pSeq{id}||"''";

    # XML from basic fields in sequence data structure
    $XmlEntry{TRACE_NAME} = $$pSeq{id};
    $XmlEntry{TRACE_FILE} = $$pSeq{id} . 'SCF';  # GSCJ's default

    # XML from basic fields in sequence data structure
    if ($pIdField = &ReadidToFields($$pSeq{id})) {
      $XmlEntry{CLONE_ID} = $$pIdField{cln};
      $XmlEntry{PLATE_ID} = $$pIdField{lib} . $$pIdField{plt};
      $XmlEntry{SEQ_LIB_ID} = $$pIdField{lib};
      $XmlEntry{TRACE_DIRECTION} = $$pIdField{prd};

      # needs special retrieval: clone length estimate
      if ($$pIdField{lib} and
        $pClenEstim = &CloneLenEstim ($$pIdField{lib}, -rc=>$ProgOpt{-RcCloneLen}, -debug=>$dbg2)
      ) {
        $XmlEntry{INSERT_SIZE} = int $$pClenEstim{mean};
        $XmlEntry{INSERT_STDEV} = int $$pClenEstim{s};
      }
    } else {
      printf STDERR "%s. WARNING: name field analysis failed: %s\n", &MySub,
        $$pSeq{id};
    }

    # vector clip, quality clip
    %TranslEnd = ( L=>'LEFT', R=>'RIGHT' );
    foreach $ItEnd (qw(L R)) {
      if (@FieldSlc = grep { defined($_) and $_ } @{$pSeq}{'C'.$ItEnd,'S'.$ItEnd}) {
        $XmlEntry{'CLIP_VECTOR_'.$TranslEnd{$ItEnd}} = ($ItEnd eq 'L') ?
          &Max(@FieldSlc) : &Min(@FieldSlc);
      }
      if (@FieldSlc = grep { defined($_) and $_ } ($$pSeq{'Q'.$ItEnd})) {
        $XmlEntry{'CLIP_QUALITY_'.$TranslEnd{$ItEnd}} = ($ItEnd eq 'L') ?
          &Max(@FieldSlc) : &Min(@FieldSlc);
      }
    }

    # is this compliant to XML-specification? Probably not.
    $XmlEntry{QUALITY_PASS} = ((grep { defined($_) and $_ } @{$pSeq}{'QL','QR'}) == 2) ? '1' : '0';

    ############################################################################
    # formatting

    $XmlPlain .= $space . "<TRACE>\n";
    foreach $ItFld (sort grep { defined $XmlEntry{$_} } keys %XmlEntry) {
      $XmlPlain .= $space . "  <$ItFld>$XmlEntry{$ItFld}</$ItFld>\n";
    }
    $XmlPlain .= $space . "</TRACE>\n";
    
    print $XmlPlain;
  }

  # exit SUB
  return $XmlPlain;
}


# calculate statistics for AlnK sequence cluster
#
# INTERFACE
# - argument 1:   cluster specifier (project path and name) or
#                 cluster library entry (set of cluster projects)
#
# - global options
#   -debug        print debug protocol to STDERR
#   -RcTgtSpecif  ...
#   -update       update underlying ReadWatch data
#
sub ProgClusterStat {
  my ($ArgCluster) = @_;
  my ($debug, $dbg2, @update, @RcTgtSpecif);
  my (%DataFin, $CurrEntry, @LibGroup);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  @update = exists ($ProgOpt{-update}) ?
    (-update=>$ProgOpt{-update}) : ();
  @RcTgtSpecif = exists ($ProgOpt{-RcTgtSpecif}) ?
    (-RcTgtSpecif=>$ProgOpt{-RcTgtSpecif}) : ();

  # set of library groups
  %DataFin = %{ &ClusterStat ($ArgCluster, @update, @RcTgtSpecif) || {} };
  unless (%DataFin) {
    printf STDERR "ERROR: got no data from \&ClusterStat\n";
    exit 1;
  }
  @LibGroup = sort { lc ($a) cmp lc ($b) } keys %DataFin;

  # output table
  printf "# column labels:\n# %s\n", join ("\t", 'data', @LibGroup);
  foreach $CurrEntry (sort keys %{$DataFin{$LibGroup[0]}}) {
    printf "%s\n", join ("\t", $CurrEntry,
      map { $DataFin{$_}{$CurrEntry} } @LibGroup);
  }
}
# $Id: ReadWatcher.pl,v 1.35 2018/06/05 18:02:56 szafrans Exp $
