#! /usr/local/bin/perl
################################################################################
#
#  Sequence Laboratory
#  Motifs in Sequences
#
#  copyright (c)
#    Fritz Lipmann Institute Jena, CF Bioinformatics, 2018
#    Fritz Lipmann Institute Jena, Genome Analysis Group, 2005, 2007, 2013
#    Karol Szafranski, 2006-2007
#    Karol Szafranski at UPenn Philadelphia, Center for Bioinformatics, 2004-2005
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, (1998-)2001-2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - See function &usage for description of command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
# - for supported sequence input file formats see
#   - %SeqLab::SeqFormat::SeqFFmtGet
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments
#   &usage
#   &AddSwitch
#
# - basic I/O
#   &SeqQueue
#   &PrepOstump
#   &OutTabIntro
#   &OutTabHeadln
#   &OutTabLine
#   &OutTabTail
#
# - motif construction & housekeeping
#   &ProgMotifLib
#
# - motif search
#   &ProgSearchMotif
#   &ProgCpG
#
# - tuple analysis
#   &ProgTupleLib
#   &ProgTupleDiffDiag
#    &TupleTransit
#   &ProgTupleDist
#   &ProgTuplePos
#   &ProgTupleFocus
#   &ProgTupleScore
#   &ProgTupleCmp
#
# - miscellaneous
#   &ProgRandomize
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - complete implementation of switch -TupleSize=N1..N2 Have a look at
#   functions: &ProgTupleLib, ...
#
# - switch -OutImg=S won't work right in multi-sequence processing
#   instead, treat the argument S like in -OutStump=S. Have a look at functions:
#   &ProgCpG, ...
#
# - look also for notes in the header of each function block
#
################################################################################

# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
use strict; #use warnings;  # OK 20040813
use Cwd qw(realpath);
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
  my ($mypath) = realpath(__FILE__)=~m/(^.*)\//;
  push @INC, $mypath;
}
use FileHandle;
use GD;  # this is not part of standard Perl distribution
  if ($GD::VERSION < 1.20 and ! $main::GlobStore{GdWarn}) {
    printf STDERR "WARNING: GD v%s doesn't support png\n", $GD::VERSION;
    $main::GlobStore{GdWarn} = 1;
  }
use MainLib::StrRegexp;
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline;
use MainLib::File;
use MainLib::FileTmp qw(&PathUnique);
use MainLib::Internet qw(&HtmlCharEncode);
use MainLib::Misc qw(&MySub);
use Math::Round qw(&nearest &nearest_ceil);
use Math::kCalc;
use Math::Range;
use Math::Statist;
use Math::Plot2D;
use Math::PlotImg;  # this is depending on library GD
use database::DbPlain;
use database::Table qw(&TableConvert);
use SeqLab::SeqBench;
use SeqLab::SeqFormat qw (%SeqFFmtOutFunc %SeqidFunc &SeqentryToFFmt);
use SeqLab::SeqStreamIn;
use SeqLab::MotifLib;
use SeqLab::MotifIUPAC;
use SeqLab::MotifRE;  # &LibMtfRead &LibRestric*
use SeqLab::SuffixTrie;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{default}{OutImgRelhigh}{CpG} = 0.20;
$ProgParam{default}{OutImgRelhigh}{TupleFocus} = 0.5;
$ProgParam{default}{OutImgRelhigh}{TupleScore} = 0.15;
$ProgParam{default}{OutImgWidth} = 640;
$ProgParam{default}{ProgMode} = 'SearchMotif';
$ProgParam{default}{tuple}{SizeAnal} = 6;
$ProgParam{default}{tuple}{SizeCorr} = 1;
$ProgParam{default}{WinSize}{CpG} = 120;
$ProgParam{default}{WinSize}{SearchMotif} = 500;
$ProgParam{default}{WinSize}{TupleFocus} = 8;
$ProgParam{default}{WinSize}{TupleScore} = 30;

$ProgParam{MaskLen}  = 8;
$ProgParam{MaskChar} = undef;  # see SeqLab::SeqQueueIn.pm

# working desk
$ProgParam{store} = undef;


# manage I/O #####################################

# organiz I/O handles
$ProgParam{handle}{tab} = \*STDOUT;
&Unbuffer();

# organiz tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches, default subprogram
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
unshift @ProgArg, &GetoptsNArgs();
$ProgMode ||= $ProgParam{default}{ProgMode};

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log} ne '1')? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}

# initialize sequence output queue
if ($ProgMode =~ m/^(randomi[sz]e|xy)$/i) {
  require SeqLab::SeqStreamOut;
  $ProgParam{store}{queue}{SeqOut} = SeqLab::SeqStreamOut->new(
    $ProgOpt{-OutDir} ?    (-dir    => $ProgOpt{-OutDir}) : (),
    $ProgOpt{-OutSeq} ?    (-file   => $ProgOpt{-OutSeq}) : (),
    $ProgOpt{-OutSeqFmt} ? (-format => $ProgOpt{-OutSeqFmt}) : (),
    $ProgOpt{-debug} ?     (-debug  => $ProgOpt{-debug}-1) : (),
    );
}


# work flow manifold #############################

# chain to program mode which is optionally argument-less
if (0) { }
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }
elsif ($ProgMode =~ m/^MotifLib=(.+)$/i) {
  unless ($ProgOpt{-motif} = &LibMtfRead($1,-debug=>$ProgOpt{-debug})) {
    die sprintf "ERROR: unable to find motif library $1\n";
  }
  $ProgOpt{-MotifLib} = $1;
  &ProgMotifLib();
  exit 0;
}

# ensure input argument(s)
my $arg;
unless (@ProgArg) { die "ERROR: input arguments missing\n" }
# validation of input argument(s) is done for each ProgMode separately, since
# some support non-file arguments

# initialize sequence input queue
$ProgParam{store}{queue}{SeqIn} = SeqLab::SeqStreamIn->new(@ProgArg);
$ProgParam{store}{queue}{SeqIn}->AddSwitch(
  -ClipQuality => $ProgOpt{-ClipQuality},
  -ClipUnk     => $ProgOpt{-ClipUnk},
  -fast        => (!$ProgOpt{-ClipQuality} and !$ProgOpt{-ClipUnk}) ? 1 : undef,
  -FilterDescr => $ProgOpt{-FilterDescr},
  -FilterID    => $ProgOpt{-FilterID},
  -lower       => $ProgOpt{-lower},
  -MatchID     => $ProgOpt{-MatchID},
  -pure        => $ProgOpt{-pure} ||
                  (($ProgMode =~ m/^(CpG$|tuple)/i) ? 1 : undef),
  -SlcDescr    => $ProgOpt{-SlcDescr},
  -SlcEnds     => $ProgOpt{-SlcEnds},
  -SlcID       => $ProgOpt{-SlcID},
  -SlcLen      => $ProgOpt{-SlcLen},
  -SlcType     => $ProgOpt{-SlcType},
  -upper       => $ProgOpt{-upper} ||
                  (($ProgMode =~ m/^(TupleFocus$)/i) ? 1 : undef),
  -debug       => $ProgOpt{-debug} ? $ProgOpt{-debug}-1 : undef,
  );

# chain to program mode (with input argument(s))
if (0) { }
elsif ($ProgMode =~ m/^CpG$/i) {
  &ProgCpG (@ProgArg);
}
elsif ($ProgMode =~ m/^PwmFromSeq$/i) {
  die "ERROR: program function no longer exists\n  now realized as stand-alone code seq2pwm_v1.pl (package pwmsuite)\n";
}
elsif ($ProgMode =~ m/^(?:randomize|randomize)$/i) {
  $ProgOpt{-TupleSize} ||= Math::Range->new([1,undef]);
  $ProgOpt{-TupleSize}[1] ||= 1;
  $ProgOpt{-TupleSize}[0] = $ProgOpt{-TupleSize}[1];
  &ProgRandomize();
}
elsif ($ProgMode =~ m/^RestrCheck$/i) {
  delete $ProgParam{store}{queue}{SeqIn};
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  # for code see SeqLab::MotifRE.pm
  &LibRestricCheck ($ProgArg[0], -debug=>$ProgOpt{-debug});
}
elsif ($ProgMode =~ m/^RestrTuple$/i) {
  delete $ProgParam{store}{queue}{SeqIn};
  &InargFileExists ($ProgArg[0], -stdin=>1, -exit=>\&usage);
  # for code see SeqLab::MotifRE.pm
  &LibRestricScoreTuple (@ProgArg[0..2], -debug=>$ProgOpt{-debug});
}
elsif ($ProgMode =~ m/^RestrUpdate$/i) {
  delete $ProgParam{store}{queue}{SeqIn};
  for (my $CtArg=0; $CtArg<2; ++$CtArg) {
    &InargFileExists ($ProgArg[$CtArg], -stdin=>1, -exit=>\&usage);
  }
  # for code see SeqLab::MotifRE.pm
  &LibRestricUpdate (@ProgArg[0..1], -debug=>$ProgOpt{-debug});
}
elsif ($ProgMode =~ m/^SearchMotif$/i) {
  die "ERROR: missing motif argument in program mode -SearchMotif\n";
}
elsif ($ProgMode =~ m/^SearchMotif=(.+)$/i) {
  $arg = $1;
  $ProgMode = 'SearchMotif';
  my $debug = $ProgOpt{-debug};
  # load motif library
  $ProgOpt{-motif} = SeqLab::MotifLib->new();
  $ProgOpt{-motif}->AddSwitch(-debug=>$debug);
  if ($ProgOpt{-motif}->Load($arg)) {
    $debug and printf STDERR "%s. %d motif%s from motif argument %s\n",
      join ('',__PACKAGE__,', line ',__LINE__),
      $ProgOpt{-motif}->Size(), ($ProgOpt{-motif}->Size()==1)?'':'s', $arg;
  } else {
    $debug and printf STDERR "%s. unable to find motif library $arg, interpreting it as a plain motif\n",
      join ('',__PACKAGE__,', line ',__LINE__);
    require SeqLab::MotifIUPAC;
    my $pMtf = SeqLab::MotifIUPAC->new({id=>'custom',motif=>$arg});
    $ProgOpt{-motif}->Push($pMtf);
  }
  # perform searches for array of sequences
  while (defined ($arg=&SeqQueue())) {
    &ProgSearchMotif ($arg);
  }
}
elsif ($ProgMode =~ m/^(?:TupleCmp|TupelCmp)(=(\d+)(?:,(\d+))?)?$/i) {
  &ProgTupleCmp (
    $2 || $ProgParam{default}{tuple}{SizeAnal},
    defined ($3) ? $3 : $ProgParam{default}{tuple}{SizeCorr},
    @ProgArg[0,1]);
}
elsif ($ProgMode =~ m/^(?:TupleDiffDiag|TupelDiffDiag)(=(.+))?$/i) {
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new_parsed($ProgParam{default}{tuple}{SizeAnal});
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTupleDiffDiag (@ProgArg[0,1]);
}
elsif ($ProgMode =~ m/^TupleDist(=(.+))?$/i) {
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new([1,$ProgParam{default}{tuple}{SizeAnal}]);
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTupleDist();
}
elsif ($ProgMode =~ m/^(?:TupleFocus|TupelFocus)(=(.+))?$/i) {
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new_parsed($ProgParam{default}{tuple}{SizeAnal});
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTupleFocus();
}
elsif ($ProgMode =~ m/^(?:TupleLib|TupelLib)(=(.+))?$/i) {
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new([1,$ProgParam{default}{tuple}{SizeAnal}]);
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTupleLib();
}
elsif ($ProgMode =~ m/^(?:TuplePos|TupelPos)(=(.+))?$/i) {
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new_parsed($ProgParam{default}{tuple}{SizeAnal});
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTuplePos();
}
elsif ($ProgMode =~ m/^(?:TupleScore|TupelScore)(=(\d+))?$/i) {
  $ProgMode = 'TupleScore';
  length($1) and &AddSwitch('TupleSize='.$2);
  $ProgOpt{-TupleSize} ||= Math::Range->new_parsed($ProgParam{default}{tuple}{SizeAnal});
  $ProgOpt{-TupleSize}[1] ||= $ProgParam{default}{tuple}{SizeAnal};
  &ProgTupleScore(shift @{$ProgParam{store}{queue}{SeqIn}->{PathSrc}});
}
else {
  die "ERROR: unknown program mode or switch '$ProgMode'\n";
}

# exit script successfully
# cf. END blocks!
exit 0;


################################################################################
# usage help, command line arguments
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 $ProgFile is designed for derivatisation and search application of sequence
 motifs.

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...]  <Arg1> [<Arg2> ...]

Arguments
---------
 Typically, args specify sequence input files. Deviations are stated in the
 listing of ModeSwitches below.

path arguments:
 Path "-" resolves to STDIN or STDOUT, depending on the context.

File Format Support
-------------------
 Input file format is determined automatically and may be one of:
 Experiment     may be multi-sequence
 fastA          Pearson format
 GAP4 database  contig consensus sequences
                NOTE: sequence ID will be ID of the leftmost reading in the
                contig. Remember this when you are using switch -SlcID=S.
 GFF            open GFF-file(s) and an accompanying sequence file which will
                be recognised by having the same name root and one of the
                suffixes: '.fa', '.fasta', '.tbl', '.table', '.pln', ''.
 plain          just the plain sequence. The filename will be interpreted
                to yield a sequence identifier.
 table          table format file containing lines with ID & plain sequence
                separated by TAB / spaces.
 For details of sequence format processing see module SeqLab::SeqFormat.pm.

 Output is written to STDOUT by default

ModeSwitch (case-insensitive)
-----------------------------
switch argument types B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

<none>            default ModeSwitch -$ProgParam{default}{ProgMode} if program arguments are given.
                  Otherwise like ModeSwitch -help.
-CpG              plot local CpG content in an image
                  -OutImgRelHigh=F
                              height in relation to width, default: $ProgParam{default}{OutImgRelhigh}{CpG}
                  -WinSize    window size for smoothening, default: $ProgParam{default}{WinSize}{CpG}
-h(elp)           output command line syntax description and exit
-MotifLib=S       print motif library data in TAB format
                  S           motif library
-randomize        randomize input sequences according to an nth Markov model
                  -TupleSize  Markov order, default: 1 (eq. 0 order)
-RestrCheck       test restriction motif library file
                  Arg1        restriction library file
-RestrTuple       enter a score entry into the restriction motif library
                  according to tuple frequency tables
                  Arg1        old motif library file
                  Arg2        tuple table filename stem for update
                  Arg3        field label for new score entry in the motif
                              library
-RestrUpdate      update restriction enzyme motif library according to a
                  REBASE file. Updated motif data is printed to STDOUT.
                  Arg1        old motif library file
                  Arg2        REBASE release file #32 for update
                  -debug      (recommended:) report changes made to the motif
                              file to STDERR.
-SearchMotif=S    perform motif search on purified input sequence(s).
                  S           motif or a motif library. A motif library file
                              *.mtf will be searched either in \$ENV{MOTIFPATH}
                              or in the current working directory.
                  -OutImg(=S) prepare site map image (S: path, default derived
                              from input path)
                  -OutTabFmt=gff
                              feature name will be the IUPAC syntax motif
                  -strands=N  specify strand to be searched: 0 both (default),
                              1 plus strand, -1 minus strand.
                  --instorient=S
                              output sequence instance in the specified
                              orientation: "orig" = like in the template seq
                              (default), "fwd", "rev"
                  --sequpr=1  output complete hit sequence string letters in
                              upper case, default: core hit upper, flanks lower
                              case
-TupleCmp(=N1(,N2))
                  compare tuple distribution for two given sequence sources
                  or tuple tables (output of ProgMode -TupleLib)
                  N1          tuple width for comparison, default: $ProgParam{default}{tuple}{SizeAnal}
                  N2          tuple width for correction, default: $ProgParam{default}{tuple}{SizeCorr}
                  Arg1        sequence source / tuple library 1 (fname root)
                  Arg2        sequence source / tuple library 2 (fname root)
                  -strands=N  specify strand to be indexed: 0 both (default),
                              1 plus strand, -1 minus strand. Option takes
                              effect only if input contains sequences.
-TupleDiffDiag    calculate tuple diagnosis score table from two sequence
                  sources or tuple tables (output of ProgMode -TupleLib)
                  Arg1        sequence source / tuple library 1 (fname root)
                              diagnostic score tends to negative values
                  Arg2        sequence source / tuple library 2 (fname root)
                              diagnostic score tends to positive values
                  -OutStump=S invoke multi-file output and supply path stump
                  -strands=N  specify strand to be indexed: 0 both (default),
                              1 plus strand, -1 minus strand. Option takes only
                              effect if input contains sequences.
                  -TupleSize  tuple size range, default: $ProgParam{default}{tuple}{SizeAnal}
                  --alpha=F   alpha value for scatter correction
                  --corr=S    scatter correction mode
-TupleDist        sample tuple distance measures from sequences
                  Arg1+       sequence source
-TupleFocus       fully analyse tuples for crowding significance. Multi-file
                  output includes images for local density distributions. The
                  input needs to be a physical file.
                  -OutImgRelHigh=F
                              height in relation to width, default: $ProgParam{default}{OutImgRelhigh}{TupleFocus}
                  -OutStump=S invoke multi-file output and supply path stump
                  -strands=N  specify strand to be compared: 0 both (default),
                              1 plus strand, -1 minus strand.
                  -TupleSize  tuple size range, default: $ProgParam{default}{tuple}{SizeAnal}
                  --PosDone=1
                              program mode -TupleFocus has already been
                              performed, and the precompiled working data shall
                              be re-used.
                  -WinSize    window size for smoothening, default: $ProgParam{default}{WinSize}{TupleFocus}
-TupleLib         create tuple library from input sequences.
                  -OutStump=S invoke multi-file output and supply path stump
                  -strands=N  specify strand to be analysed: 0 both (default),
                              1 plus strand only, -1 minus strand only.
                  -TupleSize  tuple size range, default: 1..$ProgParam{default}{tuple}{SizeAnal}
-TuplePos         create tuple library from input sequences reflecting
                  positional crowding significance.
                  -strands=N  specify strand to be compared: 0 both (default),
                              1 plus strand, -1 minus strand.
                  -TupleSize  tuple size range, default: $ProgParam{default}{tuple}{SizeAnal}
-TupleScore       determine positional score according to a dictionary of tuple
                  scoring values, in a column labeled (in order of preference):
                  bayes_score transit_score score freq_corr freq ct.
                  Tabular output done to STDOUT.
                  Arg1        tuple library, either as specific filename or
                              as a filename root to library of tuple tables.
                  Arg2+       sequence source
                  -OutImg=S   invoke graph image output
                  -OutImgRelHigh=F
                              height in relation to width, default: $ProgParam{default}{OutImgRelhigh}{TupleScore}
                  -TupleSize  tuple size range, default: $ProgParam{default}{tuple}{SizeAnal}
                  --col=S     tuple table column that keeps the score values,
                              default look-up order: score freq_corr freq ct
                  --GC=1      additionally, determine GC content
                  --log=F     turn tuple score values to logarithm. Global
                              scoring is always performed by summing local
                              scores.
                  -WinSize    window size for smoothening, default: $ProgParam{default}{WinSize}{TupleScore}

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

-ClipQual(ity)    clip off minor quality range from sequence, e.g. in
                  Experiment file format
-ClipUnk          clip unknown symbols at the end of the sequence string
-debug(=N)        print debug protocol to STDERR (sometimes STDOUT). Keep
                  temporary files.
                  N           debug depth value
-FilterDescr=S    specify a RegExp which shall be applied on description
                  text to filter sequence input.
-FilterID=S       specify a RegExp which shall be applied on sequence ID
                  to filter sequence input.
-fofn=S           supply list of command arguments in a file. The entries of the
                  file will be appended to the argument list. Multiple -fofn
                  switch statements are allowed.
-log(=S)          redirect STDOUT and STDERR to LOG file
                  S           log file path, default path worked out
                              automatically from built-in directives.
-lower            force input sequence string letters to lower case
-MatchID=S        specify a file containing a list of identifiers that shall be
                  used for sequence entry selection.
-MtfSlcID=S       select motifs by motif identifier
-MtfSlcKey=S      select motifs by keyword entry
-MtfSurrd=N       display flanks of motif hits, size N, default: 0
-OutDir=S         directory for multi-file output. This switch overrides any
                  directory statement provided with switch -OutStump.
-OutIdFmt=S       modify sequence identifiers prior to output, case-insens-
                  itive option argument:
                  acc         try to extract bare Acc.Nos. from complex identi-
                              fier fields
                  acc2        try to extract Acc.Nos. from complex identifier
                              fields, including database prefix
                  acc3        try to extract Acc.Nos. from complex identifier
                              fields, including database prefix and version
                              suffix
                  gi          try to extract gi numbers from complex identifier
                              fields
                  gidel       delete gi number component from complex identifier
                              fields
                  word        try to extract bare Acc.No. or first word from
                              complex identifier field. The danger is that
                              sequence identifiers turn non-unique.
-OutImg(=S)       force program to produce image output and (optionally)
                  specify output path
-OutImgRelhigh=F  relative height of an output image in relation to its width,
                  default depending on program mode (see ModeSwitch description)
-OutImgTransp     turn an image's background transparent
-OutImgWidth=N    define pixel width of an output image (plot data field),
                  default: $ProgParam{default}{OutImgWidth}
-OutSeq=S         file path for sequence output, default: STDOUT. For multi-
                  file output use switch -OutDir and -OutStump.
-OutSeq=rewrite   preserve the file structure as in sequence input
                  Combination with other switches:
                  -OutSeqFmt  rewrite in specified file format
                  -OutDir     rewrite files into specified directory
-OutSeq=SingleSeq write single-sequence output with filenames identical to
                  sequence ID
-OutSeqFmt=S      format for sequence output, case-sensitive:
                  Experiment  Staden Experiment file.
                  fastA       fastA or Pearson format (default)
                  GFF         GFF
                  plain       sequence output in condensed plain text format.
                              Line feed every 60 characters.
                  struct      plain data structure format
                              This is mostly usefully for debugging.
                  table       TAB-delimited table format
-OutStump=S       path stump for multi-file output. A default is derived from
                  input file names in most cases.
-OutTab=S         file path for tabular output
-OutTabFmt=S      format for table output, case-sensitive
                  gff         GFF format
                  html        HTML format
                  tab         TAB-delimited (default)
-pid=S            output pid to file
-pure(=S)         purify input sequence strings and leave only sequence
                  encoding letters.
                  You may specify a sequence type (possible: DNA, DNA5, RNA,
                  RNA5, protein). Then, fuzzy letters are converted to
                  official 'unknowns'.
-SlcDescr=S       specify a list of sequence description keywords (RegExps
                  in a file) which shall be used to select entries from the
                  sequence source.
-SlcEnds=N        select sequence ends having the specified length N bp/aa. A
                  sequence smaller than two times this value will be taken
                  completely. A switch argument value lower than 1 will be
                  ignored.
-SlcHitNum=N      enforce hit number per motif
-SlcID=S          specify a regexp that shall be used for sequence ID selection
-SlcKnown=N       select sequences from input which have an at least n bp
                  spanning continuous non-masked sequence range. Masked
                  ranges are defined by poly(N) / poly(X) having a length
                  of >= $ProgParam{MaskLen} bp / aa.
-SlcLen=N1(..N2)  select input sequences according to their length
                  N1          minimum length
                  N2          maximum length, default: no limit
-SlcScore=F|N     select results by minimal score.
-SlcType=S        select for sequence type S
-strands=N        strand model for motif analysis
-timer            print time-performance protocol to STDERR
-TupleSize=N1(..N2)
                  range of tuple sizes. Single N is interpreted as range
                  N..N
-upper            force input sequence string letters to upper case
-v(erbose)        print extended protocol to STDOUT. You'll need to set
                  option -OutSeq=S if you require valid sequence output
-WinSize=N        specify window size for smoothening etc.
--*               program mode-specific switches. See the descriptions there.
                  Case-sensitive!

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.

Further Reading
---------------
 A detailed, though incomplete, manual is available at
 $CorePath{call}{MeInstDoc}/$CorePath{man}{SeqMotif} .
END_USAGE
  print "\n";
  exit 1;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global options:
#   -debug       [STD]
#
# - global data:
#   $ProgMode
#   @ProgArg     may be pre-filled here beside it's typically used to store
#                return value from &GetoptsNArgs via &AddSwitch (option
#                -fofn)
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my $switch = shift;
  my ($debug, $SwitchArg);
  my ($pTable, $pSlc, $pMtfLib);

  # function parameters
  $debug = $ProgOpt{-debug};

  # optional switches
  if ($switch =~ m/^ClipQual(ity)?$/i) {
    $ProgOpt{-ClipQuality} = 1;
    return;
  }
  if ($switch =~ m/^ClipUnk$/i) {
    $ProgOpt{-ClipUnk} = 1;
    return;
  }
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^FilterDescr=(.+)$/i) {
    $ProgOpt{-FilterDescr} = $1;
    return;
  }
  if ($switch =~ m/^FilterID=(.+)$/i) {
    $ProgOpt{-FilterID} = $1;
    return;
  }
  if ($switch =~ m/^fofn=(.+)$/i) {
    $SwitchArg = ($1 eq '-') ? $1 : &PathExpand($1);
    if ($pTable = &LoadFoid($SwitchArg)) {
      push @ProgArg, @$pTable;
      $debug and printf STDERR "%s. %d entries loaded from fofn %s\n", &MySub, int @$pTable, $SwitchArg||"''";
    } else {
      die sprintf "ERROR: unable to read entries from file of filenames %s (-> %s)\n",
        $1, $SwitchArg;
    }
    return;
  }
  if ($switch =~ m/^log(=(.*))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^lower$/i) {
    $ProgOpt{-lower} = 1;
    return;
  }
  if ($switch =~ m/^MatchID=(.+)$/i) {
    ($SwitchArg) = grep { ($_ eq '-') or (-r $_ and ! -d $_ and -s $_) }
      &PathExpand($1), $1;
    if ($SwitchArg and $pSlc=&LoadFoid($SwitchArg)) {
      $ProgOpt{-debug} and printf STDERR "read %d entr%s from file of ID selectors: %s\n",
        int(@$pSlc), (@$pSlc==1) ? 'y':'ies', $SwitchArg||"''";
      $ProgOpt{-MatchID} = $pSlc;
    } else {
      die "ERROR: unable to read file $1 (option -MatchID)\n";
    }
    return;
  }
  if ($switch =~ m/^M(tf)?(Slc|Select)ID=(.+)$/i) {
    $ProgOpt{-MtfSlcID} = $3;
    return;
  }
  if ($switch =~ m/^M(tf)?(Slc|Select)Key=(.+)$/i) {
    $ProgOpt{-MtfSlcKey} = $3;
    return;
  }
  if ($switch =~ m/^MtfSurrd=(.+)$/i) {
    $ProgOpt{-MtfSurrd} = int ($1);
    return;
  }
  if ($switch =~ m/^OutDir=(.+)$/i) {
    $ProgOpt{-OutDir} = &PathExpand ($1);
    unless (-d $ProgOpt{-OutDir}) {
      if (-e $ProgOpt{-OutDir}) {
        die sprintf "ERROR: output destination exists, but is not a directory: %s\n", $ProgOpt{-OutDir}||"''";
      }

      # this dialogue will also appear if there's no output do be done to any
      # file/directory
      else {
        if (int(grep{ $_ eq '-' }@ARGV)) {
          printf STDERR "creating non-existent output directory (skip dialogue in STDIN input mode)\n";
          mkdir ($ProgOpt{-OutDir});
        } else {
          printf STDERR "output directory does not exist, create?";
          if (&QueryConfirm()) {
            mkdir ($ProgOpt{-OutDir});
          } else { exit 1 }
        }
      }
    }
    return;
  }
  if ($switch =~ m/^OutIdFmt=([\w-]+)$/i) {
    ($ProgOpt{-OutIdFmt}) = grep{ lc($1) eq lc($_) }
      grep{ ref($SeqidFunc{$_}) eq 'CODE' } keys(%SeqidFunc);
    unless ($ProgOpt{-OutIdFmt}) {
      die sprintf "ERROR: specified identifier format action %s (opton -OutIdFmt) is not available\n", $1;
    }
    return;
  }
  if ($switch =~ m/^OutImg(=(.+))?$/i) {
    $ProgOpt{-OutImg} = $2 ? &PathExpand($2) : '';
    return;
  }
  if ($switch =~ m/^OutImgRelhigh=([0-9\.eE+-]+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgRelhigh} = $1;
    return;
  }
  if ($switch =~ m/^OutImgTransp(ar)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgTransp} = 1;
    return;
  }
  if ($switch =~ m/^OutImgWidth=(\d+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgWidth} = $1;
    return;
  }
  if ($switch =~ m/^Out(Seq|File)=(.+)$/i) {
    require SeqLab::SeqStreamOut;
    $ProgOpt{-OutSeq} =
      ($2 eq '-' or int(grep{$2 eq $_} @{$SeqLab::SeqStreamOut::LibGlob{FileMagic}})) ?
      $2 : &PathExpand($2);
    return;
  }
  if ($switch =~ m/^OutSeqFmt=(\w+)$/i) {
    ($ProgOpt{-OutSeqFmt}) = grep{ lc($1) eq lc($_) }
      grep{ ref($SeqFFmtOutFunc{$_}) eq 'CODE' } keys(%SeqFFmtOutFunc);
    unless ($ProgOpt{-OutSeqFmt}) {
      die sprintf "ERROR: specified sequence output format %s is not available\n", $1;
    }
    $ProgOpt{-debug} and printf STDERR "%s. set seq output format to %s\n", &MySub, $ProgOpt{-OutSeqFmt};
    return;
  }
  if ($switch =~ m/^OutSt[au]mp=(.+)$/i) {
    $ProgOpt{-OutStump} = $1;
    return;
  }
  if ($switch =~ m/^OutTab=(.+)$/i) {
    $ProgOpt{-OutTab} = ($1 eq '-') ? '&STDOUT' : &PathExpand($1);
    $ProgParam{handle}{tab} = FileHandle->new(">$ProgOpt{-OutTab}");
    unless ($ProgParam{handle}{tab}) {
      die sprintf "ERROR: unable to open specified table output file %s\n", $ProgOpt{-OutTab}||"''";
    }
    return;
  }
  if ($switch =~ m/^OutTabFmt=(\S+)$/i) {
    $ProgOpt{-OutTabFmt} = $1;
    if ($ProgOpt{-OutTabFmt} !~ m/^(gff|html|tab)$/i) {
      die sprintf "ERROR: specified table output format %s is not available\n", $ProgOpt{-OutTabFmt}||"''";
    }
    return;
  }
  if ($switch =~ m/^pid=(.+)$/i) {
    $SwitchArg = ($1 eq '-') ? $1 : &PathExpand($1);
    &WriteFile ($SwitchArg, "$$\n");
    return;
  }
  if ($switch =~ m/^pure(=(.+))?$/i) {
    $ProgOpt{-pure} = $2 || 1;
    return;
  }
  if ($switch =~ m/^Select/i) {
    die sprintf "ERROR: selector switches are now spelled \"-Slc*\"\n";
  }
  if ($switch =~ m/^SlcDescr=(.+)$/i) {
    $ProgOpt{-SlcDescr} = $1;
    return;
  }
  if ($switch =~ m/^SlcEnds=(\d+)$/i) {
    if ($1 > 0) {
      $ProgOpt{-SlcEnds} = $1;
    } else {
      warn "WARNING: option -SlcEnds=N does not take effect with N <= 0\n";
    }
    return;
  }
  if ($switch =~ m/^SlcID=(.+)$/i) {
    $ProgOpt{-SlcID} = $1;
    return;
  }
  if ($switch =~ m/^SlcKnown=(\d+)$/i) {
    $ProgOpt{-SlcKnown} = $1;
    $ProgOpt{-SlcLen} ||= Math::Range->new($1,undef);
    return;
  }
  if ($switch =~ m/^SlcLen=(.+)$/i) {
    unless ($ProgOpt{-SlcLen} = Math::Range->new_parsed($1)) {
      die "ERROR: invalid argument for switch -SlcLen: $1\n";
    }
    return;
  }
  if ($switch =~ m/^SlcScore=([0-9\.eE+-]+)$/i) {
    $ProgOpt{-SlcScore} = $1;
    return;
  }
  if ($switch =~ m/^SlcType=(.+)$/i) {
    $ProgOpt{-SlcType} = $1;
    return;
  }
  if ($switch =~ m/^strands=(-?[12])$/i) {
    $ProgOpt{-strands} = $1;
    return;
  }
  if ($switch =~ m/^timer$/i) {
    $ProgOpt{-timer} = 1;
    return;
  }
  if ($switch =~ m/^(?:TupleSize|TupelSize)=(.+)$/i) {
    $SwitchArg = $1;
    if ($SwitchArg =~ m/^\d+$/) {
      $ProgOpt{-TupleSize} = Math::Range->new([$SwitchArg,$SwitchArg]);
    } else {
      $ProgOpt{-TupleSize} = Math::Range->new_parsed($SwitchArg);
    }
    unless ($ProgOpt{-TupleSize}) {
      die "ERROR: invalid argument for switch -TupleSize: $SwitchArg\n";
    }
    return;
  }
  if ($switch =~ m/^upper$/i) {
    $ProgOpt{-upper} = 1;
    return;
  }
  if ($switch =~ m/^(?:-|var=)(\w+)[,=](.+)$/i) {
    $ProgOpt{-var}{$1} = $2;
    return;
  }
  if ($switch =~ m/^v(erbose)?$/i) {
    $ProgOpt{-verbose} = 1;
    return;
  }
  if ($switch =~ m/^WinSize=([\d.]+)$/i) {
    $ProgOpt{-WinSize} = $1;
    return;
  }

  # program mode switches
  if (defined($ProgMode)) {
    die sprintf "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
  }
  else {
    $ProgMode = $switch;
    return;
  }
}


################################################################################
# basic I/O
################################################################################


# produce sequence array data structure from sequence input files
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -SlcKnown   [STD]
#
# - return val: - reference to sequence data structure
#               - undef if queue is empty or an error occurs
#
# DESCRIPTION
# - object SeqLab::SeqStreamIn initialization has been done in the MAIN body
#   or in the ProgMode function.
#
sub SeqQueue {
  my $debug = $ProgOpt{-debug};

  # redo loop
  my $pSeq;
  {
    # get next sequence from input queue
    unless ($pSeq = $ProgParam{store}{queue}{SeqIn}->GetNext()) { return undef }
    $debug and printf STDERR "%s. preparing sequence %s\n", &MySub, $$pSeq{id}||"''";
    $$pSeq{SeqPure} ||= &SeqStrPure ($$pSeq{sequence}, -upper=>1);

    # masking character for sequences
    # it's derived from sequence type (first entry)
    if ($ProgOpt{-SlcKnown} and !$ProgParam{MaskChar} and $$pSeq{sequence}) {
      $ProgParam{MaskChar} = $SeqSmbUnk{&SeqType($$pSeq{SeqPure})};
      if ($ProgParam{MaskChar}) {
        $debug and printf STDERR "%s. sequence ID %s: type %s, masking character %s\n", &MySub,
          $$pSeq{id}||"''", &SeqType($$pSeq{SeqPure})||"''", $ProgParam{MaskChar}||"''";
      } else {
        $ProgParam{MaskChar} ||= 'N';
        printf STDERR "%s. warning: unable to determine sequence type from sequence ID %s, assigning default masking character %s\n", &MySub,
          $$pSeq{id}||"''", $ProgParam{MaskChar}||"''";
      }
    }

    # select by length of known sequence
    my $FragLen;
    if ($ProgOpt{-SlcKnown}) {
      my $RegexpSplit = sprintf ('%s{%d,}', $ProgParam{MaskChar}, $ProgParam{MaskLen});
      my @fragment = split (/$RegexpSplit/, $$pSeq{SeqPure});
      $FragLen = &Max ($FragLen, map{ length($_) } @fragment);
      if ($FragLen < $ProgOpt{-SlcKnown}) {
        $debug and printf STDERR "%s. sequence %s fails length filter %d (split %d x %s), fragment maximum = %d\n", &MySub,
          $$pSeq{id}||"''", $ProgOpt{-SlcKnown},
          $ProgParam{MaskLen}, $ProgParam{MaskChar},
          $FragLen;
        redo;
      }
    }
  }

  # return reference to sequence structure
  return $pSeq;
}


# work out output path base from sequence source file and/or identifier
# information
#
# INTERFACE
# - argument 1*: reference to sequence data structure
#
# - options:
#   -stamp       append to file-derived name stem if global switch -OutStump
#                is not given
#
# - global options:
#   -debug       [STD]
#   -OutDir      [STD]
#   -OutStump    [STD]
#
# - return val:  output path base
#
sub PrepOstump {
  my ($pSeq,%opt) = @_;
  $pSeq ||= {};
  my $debug = $ProgOpt{-debug};

  # prepare output path base
  # 1st: $ProgOpt{-OutStump}
  # 2nd: $ProgArg[0]
  my $PathStamp = $ProgOpt{-OutStump};
  unless ($PathStamp) {
    my $PathRef = $$pSeq{SrcPath} || $ProgArg[0];
    if ($PathRef eq '-') { $PathRef = 'stdin'; }
    $PathStamp = &PathChgSuffix (&PathExpand($PathRef), '', -last=>1);
    $PathStamp .= $opt{-stamp};
  }

  # determine effective directory
  $ProgOpt{-OutDir} and $PathStamp =
    $ProgOpt{-OutDir} .'/'. &PathSplit($PathStamp)->{name};

  # final refinement: expand to nice, rooted path
  $PathStamp = &PathExpand ($PathStamp);

  # return path
  return $PathStamp;
}


# output line of introductory note preceding a table
#
# INTERFACE
# - argument 1: intro text line (incl. LF)
#
# - global options:
#   -OutTab     [STD]
#   -OutTabFmt  [STD]
#
# - global data:
#   $ProgParam{handle}{tab}  table output file handle
#
sub OutTabIntro {
  my ($LinePlain) = @_;
  my $hOutTab = $ProgParam{handle}{tab};
  my $TabFmt = $ProgOpt{-OutTabFmt} || '';
  my $CodeCRLF = ($TabFmt eq 'html') ? "<BR>\n" : "\n";
  my $CodeComm = ($TabFmt eq 'html') ? '' : '# ';

  # convert LF syntax
  $LinePlain =~ s/\n/$CodeCRLF/;

  # output line
  print  $hOutTab "$CodeComm$LinePlain";
}


# output table header in chosen format
#
# INTERFACE
# - argument 1: reference to field array (header line)
#
# - global options:
#   -OutTab     [STD]
#   -OutTabFmt  [STD]
#
# - global data:
#   $ProgParam{handle}{tab}  table output file handle
#
sub OutTabHeadln {
  my ($pField) = @_;
  my $hOutTab = $ProgParam{handle}{tab};
  my $TabFmt = $ProgOpt{-OutTabFmt} || '';

  # HTML format: start table
  if ($TabFmt eq 'html') {
    print  $hOutTab "<TABLE BORDER=0 CELLPADDING=2 CELLSPACING=2>\n";
  }

  # TAB format: hide column labels in comments
  else {
    print  $hOutTab "#\n# column labels:\n# ";
  }

  # output header line
  &OutTabLine ($pField);
}


# output table line in chosen format
#
# INTERFACE
# - argument 1: reference to field array
#
# - global options:
#   -OutTab     [STD]
#   -OutTabFmt  [STD]
#
# - global data:
#   $ProgParam{handle}{tab}  table output file handle
#
sub OutTabLine {
  my ($pField) = @_;
  my $hOutTab = $ProgParam{handle}{tab};
  my $TabFmt = $ProgOpt{-OutTabFmt} || '';

  # HTML format
  my $line;
  if ($TabFmt eq 'html') {
    $line  = '<TR>';
    foreach (@$pField) {
      my $field = &HtmlCharEncode ($_);
      $field = $field || '&nbsp;';
      $line .= "<TD>$field</TD>";
    }
    $line .= '</TR>';
  }

  # default: TAB-delimited format
  else {
    $line = join ("\t", @$pField);
  }

  # output line
  print  $hOutTab "$line\n";
}


# output table tail in chosen format
#
# INTERFACE
# - global options:
#   -OutTab     [STD]
#   -OutTabFmt  [STD]
#
# - global data:
#   $ProgParam{handle}{tab}  table output file handle
#
sub OutTabTail {
  my $hOutTab = $ProgParam{handle}{tab};

  # only for HTML something to do here
  if ($ProgOpt{-OutTabFmt}||'' eq 'html') {
    print  $hOutTab "</TABLE>\n";
  }
}


################################################################################
# motif construction & housekeeping
################################################################################


# output motif library in tabular format
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -motif      reference to motif library data structure
#   -MotifLib   motif / motif library specification
#   -OutTab     [STD]
#   -OutTabFmt  [STD]
#
sub ProgMotifLib {

  # function constants
  my @ColumnOrdered = qw(id DefType def score);  # this is preferred order

  # function parameters
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $pMtfLib = $ProgOpt{-motif} or exit 1;
  $debug and printf STDERR "%s. %d motif%s in motif library\n", &MySub,
    int(@$pMtfLib), (@$pMtfLib==1) ? '':'s';

  #################################################################
  # format table

  # strip column labels
  my ($pMtfLibAa,$pColumnLabel) = &TableConvert ('AH', 'AA', $pMtfLib,
    -ColLabel=>\@ColumnOrdered, -debug=>$dbg2);

  # print header
  &OutTabIntro ("$ProgFile -$ProgMode\n");
  &OutTabIntro ("motif library $ProgOpt{-MotifLib}\n");
  &OutTabIntro (sprintf "%s. of entries: %d\n", &MySub, int(@$pMtfLibAa));
  &OutTabHeadln ($pColumnLabel);

  # loop over all motifs, output motif to table
  foreach my $pMtf (@$pMtfLibAa) {
    &OutTabLine ($pMtf);
  }
}


################################################################################
# motif search
################################################################################


# perform motif search
#
# INTERFACE
# - argument 1:   reference to sequence data structure
#
# - global options:
#   -debug        [STD]
#   -motif        reference to motif library data structure (loaded in MAIN)
#   -MotifLib     motif / motif library specification (from ModeSwitch)
#   -OutImg       [STD]
#   -OutImgWidth  [STD]
#   -OutTab       [STD]
#   -OutTabFmt    [STD]
#   -SlcHitNum    enforce hit number per motif
#   -SlcScore     select for minimum hit score
#   -strands      [STD]
#   -timer        [STD]
#   --instorient  see function &usage
#   --sequpr      output sequence instance completely upper case
#
# DESCRIPTION
# - This program function uses the object-oriented code in library
#   SeqLab::MotifIUPAC. Established since 20021125 ff.
#
sub ProgSearchMotif {
  my %SyntaxDrc = (
    list =>    { '0'=>'0', '1'=>'1',  '-1'=>'-1' },
    listgff => { '0'=>'.', '1'=>'+',  '-1'=>'-'  },
    map  =>    { '0'=>'',  '1'=>' >', '-1'=>' <' },
    );
  # function parameters
  my ($pSeq) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my $MtfSurrd = $ProgOpt{-MtfSurrd};
  my $bImg = int(defined($ProgOpt{-OutImg}))
    || $ProgOpt{-OutImgWidth} || $ProgOpt{-OutImgType};

  # search parameters
  my $pMtfLib = $ProgOpt{-motif};
  $debug and $pMtfLib->Statist(\*STDERR);

  # prepare sequence and derive sequence parameters
  # - we conserve the original sequence string cause it may contain
  #   upper/lower-case characters
  $$pSeq{SeqPure} ||= &SeqStrPure ($$pSeq{sequence}, -upper=>1);
  my $iSeqLen = length ($$pSeq{SeqPure});
  my $bSeqGapped = (length($$pSeq{sequence})!=length($$pSeq{SeqPure})) ? 1 : 0;

  ##############################################################################
  # motif search

  # perform search
  $bTimer and $time = (times)[0];
  my $paHit = $pMtfLib->Search($$pSeq{SeqPure},
    -SlcID    => $ProgOpt{-MtfSlcID},
    -SlcKey   => $ProgOpt{-MtfSlcKey},
    -strands  => $ProgOpt{-strands},
    -HitSurrd => $MtfSurrd,
    -isPure   => 1,
    -debug    => $dbg2);
  $bTimer and printf STDERR "%s. CPU time for motif search %.3f\n", &MySub, (times)[0]-$time;

  # build data structure from results
  my (@ResultList,%ctmtf);
  $bTimer and $time = (times)[0];
  foreach my $pHit (@$paHit) {
    if ($ProgOpt{-SlcScore} and $$pHit{score}<$ProgOpt{-SlcScore}) { next }
    my $pMtfRange = [ $$pHit{offset}, $$pHit{offset}+&Max($$pHit{length},1)-1 ];
    if ($bSeqGapped) {
      $pMtfRange = &SeqRangeGapped ($$pSeq{SeqPure}, $$pSeq{sequence}, $pMtfRange);
    }
    foreach (qw(ante post)) { $$pHit{$_} ||= '' }
    push @ResultList, {
      seqid     => &{$SeqidFunc{$ProgOpt{-OutIdFmt}||''}}($$pSeq{id}),
      motifid   => $$pHit{MotifID},
      method    => 'SearchMotif',
      match_off => $$pMtfRange[0],
      match_end => $$pMtfRange[1],
      match_len => $$pMtfRange[1] - $$pMtfRange[0] + 1,
      orient    => $SyntaxDrc{list}{$$pHit{orient}},
      orientgff => $SyntaxDrc{listgff}{$$pHit{orient}},
      instance  => $ProgOpt{-var}{sequpr} ?
        uc($$pHit{ante}.$$pHit{instance}.$$pHit{post}) :
                  ($ProgOpt{-var}{seqlwr} ?
        lc($$pHit{ante}.$$pHit{instance}.$$pHit{post}) :
        lc($$pHit{ante}) . uc($$pHit{instance}) . lc($$pHit{post}) ),
      score     => $$pHit{score},
      score_exp => $$pHit{ScoreExp},
      total_num => '?',
      fakegff   => '.',
      };
    ++ $ctmtf{$$pHit{MotifID}};
    if ($$pHit{orient}<0 and $ProgOpt{-var}{instorient} eq 'fwd') {
      $ResultList[-1]{instance} = &SeqStrRevcompl($ResultList[-1]{instance});
    }
  }
  $bTimer and printf STDERR "%s. CPU time for search result reformatting %.3f\n", &MySub, (times)[0]-$time;

  ##############################################################################
  my %img;  # $img{path} needed later for report
  if ($bImg and int(@ResultList)) {
    $bTimer and $time = (times)[0];
    $img{width} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
    my %ResultMap = (
      pos => [ map{ $_->{match_off} } @ResultList ],
      label => [ map{ $_->{motifid}.$SyntaxDrc{map}{$_->{orient}} } @ResultList ],
      );

    # prepare site map image
    if (@ResultList / $img{width} < 0.1) {
      my %graph = (
        BgTranspar => $ProgOpt{-OutImgTransp},
        plot => [
            # this plot entry is just a fake to provoke correct dimensions
            # the true map is generated by a scale data substructure
            { DimPixel  => { x=>$img{width}, y=>0 },
              HeightRel => 0,
              DataType  => 'MapPos',
              data      => { x=>$ResultMap{pos} },
              DataRange => { x=>[0,$iSeqLen] },
            },
          ],
        scale => [
            { PlotNum  => 0,
              location => 'bottom',
              color    => 'black',
            },
            { PlotNum  => 0,
              location => 'top',
              hasLine  => 0,
              color    => 'black',
              map      => [
                  { data  => { pos=>$ResultMap{pos}, label=>$ResultMap{label}, },
                  },
                ],
            },
          ],
        );
      $img{path} = $ProgOpt{-OutImg} || &PathUnique (-name=>&PrepOstump($pSeq,-stamp=>'_motif').'#.png', -NoSize=>1, -touch=>1);
      unless (&Graph (\%graph, -save=>$img{path}, -timer=>$bTimer, -debug=>$dbg2)) {
        printf STDERR "ERROR: unable to save image to %s\n", $img{path}|"''";
        undef $img{path};
      }
      $bTimer and printf STDERR "%s. CPU time for preparation of motif site map image %.3f\n", &MySub, (times)[0]-$time;
    }

  ##############################################################################
    # prepare density image
    else {
      print  STDERR "WARNING: hit list is too long for site plot, displaying hit density instead\n";
      my (%ResultIdx,$ResultLastEnd);
      foreach (@ResultList) {
        if ($_->{match_off} <= ($ResultLastEnd||0)) { next }
        $ResultIdx{$_->{match_off}} = 1;
        $ResultLastEnd = $_->{match_end};
      }
      my $poPlotDens = Math::Plot2D->new([map{ exists($ResultIdx{$_}) } (1..$iSeqLen)], -TabType=>'A1y', -extrapolate=>'mean');
      if ($bTimer) {
        printf STDERR "%s. CPU time for preparation of primary hit density plot %.3f\n", &MySub, (times)[0]-$time;
        $time = (times)[0];
      }

      # smoothen density plot
      $img{SmoothStep} = 0.5 * $iSeqLen / $img{width};
      $img{SmoothWin}  = $ProgOpt{-WinSize} || $ProgParam{default}{WinSize}{SearchMotif};

      # graph data structure and according image
      my %graph = (
        BgTranspar => $ProgOpt{-OutImgTransp},
        plot => [
            { HeightRel => 0,
              DimPixel  => { x=>$img{width} },
              DataType  => 'AA',
              data      => $poPlotDens->SmoothPlot ($img{SmoothStep}, -window=>$img{SmoothWin}, -debug=>$dbg2),
              DataRange => {
                x => [ 0, $iSeqLen ],
                y => [ 0 ],
                },
              ReprType  => 'line',
              ReprColor => 'black',
            },
          ],
        scale => [
            { PlotNum  => 0,
              location => 'bottom',
              color    => 'black',
            },
            { PlotNum  => 0,
              location => 'left',
              color    => 'black',
            },
            { PlotNum  => 0,
              location => 'right',
              color    => 'black',
            },
          ],
        );
      $img{path} = $ProgOpt{-OutImg} || &PathUnique (-name=>&PrepOstump($pSeq,-stamp=>'_MotifDens').'#.png', -NoSize=>1, -touch=>1);
      unless (&Graph (\%graph, -save=>$img{path}, -timer=>$bTimer, -debug=>$dbg2)) {
        printf STDERR "ERROR: unable to save image to %s\n", $img{path}|"''";
        undef $img{path};
      }
      $bTimer and printf STDERR "%s. CPU time for preparation of hit density map image %.3f\n", &MySub, (times)[0]-$time;
    }
  }

  ##############################################################################
  # prepare table of hits
  # - we do this after creating image, cause we refer to image ouput path
  $ProgOpt{-OutTabFmt} ||= '';

  # column definitions
  my @TabColumn = ($ProgOpt{-OutTabFmt} eq 'gff') ?
    (qw( seqid method motifid match_off match_end fakegff orientgff fakegff fakegff )) :
    (qw( seqid motifid match_off match_end orient instance score score_exp total_num ));

  # failed motifs
  my %MtfId;
  $MtfId{all} = [ $pMtfLib->IDs() ];
  %{$MtfId{IdxMatch}} = map{ ($_=>1) } &unique (map{ $_->{motifid} } @ResultList);
  @{$MtfId{NonMatch}} = grep{ !$MtfId{IdxMatch}{$_} } @{$MtfId{all}};

  # table header
  &OutTabIntro ("$ProgFile -$ProgMode\n");
  &OutTabIntro (sprintf "time: %s\n", &TimeStr());
  &OutTabIntro (sprintf "sequence: %s, %d bp\n", $$pSeq{id}||"''", $iSeqLen);
  &OutTabIntro (sprintf "motif%s: %s\n", (@{$MtfId{all}}==1)?'':'s', join(', ',@{$MtfId{all}}));
  my @mtype = &unique (grep{$_} map{ref($_)} @{$pMtfLib->{motif}});
  &OutTabIntro (sprintf "motif object type%s: %s\n", (@mtype==1)?'':'s', join(', ',@mtype));
  &OutTabIntro (sprintf "non-matching motif%s: %s\n", (@{$MtfId{NonMatch}}==1) ? '':'s', join(', ',@{$MtfId{NonMatch}}));
  &OutTabIntro (sprintf "image path: %s\n", $img{path}||'failed')
    if ($bImg and int(@ResultList));

  # table output type gff
  if (($ProgOpt{-OutTabFmt}||'') eq 'gff') {
    &OutTabHeadln ($SeqLab::SeqFormat::LibGlob{GFF}{ColOut});

    # table
    foreach (sort {
      $a->{match_off} <=> $b->{match_off} or
         $a->{orient} cmp $b->{orient} } @ResultList
    ) {
      &OutTabLine ( [ @{$_}{@TabColumn} ] );
    }
  }

  # table output type plain/html
  else {

    # table
    if (@ResultList) {
      &OutTabHeadln (\@TabColumn);
      foreach (sort {
        $a->{match_off} <=> $b->{match_off} or
           $a->{orient} cmp $b->{orient} } @ResultList
      ) {
        $_->{total_num} = $ctmtf{$_->{motifid}};
        &OutTabLine ( [ @{$_}{@TabColumn} ] );
      }
      &OutTabTail();
    }

    # no results
    else {
      &OutTabIntro ("*** no hits ***\n");
    }
  }
}


# plot local CpG content
#
# INTERFACE
# - global options:
#   -debug        [STD]
#   -OutImg       [STD]
#   -OutImgWidth  [STD]
#   -OutImgRelhigh[STD]
#   -timer        [STD]
#
# DESCRIPTION
# - cmp. program 'CpGplot' by R. Lopez & P. Rice
# - cmp. CpG island definition in F. Larsen et al. GATA 9, 80-85 (1992)
#
sub ProgCpG {

  # function constants
  my %ScoreMatrix = (
    C   => { C=>1,       len=>1 },
    G   => { G=>1,       len=>1 },
    GC  => { C=>1, G=>1, len=>1 },
    CpG => { CG=>1,      len=>2 },
    );

  # function parameters
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;
  my %img;
  $img{width} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
  $img{RelHigh} = $ProgOpt{-OutImgRelhigh} || $ProgParam{default}{OutImgRelhigh}{CpG};

  # loop over sequences
  # - we got purified sequence from SeqStreamIn object
  my $pSeq;
  while ($pSeq = &SeqQueue()) {
    my $iSeqLen = length $$pSeq{sequence};

    # smoothening parameters
    $img{SmoothWin} = $ProgOpt{-WinSize} || $ProgParam{default}{WinSize}{CpG};
    $img{SmoothStep} = 0.5 * $iSeqLen / $img{width};

  ##############################################################################
  # analyse symbols and tuples

    # sample data for G/C and CpG
    my %plot = ();
    $bTimer and $time = (times)[0];
#    foreach my $ItSmb (qw(C G GC CpG)) {  # include separate counts of C and G
    foreach my $ItSmb (qw(GC CpG)) {
      $debug and printf STDERR "%s. sampling raw data: tuple %s\n", &MySub, $ItSmb||"''";
      for (my $CtI=0; $CtI<$iSeqLen; $CtI++) {
        push @{$plot{raw}{$ItSmb}},
          $ScoreMatrix{$ItSmb}{substr($$pSeq{sequence},$CtI,$ScoreMatrix{$ItSmb}{len})} || 0;
      }
    }
    if ($bTimer) {
      printf STDERR "%s. CPU time for raw tuple plots %.3f\n", &MySub, (times)[0]-$time;
      $time = (times)[0];
    }

    # smoothened plots
    foreach my $ItSmb (grep{ exists($plot{raw}{$_}) } qw(C G GC CpG)) {
      $debug and printf STDERR "%s. calculating smoothened data: tuple %s, step %s, window %s\n", &MySub,
        $ItSmb||"''", $img{SmoothStep}, $img{SmoothWin};
      $plot{tmp} = Math::Plot2D->new($plot{raw}{$ItSmb},-TabType=>'A1y',-extrapolate=>'mean');
      $plot{graph}{$ItSmb} = &TableConvert ('AA', 'AC',
        $plot{tmp}->SmoothPlot ($img{SmoothStep}, -window=>$img{SmoothWin}, -debug=>$dbg2)
        );
    }

    # smoothened CpG over G/C
    $plot{graph}{CpGoverGC}[0] = $plot{graph}{CpG}[0];
    for (my $CtI=0; $CtI<@{$plot{graph}{CpG}[0]}; $CtI++) {
      $plot{graph}{CpGoverGC}[1][$CtI] = $plot{graph}{CpG}[1][$CtI] /
        ((0.5 * ($plot{graph}{GC}[1][$CtI] || (0.5 / $img{SmoothWin}))) ** 2);
    }

#    # smoothened CpG over C and G
#    $plot{graph}{CpGoverCG}[0] = $plot{graph}{CpG}[0];
#    for (my $CtI=0; $CtI<@{$plot{graph}{CpG}[0]}; $CtI++) {
#      $plot{graph}{CpGoverCG}[1][$CtI] = $plot{graph}{CpG}[1][$CtI] /
#        (($plot{graph}{C}[1][$CtI] || (0.5 / $img{SmoothWin})) *
#         ($plot{graph}{G}[1][$CtI] || (0.5 / $img{SmoothWin})));
#    }

    if ($bTimer) {
      printf STDERR "%s. CPU time for smoothened plots %.3f\n", &MySub, (times)[0]-$time;
      $time = (times)[0];
    }

    # image dimensions
    $img{yMax} = &Max (map{ @{$plot{graph}{$_}[1]} }
      grep{ exists($plot{graph}{$_}) } qw(C G GC CpG CpGoverCG CpGoverGC) );

  ##############################################################################
  # prepare image

    # sample data
    my %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
          { DimPixel  => { x=>$img{width}, y=>0 },
            HeightRel => $img{RelHigh},
            DataType  => 'HCA',
            data      => { x=>$plot{graph}{CpGoverGC}[0], y=>$plot{graph}{CpGoverGC}[1] },
            ReprColor => 'black',
          },
#          { DataType  => 'HCA',
#            data      => { x=>$plot{graph}{C}[0], y=>$plot{graph}{C}[1] },
#            ReprColor => 'yellow',
#          },
#          { DataType  => 'HCA',
#            data      => { x=>$plot{graph}{G}[0], y=>$plot{graph}{G}[1] },
#            ReprColor => 'yellow',
#          },
          { DataType  => 'HCA',
            data      => { x=>$plot{graph}{GC}[0], y=>$plot{graph}{GC}[1] },
            ReprColor => 'blue',
          },
          { DataType  => 'HCA',
            data      => { x=>$plot{graph}{CpG}[0], y=>$plot{graph}{CpG}[1] },
            ReprColor => 'green',
          },
        ],
      scale => [
          { PlotNum  => 0,
            location => 'bottom',
          },
          { PlotNum  => 0,
            location => 'left',
          },
          { PlotNum  => 1,
            location => 'right',
          },
          { PlotNum  => 2,
            location => 'right',
          },
        ],
      );
    map {
      $_->{DataRange} = { x=>[0,$iSeqLen], y=>[-0.001] };
      $_->{ReprType} = 'line';
    } @{$graph{plot}};

    # create and save image
    $img{path} = $ProgOpt{-OutImg} || &PathUnique (-name=>&PrepOstump($pSeq,-stamp=>'_CpG').'#.png', -NoSize=>1, -touch=>1);
    unless (&Graph (\%graph, -save=>$img{path}, -timer=>$bTimer, -debug=>$dbg2)) {
      printf STDERR "ERROR: unable to save image to %s\n", $img{path}|"''";
      undef $img{path};
    } else {
      print  "image saved to $img{path}\n";
    }

  }  # end seq loop
}


################################################################################
# tuple analysis
################################################################################


# create tuple library
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]
#   -TupleSize  [STD]
#
sub ProgTupleLib {
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my %TupleOpt;
  $TupleOpt{-debug} = $debug || undef;
  @TupleOpt{'-TupleMin','-TupleMax'} = @{$ProgOpt{-TupleSize}};
  $debug and printf STDERR "%s. tuple size range: %d..%d\n", &MySub,
    @{$ProgOpt{-TupleSize}};
  $TupleOpt{-symbol} = undef;  # = [ qw(A C G T) ];
  $TupleOpt{-strands} = $ProgOpt{-strands};

  # initialize tuple object, grow up tuple tree
  my $pTupTrie = SeqLab::SuffixTrie->new(%TupleOpt);
  while (my $pSeq = &SeqQueue()) {
    $debug and printf STDERR "%s  %s\n", $$pSeq{id}, substr($$pSeq{sequence},0,50);
    $pTupTrie->SeqScan ($$pSeq{sequence});
  }

  # output tuple object data
  my $PathStamp = &PrepOstump();
  my $PathOut = "$PathStamp.dat";
  my $hOut = FileHandle->new ($PathOut,'w');
  unless ($hOut) {
    die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
  }
  print "writing file $PathOut\n";
  $pTupTrie->DataPrint (-handle=>$hOut);

  # output tuple data tables
  for (my $i=$TupleOpt{-TupleMax}; $i>=($TupleOpt{-TupleMin}||1); $i--) {
    $PathOut = "$PathStamp.tab$i";
    unless ($hOut = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    print "writing file $PathOut\n";
    $pTupTrie->LevelPrint($i,-handle=>$hOut);

    # calculate+output information content
    my $sum = $pTupTrie->LevelSum($i);
    my $pTupIdx = $pTupTrie->LevelIndex($i);
    my $entropy=0; my $log2=log(2);
    foreach (values %$pTupIdx) {
      my $freq = $_->{ct} / $sum;
      $entropy -= $freq * log($freq||1)/$log2;
    }
    print  $hOut "#\n";
    printf $hOut "# word entropy (bits): %.3f\n", $entropy;
    printf $hOut "# entropy per position (bits): %.3f\n", $entropy/$i;
  }
}


# calculate tuple diagnosis score table from two tuple frequency tables
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]
#   -TupleSize  [STD]
#
sub ProgTupleDiffDiag {
  my (@TupleSrc); ($TupleSrc[1],$TupleSrc[2]) = @_;
  my ($debug, $dbg2, $verbose, $dbgverb, $hDbgverb,
      @OptAlpha, $SigCorr, $TupleSize);
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $verbose = $ProgOpt{-verbose};
  $dbgverb = $debug||$verbose;
  $hDbgverb = $verbose ? \*STDOUT : \*STDERR;
  @OptAlpha = $ProgOpt{-var}{alpha} ? (-alpha=>$ProgOpt{-var}{alpha}) : ();
  $SigCorr = $ProgOpt{-var}{corr};
  $TupleSize = $ProgOpt{-TupleSize}[1] || $ProgParam{default}{tuple}{SizeAnal};
  $debug and printf STDERR "%s. tuple size %d\n", &MySub, $TupleSize;

  ##############################################################################
  # tuple data input
  my ($BakOutStamp);
  my ($pTabA, $pColA, $TabCmtA, $pTabB, $pColB, $TabCmtB, $ValNA, $ValNB);
  my ($freq_exp, $pTransit);

  # ensure / create tuple libraries for input
  $BakOutStamp = $ProgOpt{-OutStump};
  foreach my $ItSrc (\$TupleSrc[1], \$TupleSrc[2]) {
    if (-r "$$ItSrc.tab".$TupleSize) {
      $debug and printf STDERR "%s. source $$ItSrc is already a tuple library (*.tab)\n", &MySub;
    }
    # create temporary tuple library from input sequences
    else {
      unless (-r $$ItSrc) {
        die sprintf "ERROR: unable to read input file %s (sequence?)\n"
          . "  or %s (tuple library table)\n", $$ItSrc, "$$ItSrc.tab".$TupleSize;
      }
      $debug and printf STDERR "%s. creating tuple library from sequence source $$ItSrc\n", &MySub;
      TupleDiffDiagTmp: {
        $ProgOpt{-OutStump} = $ProgParam{TmpManag}->Create();
        mkdir ($ProgOpt{-OutStump});
        $ProgOpt{-OutStump} .= '/tuple' . (($ItSrc eq \$TupleSrc[1])?'A':'B');
      }
      $ProgParam{store}{queue}{SeqIn}->ini($$ItSrc);
      &ProgTupleLib ($TupleSize);
      $$ItSrc = $ProgOpt{-OutStump};
    }
  }
  $ProgOpt{-OutStump} = $BakOutStamp;

  # load tuple libraries
  ($pTabA, $pColA, $TabCmtA) = &PlainToTable ($TupleSrc[1].'.tab'.$TupleSize,
    -TabType=>'AH', -comments=>1);
  $ValNA = &Sum(map{ $_->{ct} }@$pTabA);
  ($pTabB, $pColB, $TabCmtB) = &PlainToTable ($TupleSrc[2].'.tab'.$TupleSize,
    -TabType=>'AH', -comments=>1);
  $ValNB = &Sum(map{ $_->{ct} }@$pTabB);
  if (@$pTabA!=(4**$TupleSize) or @$pTabA!=@$pTabB) {
    die sprintf "ERROR: tables size (%d/%d) does not correspond to tuple size (%d)\n", &MySub,
      int @$pTabA, int @$pTabB, $TupleSize;
  }
  $freq_exp = 1 / 4**$TupleSize;
  $debug and printf STDERR "%s. tuple tables loaded, %d / %d data lines, %d / %d tuple counts\n", &MySub,
    int @$pTabA, int @$pTabB, $ValNA, $ValNB;

  ##############################################################################
  # correct scattering of tuple counts
  # A  correct global tuple frequency
  # B  correct transition frequency
  # C  correct hypergeometric vector
  my $time = (times)[0];
  my ($CtI,$pCol);

  # determine ratio of transition frequencies
  if (0) { }

  # scatter correction - mode A1: shift global frequency towards expected
  elsif ($SigCorr eq 'A1') {
    my ($pCol, $pTab);
    my ($pLine, $ValN, $ValP);
    foreach $pCol ($pColA, $pColB) {
      @$pCol = ( (grep{ $_!~m/^(freq_corr|transit_(wgt|sum|freq))$/ } @$pCol),
        qw(freq_exp freq_corr transit_wgt transit_sum transit_freq) );
    }
    foreach $pTab ($pTabA, $pTabB) {
      $ValN = &Sum(map{ $_->{ct} }@$pTab);
      foreach $pLine (@$pTab) {

        # shift frequency value towards expected
        if ($$pLine{freq} == ($$pLine{freq_exp}=$freq_exp)) {
          $$pLine{freq_corr} = $$pLine{freq};  # (= $freq_exp);
          next;
        }
        $debug and printf STDERR "%s. binomial significance, tuple %s\n", &MySub,
          $$pLine{tuple};
        $ValP = &BinomConfidP ($ValN, $$pLine{ct}, &Sign($$pLine{freq}-$freq_exp),
          -SuggDistP=>$freq_exp, @OptAlpha, -debug=>$dbg2);
        if (! defined($ValP)
        or  &Sign($ValP-$$pLine{freq}) != &Sign($freq_exp-$$pLine{freq})) {
          print  STDERR "ERROR in call of &BinomConfidP\n";
          next;
        }
        $$pLine{freq_corr} = (abs($ValP-$$pLine{freq}) > abs($freq_exp-$$pLine{freq})) ?
          $freq_exp : $ValP;
      }
    }

    # calculate transition frequencies, diagnostic scores
    &TupleTransit ($pTabA, -ColWgt=>'freq_corr', -debug=>$dbg2);
    &TupleTransit ($pTabB, -ColWgt=>'freq_corr', -debug=>$dbg2);
    for ($CtI=0; $CtI<@$pTabA; $CtI++) {
      push @$pTransit, {
        tuple => $$pTabA[$CtI]{tuple},
        transit_score => log($$pTabB[$CtI]{transit_freq}/$$pTabA[$CtI]{transit_freq}),
      };
    }
  }

  # scatter correction - mode A2: shift global frequency towards pairwise mean
  elsif ($SigCorr eq 'A2') {
    my ($pLineA, $pLineB, $freq_mean, $ValPA, $ValPB);
    foreach $pCol ($pColA, $pColB) {
      @$pCol = ( (grep{ $_!~m/^(topol|freq_corr|transit_(wgt|sum|freq))$/ } @$pCol),
        qw(topol freq_corr transit_wgt transit_sum transit_freq) );
    }
    for ($CtI=0; $CtI<@$pTabA; $CtI++) {
      $pLineA = $$pTabA[$CtI];
      $pLineB = $$pTabB[$CtI];

      # determine topology (for report purposes only)
      $$pLineA{topol} = $$pLineB{topol} = join (',',
        map{ $_->{str} } sort{ $a->{val}<=>$b->{val} }
        {str=>'x_A',val=>$$pLineA{freq}}, {str=>'x_B',val=>$$pLineB{freq}}, {str=>'E(x)',val=>$freq_exp} );

      # shift frequency value towards pairwise mean
      if ($$pLineA{freq} == $$pLineB{freq}) {
        $$pLineA{freq_corr} = $$pLineB{freq_corr} = $$pLineA{freq};
        next;
      }
      $freq_mean = ($$pLineA{ct}+$$pLineB{ct}) / ($ValNA+$ValNB);
      $debug and printf STDERR "%s. binomial significance, tuple %s\n", &MySub,
        $$pLineA{tuple};
      $ValPA = &BinomConfidP ($ValNA, $$pLineA{ct},
        &Sign($$pLineA{freq}-$freq_mean),
        -SuggDistP=>$freq_mean, @OptAlpha, -debug=>$dbg2);
      $ValPB = &BinomConfidP ($ValNB, $$pLineB{ct},
        &Sign($$pLineB{freq}-$freq_mean),
        -SuggDistP=>$freq_mean, @OptAlpha, -debug=>$dbg2);
      $debug and printf STDERR "%s. ratio changed:\n"
        . "  %s <--(mean:%s)--> %s\n  %s <--> %s\n", &MySub,
        $$pLineA{freq}, $freq_mean, $$pLineB{freq},
        $ValPA||'undef', $ValPB||'undef';
      if (!defined($ValPA) or !defined($ValPB)) {
        printf STDERR "ERROR in &BinomConfidP, %s%s%s\n",
          !defined($ValPA) ? 'val A':'',
          (!defined($ValPA) and !defined($ValPB)) ? ' and ':'',
          !defined($ValPB) ? 'val B':'';
        next;
      }
      if (&Sign($ValPA-$ValPB) != &Sign($$pLineA{freq}-$$pLineB{freq})) {
        $$pLineA{freq_corr} = $$pLineB{freq_corr} = $freq_mean;
      } else {
        $$pLineA{freq_corr} = $ValPA;
        $$pLineB{freq_corr} = $ValPB;
      }
      $debug and printf STDERR "  %s <--> %s\n",
        $$pLineA{freq_corr}, $$pLineB{freq_corr},
    }

    # calculate transition frequencies, diagnostic scores
    &TupleTransit ($pTabA, -ColWgt=>'freq_corr', -debug=>$dbg2);
    &TupleTransit ($pTabB, -ColWgt=>'freq_corr', -debug=>$dbg2);
    for ($CtI=0; $CtI<@$pTabA; $CtI++) {
      push @$pTransit, {
        tuple => $$pTabA[$CtI]{tuple},
        transit_score => log($$pTabB[$CtI]{transit_freq}/$$pTabA[$CtI]{transit_freq}),
      };
    }
  }

  # scatter correction - mode B1: shift transition frequency towards expected
  #  (from observable transitions, n=4)
  elsif ($SigCorr eq 'B1') { }

  # scatter correction - mode B2: transition frequency towards pairwise mean
  elsif ($SigCorr eq 'B2') { }

  # scatter correction - mode C*: consider transition cases as a hypergeometric
  # vector in the pool of possible hypergeometric vectors (set of cases, omega).
  # But, how can we order the hypergeometric vectors in a linear progression?
  # Use entropy? But, is it sufficient in terms of ambiguities?
  elsif ($SigCorr eq 'C') {
    # planned, but concrete architecture is still missing
  }

  # no scatter correction
  else {
    foreach $pCol ($pColA, $pColB) {
      @$pCol = ( (grep{ $_!~m/^transit_(wgt|sum|freq)$/ } @$pCol),
        qw(transit_wgt transit_sum transit_freq) );
    }
    &TupleTransit ($pTabA, -ColWgt=>(grep{ exists($$pTabA[0]{$_}) }qw(freq_corr freq))[0], -debug=>$dbg2);
    &TupleTransit ($pTabB, -ColWgt=>(grep{ exists($$pTabA[0]{$_}) }qw(freq_corr freq))[0], -debug=>$dbg2);
    for ($CtI=0; $CtI<@$pTabA; $CtI++) {
      push @$pTransit, {
        tuple => $$pTabA[$CtI]{tuple},
        transit_score => log($$pTabB[$CtI]{transit_freq}/$$pTabA[$CtI]{transit_freq}),
      };
    }
  }

  ##############################################################################
  # output diagnostic scores
  my ($PathStamp, $PathOut, $hOutTab);

  # tuple frequency table A - processed
  $debug and printf STDERR "%s. completed calculation of diagnostic scores\n", &MySub;
  foreach (\$TabCmtA, \$TabCmtB) {
    $$_ =~ s/(#\s*($reEndl))?\#\s*(column )?labels:?\s*($reEndl).*$//o;
    while ($$_ =~ s/($reEndl){2}/$sEndl/g) { }
  }

  # tuple frequency table A - processed
  $PathStamp = &PrepOstump();
  $PathOut = $PathStamp . '_tupleA.tab' . $TupleSize;
  printf "writing file %s\n", $PathOut;
  $hOutTab = FileHandle->new($PathOut,'w');
  printf $hOutTab "# %s -%s\n", $ProgFile, $ProgMode;
  printf $hOutTab "# date/time: %s\n", &TimeStr();
  printf $hOutTab "# E(p): %s\n", $freq_exp;
  printf $hOutTab ("#\n%s", $TabCmtA) if ($TabCmtA);
  printf $hOutTab "#\n# column labels:\n# %s\n", join ("\t", @$pColA);
  for ($CtI=0; $CtI<@$pTabA; $CtI++) {
    printf $hOutTab "%s\n", join ("\t", @{$$pTabA[$CtI]}{@$pColA});
  }
  # tuple frequency table B - processed
  $PathOut = $PathStamp . '_tupleB.tab' . $TupleSize;
  printf "writing file %s\n", $PathOut;
  $hOutTab = FileHandle->new($PathOut,'w');
  printf $hOutTab "# %s -%s\n", $ProgFile, $ProgMode;
  printf $hOutTab "# date/time: %s\n", &TimeStr();
  printf $hOutTab "# E(p): %s\n", $freq_exp;
  printf $hOutTab ("#\n%s", $TabCmtB) if ($TabCmtB);
  printf $hOutTab "#\n# column labels:\n# %s\n", join ("\t", @$pColB);
  for ($CtI=0; $CtI<@$pTabB; $CtI++) {
    printf $hOutTab "%s\n", join ("\t", @{$$pTabB[$CtI]}{@$pColB});
  }
  # table of diagnostic scores
  $PathOut = $PathStamp . '_diag.tab';
  printf "writing file %s\n", $PathOut;
  $hOutTab = FileHandle->new($PathOut,'w');
  my @ColTransit = qw(tuple transit_score);
  printf $hOutTab "# %s -%s\n", $ProgFile, $ProgMode;
  printf $hOutTab "# date/time: %s\n", &TimeStr();
  printf $hOutTab "# E(p): %s\n", $freq_exp;
  printf $hOutTab "# computation time: %s s\n", (times)[0]-$time;
  printf $hOutTab "#\n# column labels:\n# %s\n", join ("\t", @ColTransit);
  for ($CtI=0; $CtI<@$pTransit; $CtI++) {
    printf $hOutTab "%s\n", join ("\t", @{$$pTransit[$CtI]}{@ColTransit});
  }

  # tidy up done by manager of temporary files
}


# calculate transition frequency table for tuple frequency table
#
# INTERFACE
# - argument 1: reference to tuple table, table type "AH", column labels
#               tuple, freq (customisable)
#
# - options:
#   -ColWgt     column label for frequency values, default "freq"
#   -debug      [STD]
#
# - return val: reference to tuple table (physically identical to source),
#               columns added:
#               transit_wgt transit_sum transit_freq
#
# DEVELOPER'S NOTE
# - note that object library SeqLab::SuffixTrie also has an implementation
#   for calculating transition frequencies.
#
sub TupleTransit {
  my ($pTupleTab, %opt) = @_;
  my ($ColWgt, $debug, $TupleSize);
  $debug = $opt{-debug};
  $ColWgt = $opt{-ColWgt} || 'freq';
  $TupleSize = length ($$pTupleTab[0]{tuple});

  # create index for prefix tuples, with size TupleSize-1
  my (%IdxPrefix);
  foreach my $pLine (@$pTupleTab) {
    my $StrPrefix = substr($$pLine{tuple},0,$TupleSize-1);
    $IdxPrefix{$StrPrefix} ||= { case=>[], sum=>0, TupleStem=>$StrPrefix };
    push @{$IdxPrefix{$StrPrefix}{case}}, $pLine;
    $IdxPrefix{$StrPrefix}{sum} += $$pLine{$ColWgt};
  }

  # iterate over stem tuples, derive transition frequencies
  foreach my $ItPrefix (values %IdxPrefix) {
    foreach my $pLine (@{$$ItPrefix{case}}) {
      $$pLine{transit_wgt} = $$pLine{$ColWgt};
      $$pLine{transit_sum} = $$ItPrefix{sum};
      $$pLine{transit_freq} = $$pLine{transit_sum} ?
        $$pLine{transit_wgt} / $$pLine{transit_sum} : 0;
    }
  }

  return $pTupleTab;
}


# create tuple indices reflecting tuple distance case distribution
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]
#   -TupleSize  [STD]
#
sub ProgTupleDist {
  my ($debug, $dbg2, %TupleOpt);
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  ##############################################################################
  # primary tuple analysis

  # initialize tuple object, grow up tuple tree
  $TupleOpt{-debug} = $debug || undef;
  @TupleOpt{'-TupleMin','-TupleMax'} = @{$ProgOpt{-TupleSize}};
  $TupleOpt{-symbol} = undef;  # = [ qw(A C G T) ];
  $TupleOpt{-strands} = $ProgOpt{-strands};
  my ($pTupTrie, $pActionEnter, $pActionCalc, $pSeq);
  $pTupTrie = SeqLab::SuffixTrie->new(%TupleOpt);
  $pActionEnter = sub {
    my ($pNodeDat, %opt) = @_;
    $$pNodeDat{ct} ++;
    push @{$$pNodeDat{pos}}, $opt{-pos};
    };

  # perform tuple analysis
  $pActionCalc = sub {
    my ($this,undef,undef,$pNode, %opt) = @_;
    unless (exists($$pNode{data}) and @{$$pNode{data}{pos}}) { return }
    for (my $i=1; $i<@{$$pNode{data}{pos}}; ++$i) {
      push @{$$pNode{data}{dist}}, $$pNode{data}{pos}[$i]-$$pNode{data}{pos}[$i-1];
    }
    $$pNode{data}{pos} = [];
    };
  while ($pSeq = &SeqQueue()) {
    $debug and printf STDERR "%s  %s%s\n", $$pSeq{id},
      substr($$pSeq{sequence},0,&Min(length($$pSeq{sequence}),50)),
      (length($$pSeq{sequence})>50)?'...':'';
    $pTupTrie->SeqScan ($$pSeq{sequence}, -action=>$pActionEnter);
    $pTupTrie->NodeIterat (-onAll=>$pActionCalc, -LevelLimit=>$TupleOpt{-TupleMax});
  }

  ##############################################################################
  # output results
  my ($PathStamp, $PathOut, $hOut, $pActionNode);

  $PathStamp = &PrepOstump();

  # debug output of tuple object data
  if ($debug) {
    $PathOut = "$PathStamp.dat";
    unless ($hOut = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    printf "writing file $PathOut\n";
    $pTupTrie->DataPrint(-handle=>$hOut);
  }

  # final calculations at tuple nodes
  # anonymous function for output at node level
  my $ThreshSlcfar = 20;
  $pActionNode = sub {
    my ($TupleVal, $pNodeDat, %opt) = @_;
    my ($FreqMean, $FreqRelSlcfar, $DistMax, %DistIdx, %ProbIdx);
    my $hOut = $opt{-handle};

    # sum up counts
    # - $$pNodeDat{ct} is the sum of occurrences for the current tuple.
    # - int(@{$$pNodeDat{dist}}) is the sum of distance cases for the current
    #     tuple.
    #   int(@{$$pNodeDat{dist}}) / $$pNodeDat{ct} should be near to 1
    # - $opt{-sum} is the sum of all tuple instances
    $$pNodeDat{ctdist} = int @{$$pNodeDat{dist}};
    $FreqMean = $$pNodeDat{ct} / $opt{-sum};

    # combine hits to classes referring to individual distance values
    foreach (@{$$pNodeDat{dist}}) { $DistIdx{$_} ++; }
    $DistMax = &Max (keys %DistIdx);

    # statistical test measures against equal distribution
    # - avoid division by zero!
    #   default measures for NULL or near-NULL samples
    $$pNodeDat{meandevsqr} = $$pNodeDat{meandevsqr_slcfar} = 1.0;
    $$pNodeDat{ctqual} = 0;
    if ($$pNodeDat{ctdist}) {

      # statistical test measure against expected distribution:
      # mean deviation square, normalized versus expected distribution
      # - calculate for every position: (E(x)-x)^2 / Var(x)
      #   with: E(x) = P(x) Q(x) N
      #         Var(x) = P(x) Q(x) N =~ P(x) N = E(x)
      #         P(x) = q^(l-1) * p    and Q(x) = 1 - P(x)
      $ProbIdx{1} = $FreqMean;
      for (2 .. &Min(2/$FreqMean,4**$opt{-tsize})) {
        $ProbIdx{$_} = $ProbIdx{$_-1} * (1-$FreqMean);
      }
      $$pNodeDat{meandevsqr} = &SampleMean ( [ map {
        ($DistIdx{$_} - $$pNodeDat{ctdist}*$ProbIdx{$_}) ** 2
          / ($$pNodeDat{ctdist}*$ProbIdx{$_});
        } 1 .. &Min(2/$FreqMean,4**$opt{-tsize}) ] );

      # statistical test measure against expected distribution:
      # mean deviation square, normalized versus expected distribution
      #   OMITTING the clumping range of distance 1..$ThreshSlcfar
      $FreqRelSlcfar = &Sum (map{ $DistIdx{$_} } (($ThreshSlcfar+1)..$DistMax))
        / $$pNodeDat{ctdist};
        # approximate tuple frequency if clumped occurrences are neglected
      $ProbIdx{1} = $FreqMean*$FreqRelSlcfar;
      for (2 .. &Min(2/$FreqMean,4**$opt{-tsize})) {
        $ProbIdx{$_} = $ProbIdx{$_-1} * (1 - $FreqMean*$FreqRelSlcfar);
      }
      if ($FreqRelSlcfar) {
        $$pNodeDat{meandevsqr_slcfar} = &SampleMean ( [ map {
          ($DistIdx{$_} - $$pNodeDat{ctdist}*$ProbIdx{$_}) ** 2
            / ($$pNodeDat{ctdist}*$ProbIdx{$_});
          } ($ThreshSlcfar+1) .. &Min(2/$FreqMean,4**$opt{-tsize}) ] );
      }

      # sample score derived from a saturation function, depending on
      # number of counts / frequency
      # - function to get saturation constant from tuple frequency was found
      #   by trial and error
      if ($FreqRelSlcfar) {
        my $cSatur = 0.10 * 1/($FreqMean*$FreqRelSlcfar);
        $$pNodeDat{ctqual} = (1 * $$pNodeDat{ctdist}*$FreqRelSlcfar) /
          ($cSatur + $$pNodeDat{ctdist}*$FreqRelSlcfar);
      }
    }

    # output table line
    printf $hOut "%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", $TupleVal,
      $FreqMean, $$pNodeDat{ct}, $$pNodeDat{ctdist}, $$pNodeDat{ctqual},
      $$pNodeDat{meandevsqr}, $$pNodeDat{meandevsqr_slcfar},
      $$pNodeDat{meandevsqr}/$$pNodeDat{meandevsqr_slcfar}*$$pNodeDat{ctqual},
        $$pNodeDat{meandevsqr_slcfar}*($$pNodeDat{meandevsqr_slcfar}/$$pNodeDat{meandevsqr})*$$pNodeDat{ctqual},
      join (',', map{ sprintf('%d=%d',$_,$DistIdx{$_}) }
        sort{ $a<=>$b } grep{ $_ } keys %DistIdx);
  };

  # invoke output of tuple data tables
  for (my $CtI=$TupleOpt{-TupleMax}; $CtI>=($TupleOpt{-TupleMin}||1); $CtI--) {
    $PathOut = "$PathStamp.tab$CtI";
    unless ($hOut = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    printf "writing file $PathOut\n";
    $pTupTrie->LevelPrint ($CtI, -handle=>$hOut, -action=>$pActionNode,
      -TabLabel=>'table of tuple distance cases',
      -column=>[qw(tuple freq ct ctdist ctqual meandevsqr meandevsqr_slcfar signif_crowd signif_else distlist)],
        # there is some external code depending on the field label "distlist"
     );
  }
}


# create tuple indices reflecting positional crowding significance
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]
#   -TupleSize  [STD]
#
sub ProgTuplePos {
  my ($debug, $dbg2, %TupleOpt);
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $TupleOpt{-debug} = $debug || undef;
  @TupleOpt{'-TupleMin','-TupleMax'} = @{$ProgOpt{-TupleSize}};
  $TupleOpt{-symbol} = undef;  # = [ qw(A C G T) ];
  $TupleOpt{-strands} = $ProgOpt{-strands};

  my ($pTupTrie, $pSeq, @SeqDens, $SeqDensIntegr);
  my ($pActionNode);

  # initialize tuple object, grow up tuple tree
  # build table of sequence density
  $pTupTrie = SeqLab::SuffixTrie->new(%TupleOpt);
  $pActionNode = sub {
    my ($pNodeDat, %opt) = @_;
    $$pNodeDat{ct} ++;
    $opt{-pos} or warn "WARNING: got position 0";
    push @{$$pNodeDat{pos}}, $opt{-pos};
    };
  while ($pSeq = &SeqQueue()) {
    $debug and printf STDERR "%s  %s%s\n", $$pSeq{id},
      substr($$pSeq{sequence},0,&Min(length($$pSeq{sequence}),50)),
      (length($$pSeq{sequence})>50)?'...':'';
    $pTupTrie->SeqScan ($$pSeq{sequence}, -action=>$pActionNode);
    foreach my $CtI (1 .. (length($$pSeq{sequence})-$TupleOpt{-TupleMax}+1)) {
      $SeqDens[$CtI] ++;
    }
  }
  # total (effective) sequence density
  # wobbles in sequence will disturb this calculus (rely on SeqLab::SuffixTrie)
  $SeqDensIntegr = &Sum (@SeqDens);

  # has there been any sequence?
  if (! $SeqDensIntegr) {
    die "ERROR: no (effective) sequences found in input\n";
  }

  ##############################################################################
  # output results
  my ($hOut);
  my $PathStamp = &PrepOstump();

  # save table of sequence density
  my $PathOut = "${PathStamp}_SeqDens.tab";
  unless ($hOut = FileHandle->new($PathOut,'w')) {
    die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
  }
  print  "writing file $PathOut\n";
  foreach my $CtI (1 .. $#SeqDens) {
    printf $hOut "$CtI\t$SeqDens[$CtI]\n";
  }

  # debug output of tuple object data
  if ($debug) {
    $PathOut = "$PathStamp.dat";
    unless ($hOut = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    printf "writing file $PathOut\n";
    $pTupTrie->DataPrint(-handle=>$hOut);
  }

  # final calculations at tuple nodes
  # anonymous function for output at node level
  # - $$pNodeDat{ct} is the sum of occurrences for the current tuple.
  # - $opt{-sum} is the sum of all tuple instances
  $pActionNode = sub {
    my ($TupleVal, $pNodeDat, %opt) = @_;
    my ($FreqMean, %PosIdx);
    my $hOutTab = $opt{-handle};

    # overall tuple frequency
    $FreqMean = $$pNodeDat{ct} / $opt{-sum};

    # combine hits to classes referring to individual position values
    foreach (@{$$pNodeDat{pos}}) { $PosIdx{$_} ++; }

    # statistical test measures against equal distribution
    # - circumvent division by zero
    if ($$pNodeDat{ct}) {

      # statistical test measure against equal distribution:
      # mean deviation square, normalized versus expected distribution
      # - calculate for every position: (E(x)-x)^2 / Var(x)
      #   with: E(x) = p N
      #         Var(x) = p q N =~ p N = E(x)
      $$pNodeDat{meandevsqr} = &SampleMean ( [ map {
        ($PosIdx{$_} - $FreqMean*$SeqDens[$_]) ** 2
          / ($FreqMean*$SeqDens[$_]);
        } 1 .. $#SeqDens ] );

      # statistical test measure against equal distribution:
      # mean co-deviation of neighbored positions, normalized versus
      #   expected distribution
      # - calculate for every position: (E(x)-x_i)*(E(x)-x_i+1) / Var(x)
      $$pNodeDat{meancodev} = &SampleMean ( [ map {
          ($PosIdx{$_-1} - $FreqMean*$SeqDens[$_])
        * ($PosIdx{$_}   - $FreqMean*$SeqDens[$_])
        / ($FreqMean*$SeqDens[$_]);
        } 2 .. $#SeqDens ] );

    } else {
      $$pNodeDat{meandevsqr} = $$pNodeDat{meancodev} = 1.0;
    }

    # output table line
    printf $hOutTab "%s\t%d\t%s\t%s\t%s\t%s\n", $TupleVal,
      $$pNodeDat{ct}, $FreqMean,
      $$pNodeDat{meandevsqr}, $$pNodeDat{meancodev},
      join (',', map{ sprintf('%d=%d',$_,$PosIdx{$_}); }
        sort{ $a<=>$b } keys %PosIdx);
  };

  # invoke output of tuple data tables
  for (my $CtI=$TupleOpt{-TupleMax}; $CtI>=($TupleOpt{-TupleMin}||1); $CtI--) {
    $PathOut = "$PathStamp.tab$CtI";
    unless ($hOut = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    print  "writing file $PathOut\n";
    $pTupTrie->LevelPrint ($CtI, -handle=>$hOut, -action=>$pActionNode,
      -column=>[qw(tuple ct freq meandevsqr meancodev pos)]);
        # there is some external code depending on the field label "pos"
  }
}


# find positionally significant tuples and translate them to PWM
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]
#   -TupleSize  [STD]
#
# DEBUG, CHANGES, ADDITIONS
# - The very last step of creating a PWM is not yet performed.
#
sub ProgTupleFocus {

  # function constants
  my $signif = 0.2;

  # function parameters
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $PathStamp = &PrepOstump();
  my (%img,%path,$hOut);
  $img{width} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
  $img{RelHigh} = $ProgOpt{-OutImgRelhigh} || $ProgParam{default}{OutImgRelhigh}{TupleFocus};
  my ($CtPeakShift, %GaussProfile, $SumIntegr, $bGaussRedo);
  my ($pTuple, %TuplePosidx);

  # function parameters - Gauss approximation
  # later, effective positions are calculated as: $ItPos + $CtPeakShift + $CtI
  # $ItPos        iteration through sequence
  # $CtPeakShift  shift of Gauss peak
  # $CtI          relative position in Gauss curve
  my $GaussSigma = $ProgOpt{-WinSize} || $ProgParam{default}{WinSize}{TupleFocus};
  $GaussSigma /= 2;
  $img{StepSizeGauss} = ($GaussSigma<10) ? [0,0.5] : [0];
  $debug and printf STDERR "%s. Gaussian profile: window size %s, peak shifts %s\n", &MySub,
    $GaussSigma, join(', ',@{$img{StepSizeGauss}});
  foreach $CtPeakShift (@{$img{StepSizeGauss}}) {
    my $CtI;
    $SumIntegr = 0;
    if ($CtPeakShift == 0) {
      $SumIntegr = $GaussProfile{$CtPeakShift}{0} =
        &GaussQuant ((0+0.5) / $GaussSigma) - 0.5;
      $GaussProfile{$CtPeakShift}{0} *= 2;
      $CtI = 1;
    } else {
      $CtI = 0.5;
    }
    for (; $SumIntegr<0.49; $CtI+=1.0) {
      $GaussProfile{$CtPeakShift}{$CtI} = $GaussProfile{$CtPeakShift}{-$CtI} =
        &GaussQuant (($CtI+0.5) / $GaussSigma) - 0.5 - $SumIntegr;
      $SumIntegr += $GaussProfile{$CtPeakShift}{$CtI};
      $debug and printf STDERR "%s. Gaussian profile: peak shift %s, step %s, integral add %s, integral %s\n", &MySub,
        $CtPeakShift, $CtI, $GaussProfile{$CtPeakShift}{$CtI}, $SumIntegr;
    }
  }
  $debug and &DataPrint (\%GaussProfile, -handle=>\*STDERR);

  ##############################################################################
  # analyse local tuple occurrences, sample compared to randomized reference
  if (! $ProgOpt{-var}{PosDone}) {

    # calculation of tuple significance
    &ProgTuplePos();

    # calculation of tuple significance on randomized input
    $ProgParam{store}{queue}{SeqIn}->Push(
      join (' ', $CorePath{call}{SeqRandomize}, @ProgArg, '|'));
    if ($ProgOpt{-OutStump}) {
      $ProgOpt{-OutStampBak} = $ProgOpt{-OutStump};
    }
    $ProgOpt{-OutStump} = $PathStamp.'_MtCarlo';
    &ProgTuplePos();
    if ($ProgOpt{-OutStampBak}) {
      $ProgOpt{-OutStump} = $ProgOpt{-OutStampBak};
      delete $ProgOpt{-OutStampBak};
    }
  }

  ##############################################################################
  # significance borders for tuple distribution properties

  # load sequence density table
  my $pSeqDens = &PlainToTable ($PathStamp.'_SeqDens.tab', -TabType=>'AA', -comments=>1);
  printf "reading file %s\n", $PathStamp.'_SeqDens.tab';

  # determine border of significance
  foreach my $CtTuple (($ProgOpt{-TupleSize}[0]||1)..$ProgOpt{-TupleSize}[1]) {

    # load Monte Carlo simulation
    my $pMCarlo = &PlainToTable ($PathStamp.'_MtCarlo.tab'.$CtTuple,
      -TabType=>'AH', -comments=>1);

    # significance measures
    my $pStat = {};
    $$pStat{SignifThresh} = &GaussLimit ($signif / 4**$CtTuple);
    $$pStat{meandevsqr} = &SampleMetrics ( [map{ $_->{meandevsqr} } @$pMCarlo], -median=>1);
    $$pStat{meandevsqr}{SignifThresh} = $$pStat{meandevsqr}{mean}
      + $$pStat{meandevsqr}{s} * $$pStat{SignifThresh};
    $$pStat{meancodev} = &SampleMetrics ( [map{ $_->{meancodev} } @$pMCarlo], -median=>1);
    $$pStat{meancodev}{SignifThresh} = $$pStat{meancodev}{mean}
      + $$pStat{meancodev}{s} * $$pStat{SignifThresh};

    $path{stat} = sprintf '%s_MtCarlo_Stat_%d.dat', $PathStamp, $CtTuple;
    unless ($hOut = FileHandle->new($path{stat},'w')) {
      die sprintf "ERROR: unable to write file %s\n", $path{stat};
    }
    printf "writing file %s\n", $path{stat};
    &DataPrint ($pStat, -handle=>$hOut);

  ##############################################################################
  # select and analyse significant tuples

    # significant tuples - sample them
    $pTuple = [ grep {
      $_->{meandevsqr} > $$pStat{meandevsqr}{SignifThresh} and
      $_->{meancodev} > $$pStat{meancodev}{SignifThresh}
      } @{ &PlainToTable ("$PathStamp.tab$CtTuple", -TabType=>'AH', -comments=>1) } ];
    unless (@$pTuple) {
      print "no significant tuples for size $CtTuple\n";
      next;
    }

    # significant tuples - analyse in detail
    mkdir ($path{DirTuple} = $PathStamp.'_TupleDet_'.$CtTuple);
    printf "writing dir %s\n", $path{DirTuple};
    foreach my $ItTuple (@$pTuple) {
      $$ItTuple{meandevsqr} = &nearest (
        10 ** int(log($$pStat{meandevsqr}{SignifThresh})/log(10)-4),
        $$ItTuple{meandevsqr});
      $$ItTuple{meancodev} = &nearest (
        10 ** int(log($$pStat{meandevsqr}{SignifThresh})/log(10)-4),
        $$ItTuple{meancodev});

      my %graph = (
        BgTranspar => $ProgOpt{-OutImgTransp},
        plot => [
          { DimPixel     => { x=>$img{width} },
            HeightRel    => $img{RelHigh},
            DataType     => 'AH',               # obs. frequency
            data         => [ ],
            DataRange    => { x=>[0], y=>[0] },
            ReprType     => 'line',
            ReprColor    => 'red',
          },
          { DataType     => 'AH',               # mean frequency
            data         => [ ],
            DataRangeRef => 0,
            ReprType     => 'line',
            ReprColor    => 'black',
          },
          { DataType     => 'AH',               # SD of frequency
            data         => [ ],
            DataRangeRef => 0,
            ReprSD       => 1,
            ReprType     => 'line',
            ReprColor    => 'grey25',
          },
          { DataType     => 'AH',               # Gauss approximation of frequency II
            data         => [ ],
            DataRangeRef => undef,
            ReprType     => 'line',
            ReprColor    => 'midblue',
          },
          { DataType     => 'AH',               # Gauss approximation of frequency I
            data         => [ ],
            DataRangeRef => undef,
            ReprType     => 'line',
            ReprColor    => 'lightblue',
          },
          ],
        scale => [
          { PlotNum  => 0,
            location => 'x',
            color    => 'black',
          },
          { PlotNum  => 0,
            location => 'left',
            color    => 'black',
          },
          { PlotNum  => 1,
            location => 'right',
            color    => 'black',
          },
          ],
        );

      # positional distribution of frequency vs. expected frequency
      #   (repeat work from &ProgTuplePos)
      %TuplePosidx = map{ split(/=/,$_) } split(/,/,$$ItTuple{pos});
      foreach my $ItPos (@$pSeqDens) {
        push @{$graph{plot}[0]{data}}, {
          x  => $$ItPos[0],
          y  => $TuplePosidx{$$ItPos[0]} / $$ItPos[1],
          };
        push @{$graph{plot}[1]{data}}, {
          x  => $$ItPos[0],
          y  => $$ItTuple{freq},
          };
        push @{$graph{plot}[2]{data}}, {
          x  => $$ItPos[0],
          y  => $$ItTuple{freq},
          SD => sqrt ($$ItTuple{freq} * $$ItPos[1]) / $$ItPos[1],
          };
      }
      $graph{plot}[0]{DataRange}{y}[1] = &Max (map {
        ($graph{plot}[0]{data}[$_]{y},$graph{plot}[2]{data}[$_]{y}+$graph{plot}[2]{data}[$_]{SD})
        } map{ $_->[0] } @$pSeqDens);
      $graph{plot}[0]{DataRange}{y}[0] = -$graph{plot}[0]{DataRange}{y}[1] / 100;

      # find Gauss peak
      foreach my $ItPos (@$pSeqDens) {
        foreach $CtPeakShift (@{$img{StepSizeGauss}}) {
          $SumIntegr = 0;
          foreach my $CtI (keys %{$GaussProfile{$CtPeakShift}}) {
            my $ItPos2 = $$ItPos[0] + $CtPeakShift + $CtI;
            if ($ItPos2 < 0 or $ItPos2 > $$pSeqDens[-1][0]) { next }
            $SumIntegr += $GaussProfile{$CtPeakShift}{$CtI}
              * ($TuplePosidx{$ItPos2} / $$pSeqDens[$ItPos2-1][1] - $$ItTuple{freq})
              / (sqrt ($$ItTuple{freq} * $$pSeqDens[$ItPos2-1][1]) / $$pSeqDens[$ItPos2-1][1]);
          }
          push @{$graph{plot}[4]{data}}, {
            x => $$ItPos[0]+$CtPeakShift,
            y => $SumIntegr,
            };
        }
      }
      # define initial Gauss my as maximum frequency
      $$ItTuple{GaussMy0} = $$ItTuple{GaussMy} = (sort{ $b->{y}<=>$a->{y} }
        @{$graph{plot}[4]{data}} )[0]->{x};
      $$ItTuple{GaussSigma} = $GaussSigma;
      # iterate Gaussian approximation
      {
        $$ItTuple{bgoff} =
          &Max ($$ItTuple{bgoff}, $$ItTuple{GaussMy} + 3*$$ItTuple{GaussSigma});
        $$ItTuple{freqbg} =
          &Sum (map{ $TuplePosidx{$_} } $$ItTuple{bgoff} .. $#$pSeqDens)
          / (&Sum (map{ $_->[1] }grep{ $_->[0]>=$$ItTuple{bgoff} }@$pSeqDens) || 0.5);
        $$ItTuple{GaussSample} = &SampleMetricsWgt ( [ grep{ $_->[1]>0 }map{
          [ $_->[0], ($TuplePosidx{$_->[0]}/($_->[1]||0.1) - $$ItTuple{freq})
            / ((sqrt($$ItTuple{freq}*$_->[1])||0.1)/($_->[1]||0.1)) ];
          } map{ $$pSeqDens[$_-1] }
            &Max (1, int($$ItTuple{GaussMy}-2.5*$$ItTuple{GaussSigma})) ..
            &nearest_ceil (1, $$ItTuple{GaussMy}+2.5*$$ItTuple{GaussSigma}) ] );
        $$ItTuple{GaussCycle} ++;
        $bGaussRedo = 0;
        if (abs($$ItTuple{GaussSample}{mean}-$$ItTuple{GaussMy}) > 0.1 or
          abs(log($$ItTuple{GaussSample}{s}/$$ItTuple{GaussSigma})) > log(1.05)
        ) {
          if ($$ItTuple{GaussCycle} >= 10) {
            $debug and printf STDERR "%s. terminating Gauss approximation for tuple %s after %d cycles\n", &MySub,
              $$ItTuple{tuple}, $$ItTuple{GaussCycle};
          } else {
            $bGaussRedo = 1;
          }
        }
        $$ItTuple{GaussMy} = sprintf ('%.4f', $$ItTuple{GaussSample}{mean});
        $$ItTuple{GaussSigma} = sprintf ('%.4f', $$ItTuple{GaussSample}{s});
        $bGaussRedo and redo;
      }
      push @{$graph{plot}[3]{data}}, { x=>0, y=>0 };
      for (
        my $CtI=&Max(1,int($$ItTuple{GaussMy}-2.5*$$ItTuple{GaussSigma}));
        $CtI<=&nearest_ceil(1,$$ItTuple{GaussMy}+2.5*$$ItTuple{GaussSigma});
        $CtI+=0.5
      ) {
        push @{$graph{plot}[3]{data}}, {
          x => $CtI,
          y => &GaussVal($$ItTuple{GaussMy},$$ItTuple{GaussSigma},$CtI),
          };
      }
      push @{$graph{plot}[3]{data}}, { x=>$$pSeqDens[-1][0], y=>0 };

      # combine all plots into image
      # $#{$graph{plot}} = 2;
      $path{imgdata} = sprintf '%s/%d_%s_distrib_img.dat',
        $path{DirTuple}, $CtTuple, $$ItTuple{tuple};
      if ($hOut = FileHandle->new($path{imgdata},'w')) {
        printf "writing plot data to file %s\n", $path{imgdata};
      } else {
        die sprintf "%s. ERROR: unable to write file %s\n", &MySub, $path{imgdata};
      }
      &DataPrint (\%graph, -handle=>$hOut);
      $path{img} = sprintf '%s/%d_%s_distrib_img.png',
        $path{DirTuple}, $CtTuple, $$ItTuple{tuple};
      unless (&Graph (&DataClone(\%graph), -save=>$path{img}, -debug=>$dbg2)) {
        die sprintf "ERROR: unable to write image to %s\n", $path{img}|"''";
      }
      printf "writing image to file %s\n", $path{img}|"''";
      $graph{plot}[0]{DataRange}{x} = $graph{plot}[3]{DataRange}{x} = $graph{plot}[4]{DataRange}{x} = [
        &Max (0, $$ItTuple{GaussMy}-5*$$ItTuple{GaussSigma}),
        $$ItTuple{GaussMy} + 5*$$ItTuple{GaussSigma} ];
      $path{imgdet} = sprintf '%s/%d_%s_distrib_detail.png',
        $path{DirTuple}, $CtTuple, $$ItTuple{tuple};
      if (&Graph (\%graph, -save=>$path{imgdet}, -debug=>$dbg2)) {
        printf "writing image to file %s\n", $path{imgdet};
      } else {
        printf STDERR "ERROR: unable to write image to %s\n", $path{imgdet};
      }
    }

    my $pColLabel = [qw(tuple freq freqbg meandevsqr meancodev GaussMy0 GaussMy GaussSigma GaussCycle)];
    $path{overvw} = sprintf '%s_TupleOverv_%d.tab', $PathStamp, $CtTuple;
    &WriteFile ($path{overvw},
      sprintf ("# column labels:\n# %s\n", join("\t",@$pColLabel))
      . join ('', map{ sprintf ("%s\n", join("\t",@{$_}{@$pColLabel})) }
        sort{ $b->{meandevsqr}<=>$a->{meandevsqr} } @$pTuple));
  }
}


# plot tuple score over range of sequence
#
# INTERFACE
# - argument 1:   path of tuple library
# - argument 2:   tuple size
#
# - global options:
#   -debug        [STD]
#   -OutImg       [STD]
#   -OutImgWidth  [STD]
#   -OutTab       [STD]
#   -OutTabFmt    [STD]
#   -TupleSize    [STD]
#   --col         ...
#   --GC          ...
#   --log         ...
#
sub ProgTupleScore {

  # function constants
  my %GcMatrix = ( A=>0,C=>1,G=>1,T=>0, a=>0,c=>1,g=>1,t=>0 );

  # function parameters
  my ($PathArg) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $ColScore = $ProgOpt{-var}{col};
  my $bGC = $ProgOpt{var}{GC} and $ProgOpt{-OutImg}=1;
  my $bLog = $ProgOpt{-var}{log};
  my %TupleOpt; $TupleOpt{-debug} = $dbg2 || undef;
  @TupleOpt{'-TupleMin','-TupleMax'} = @{$ProgOpt{-TupleSize}};
  my $TupleSize = $TupleOpt{-TupleMax};

  # image parameters
  my %img;
  if ($ProgOpt{-OutImg}) {
    $img{width} = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
    $img{RelHigh} = $ProgOpt{-OutImgRelhigh} || $ProgParam{default}{OutImgRelhigh}{TupleScore};
  }

  # load tuple table
  my ($PathTab,$pTab,$pCol,$bLower);
  if (-r ($PathTab=$PathArg) or -r ($PathTab=$PathArg.'.tab') or -r ($PathTab=$PathArg.'.tab'.$TupleSize)) {
    $debug and printf STDERR "%s. reading tuple library file %s (table)\n", &MySub, $PathTab||"''";
    ($pTab,$pCol) = &PlainToTable ($PathTab, -TabType=>'HIH', -comments=>1);
    $debug and printf STDERR "  %d data lines\n", int keys(%$pTab);
    my ($Tuple1st) = keys %$pTab;
    $debug and printf STDERR "  first entry complete: (%s)\n", join(',',%{$$pTab{$Tuple1st}});
    if (length($Tuple1st) != $TupleSize) {
      die sprintf "ERROR: tuple library from file %s has inconsistent tuple size (%d instead of %d)\n",
        $PathTab, length($Tuple1st), $TupleSize;
    }
    if (uc($Tuple1st) ne $Tuple1st) {
      warn "WARNING: lower-case letters in tuple library\n";
      $bLower = 1;
    }
  } else {
    die sprintf "ERROR: unable to find tuple library %s\n", $PathArg||"''";
  }

  # which column contains the tuple scores?
  unless ($ColScore) {
    my @ColOrder = qw(bayes_score transit_score score freq_corr freq ct);
    my %ColIdx = map{ ($_=>1) } @$pCol;
    ($ColScore) = grep{ $ColIdx{$_} } @ColOrder;
    unless ($ColScore) {
      print  STDERR "ERROR: unable to find column for score values\n";
      printf STDERR "  available: %s\n", join(' ',@$pCol);
      printf STDERR "  searched: %s\n", join(' ',@ColOrder);
      exit 1;
    }
    $debug and printf STDERR "%s. column label for tuple score: %s\n", &MySub, $ColScore;
  }
  # turn tuple scores to logarithm
  if ($bLog) {
    foreach (values %$pTab) {
      $_->{$ColScore} = log($_->{$ColScore}) / log($bLog);
    }
  }

  ##############################################################################
  # analyse sequences for G/C and tuples

  # loop over sequences
  my ($pSeq,@ScoreGc,$poPlotGc);
  while ($pSeq = &SeqQueue()) {
    my $iSeqLen = length $$pSeq{sequence};

    # determine local G+C on demand
    if ($bGC) {
      @ScoreGc = (undef);
      for (my $CtI=0; $CtI<$iSeqLen; $CtI++) {
        push @ScoreGc, $GcMatrix{substr($$pSeq{sequence},$CtI,1)};
      }
      $poPlotGc = Math::Plot2D->new(\@ScoreGc,-TabType=>'A1y',-extrapolate=>'mean');
    }

    # determine local tuple scores
    # I need to force the sequence slice into the same letter case as that of
    # the tuple library
    my @ScoreTuple = (undef);
    for (my $CtI=0; $CtI+$TupleSize<=$iSeqLen; $CtI++) {
      my $t=substr($$pSeq{sequence},$CtI,$TupleSize);
      if($bLower){ $t=lc($t) } else{ $t=uc($t) }
      push @ScoreTuple, $$pTab{$t}{$ColScore};
    }
    my $poPlotScore = Math::Plot2D->new(\@ScoreTuple,-TabType=>'A1y',-extrapolate=>'mean');

  ##############################################################################
  # prepare image of smoothened plot
    if ($ProgOpt{-OutImg}) {

      # smoothening parameters
      $img{SmoothWin}  = $ProgOpt{-WinSize} || $ProgParam{default}{WinSize}{TupleScore};
      $img{SmoothStep} = 0.5 * $iSeqLen / $img{width};

      # construct graph image
      my %graph = (
        BgTranspar => $ProgOpt{-OutImgTransp},
        plot => [
            { DimPixel  => { x=>$img{width}, y=>0 },
              HeightRel => $img{RelHigh},
              DataType  => 'AA',
                           # smoothened plot
              data      => $poPlotScore->SmoothPlot($img{SmoothStep},-window=>$img{SmoothWin},-debug=>$dbg2),
              DataRange => { x=>[0,$iSeqLen] },
              ReprType  => 'line',
              ReprColor => 'black',
            }
          ],
        scale => [
            { PlotNum  => 0,
              location => 'bottom',
            },
            { PlotNum  => 0,
              location => 'left',
            },
            { PlotNum  => $bGC ? 1 : 0,
              location => 'right',
            },
          ],
        );
      $bGC and push @{$graph{plot}}, {
        DataType  => 'AA',
                     # smoothened plot
        data      => $poPlotGc->SmoothPlot($img{SmoothStep},-window=>$img{SmoothWin},-debug=>$dbg2),
        DataRange => { x=>[0,$iSeqLen] },
        ReprType  => 'line',
        ReprColor => 'blue',
        };
      $img{path} = $ProgOpt{-OutImg} || &PathUnique (-name=>&PrepOstump($pSeq,-stamp=>'_TupleScore') .'#.png', -NoSize=>1, -touch=>1);
      if (&Graph (\%graph, -save=>$img{path}, -timer=>$ProgOpt{-timer}, -debug=>$dbg2)) {
        print  STDERR "saving image to $img{path}\n";
      } else {
        printf STDERR "ERROR: unable to save image to %s\n", $img{path}|"''";
        undef $img{path};
      }
    }

  ##############################################################################
  # do tabular output
  # - this must run after the image passage, cause we reference the image file name
    $ProgOpt{-OutTabFmt} ||= '';

    &OutTabIntro ("$ProgFile -$ProgMode\n");
    &OutTabIntro (sprintf "date/time: %s\n", &TimeStr());
    &OutTabIntro (sprintf "sequence ID: %s\n", $$pSeq{id}||"''");
    &OutTabIntro (sprintf "sequence length: %d letters\n", $iSeqLen);
    &OutTabIntro (sprintf "image: %s\n", $ProgOpt{-OutImg} ? $img{path}:'-');

    # table format 'tab' / 'html'
    if (!$ProgOpt{-OutTabFmt} or $ProgOpt{-OutTabFmt} eq 'tab' or $ProgOpt{-OutTabFmt} eq 'html') {
      &OutTabHeadln ($bGC ?
        [qw(seqid pos GC score CumulScore)] : [qw(seqid pos score CumulScore)]);
      my $ScoreCumul;
      for (my $CtI=1; $CtI<=$iSeqLen; $CtI++) {
        &OutTabLine ([
          &{$SeqidFunc{$ProgOpt{-OutIdFmt}}}($$pSeq{id}),
          $CtI, $bGC?($ScoreGc[$CtI]):(),
          $ScoreTuple[$CtI]+0,
          $ScoreCumul+=$ScoreTuple[$CtI],
          ]);
      }
    }

    # table format 'gff'
    elsif ($ProgOpt{-OutTabFmt} eq 'gff') {
      &OutTabHeadln ($SeqLab::SeqFormat::LibGlob{GFF}{ColOut});

      # loop over plot coordinates
      foreach (@{ $poPlotGc->SmoothPlot (1, -window=>$img{SmoothWin}, -debug=>$dbg2) }) {
        &OutTabLine ([
          &{$SeqidFunc{$ProgOpt{-OutIdFmt}}}($$pSeq{id}),
          "$ProgFile -$ProgMode", 'GC',
          $_->[0], $_->[0],
          $_->[1],
          '.', '.', '.',
          ]);
      }
      foreach (@{ $poPlotScore->SmoothPlot (1, -window=>$img{SmoothWin}, -debug=>$dbg2) }) {
        &OutTabLine ([
          &{$SeqidFunc{$ProgOpt{-OutIdFmt}}}($$pSeq{id}),
          "$ProgFile -$ProgMode", 'TupleSignif',
          $_->[0], $_->[0],
          $_->[1],
          '.', '.', '.',
          ]);
      }
    }

    # end table
    &OutTabTail();

  } # end seq loop
}


# compare tuple distribution for two given sequence sources
#
# INTERFACE
# - argument 1: tuple width for comparison
# - argument 2: tuple width for correction
# - argument 3: sequence source / tuple library 1
# - argument 4: sequence source / tuple library 2
#
# - global options:
#   -debug      [STD]
#   -OutTab     [STD]. Not processed the standard way here.
#   -OutTabFmt  [STD]. Not processed the standard way here.
#   -strands    [STD]
#
sub ProgTupleCmp {
  my (%TupleSize, @TupleSrc, $BakOutStamp);
     ($TupleSize{val}, $TupleSize{corr}, $TupleSrc[1], $TupleSrc[2]) = @_;
  my ($debug, $dbg2);
  my @ColLabel = qw(tuple ct freq freqexp ctcorr freqcorr freqstress ctchisqr);
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  my ($ItLib, %Table, $bPseudo);
  my ($ItTuple, $ItTuplePos, $FreqExpect);

  # ensure / create tuple libraries for input
  $BakOutStamp = $ProgOpt{-OutStump};
  foreach my $ItSrc (\$TupleSrc[1], \$TupleSrc[2]) {
    if (-r "$$ItSrc.dat" and -r ("$$ItSrc.tab".$TupleSize{val})) {
      $debug and printf STDERR "%s. source $$ItSrc is already a tuple library\n", &MySub;
    }
    # create temporary tuple library from input sequences
    else {
      unless (-r $$ItSrc) {
        die sprintf "ERROR: unable to read input file %s\n", $$ItSrc;
      }
      $debug and printf STDERR "%s. creating tuple library from sequence source $$ItSrc\n", &MySub;
      $ProgOpt{-OutStump} = $ProgParam{TmpManag}->Create(-touch=>1);
      $ProgParam{store}{queue}{SeqIn}->ini($$ItSrc);
      &ProgTupleLib ($TupleSize{val});
      $$ItSrc = $ProgOpt{-OutStump};
    }
  }
  $ProgOpt{-OutStump} = $BakOutStamp;

  # loop over tuple libraries
  foreach my $NumSrc ('1', '2') {
    foreach $ItLib ($TupleSize{corr} ? ('corr','val'):('val')) {

      # load tuple data tables
      $Table{$NumSrc}{$ItLib} = &PlainToTable ("$TupleSrc[$NumSrc].tab".$TupleSize{$ItLib},
        -TabType=>'HIH', -ColKeep=>1, -ColLabel=>\@ColLabel, -comments=>1);

      # introduce pseudo-counts into tuple data tables
      $Table{$NumSrc}{$ItLib.'CtSum'} = &Sum (map{@$_}
        &DataTreeSlc($Table{$NumSrc}{$ItLib},[[0,'all'],['ct']],-debug=>$dbg2) );
      $Table{$NumSrc}{$ItLib.'pseudo'} = 0.5 / $Table{$NumSrc}{$ItLib.'CtSum'};
      map {
        $_->{ct} = 0.5;
        $_->{freq} = $Table{$NumSrc}{$ItLib.'pseudo'};
        $bPseudo = 1;
      } grep{ !$_->{ct} } values %{$Table{$NumSrc}{$ItLib}};
      if ($bPseudo) {
        $Table{$NumSrc}{$ItLib.'FreqSum'} = &Sum (map{@$_}
          &DataTreeSlc($Table{$NumSrc}{$ItLib},[[0,'all'],['freq']],-debug=>$dbg2) );
        map {
          $_->{freq} /= $Table{$NumSrc}{$ItLib.'FreqSum'};
        } values %{$Table{$NumSrc}{$ItLib}};
      }
    }

    # 'normalize'/'correct' tuple data tables
    # this is only needed for $TupleSize{corr} > 0
    if ($TupleSize{corr}) {
      foreach $ItTuple (keys %{$Table{$NumSrc}{val}}) {
        $FreqExpect = $Table{$NumSrc}{corr}{substr($ItTuple,0,$TupleSize{corr})}{freq};
        for ($ItTuplePos=1; $ItTuplePos<=($TupleSize{val}-$TupleSize{corr}); $ItTuplePos++) {
          $FreqExpect *= $Table{$NumSrc}{corr}{substr($ItTuple,$ItTuplePos,$TupleSize{corr})}{freq}
            / &Sum (map{ $_->{freq} }@{$Table{$NumSrc}{corr}}{map{substr($ItTuple,$ItTuplePos,$TupleSize{corr}-1).$_}qw(A C G T)});
        }
        $Table{$NumSrc}{val}{$ItTuple}{freqexp} = $FreqExpect;
        $Table{$NumSrc}{val}{$ItTuple}{freqcorr} =
          $Table{$NumSrc}{val}{$ItTuple}{freq} / $FreqExpect;
      }
      $Table{$NumSrc}{valFreqCorrSum} = &Sum (map{@$_}
        &DataTreeSlc($Table{$NumSrc}{val},[[0,'all'],['freqcorr']],-debug=>$dbg2) );
      map {
        $_->{freqcorr} /= $Table{$NumSrc}{valFreqCorrSum};
        $_->{ctcorr} = $_->{freqcorr} * $Table{$NumSrc}{valCtSum};
      } values %{$Table{$NumSrc}{val}};
    } else {
      map {
        $_->{ctcorr} = $_->{ct};
        $_->{freqcorr} = $_->{freq};
      } values %{$Table{$NumSrc}{val}};
    }
  }

  # calculate tuple data table including comparison data
  foreach $ItTuple (keys %{$Table{'1'}{val}}) {
    $Table{'1'}{val}{$ItTuple}{freqstress} =
      ($Table{'1'}{val}{$ItTuple}{freqcorr} / $Table{'2'}{val}{$ItTuple}{freqcorr});
    unless ($TupleSize{corr}) {
      $Table{'1'}{val}{$ItTuple}{ctchisqr} = $Table{'1'}{valCtSum} *
        (($Table{'1'}{val}{$ItTuple}{ct} - $Table{'2'}{val}{$ItTuple}{ct}) ** 2) /
        (($Table{'1'}{val}{$ItTuple}{ct} + $Table{'2'}{val}{$ItTuple}{ct}) *
          (2 * $Table{'1'}{valCtSum} - $Table{'1'}{val}{$ItTuple}{ct} - $Table{'2'}{val}{$ItTuple}{ct}));
    }
  }
  $debug and &DataPrint (\%Table, -handle=>\*STDERR);

  # output tuple comparison data table
  {
    my $PathStamp = &PrepOstump();
    my $PathOut = $ProgOpt{-OutTab} || ("$PathStamp.tab".$TupleSize{val});
    my ($hOutTab);
    unless ($hOutTab = FileHandle->new($PathOut,'w')) {
      die sprintf "ERROR: unable to write file %s\n", $PathOut||"''";
    }
    printf "writing file $PathOut\n";
    printf $hOutTab ("# column labels:\n# %s\n", join ("\t", @ColLabel));
    foreach (sort{ $a->{tuple} cmp $b->{tuple} } values %{$Table{'1'}{val}}) {
      printf $hOutTab ("%s\n", join("\t",@{$_}{@ColLabel}));
    }
  }

  # tidy up done by manager of temporary files
}


################################################################################
# miscellaneous
################################################################################


# randomize sequences
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -strands    [STD]. Strand model 0 does not make sense here, and it is
#               evaluated to model 2.
#   -TupleSize  [STD]
#
# DESCRIPTION
# - alphabet of the input sequences: The function assumes that the sequences
#   conform to a strict nt alphabet (ACGT, strictly upper case). It would be
#   nice to have a generalised version that also allows to randomize gaps and
#   IUPAC wobble codes.
#
sub ProgRandomize {
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  ##########################################################
  # analyse input sequences

  # initialize tuple object
  $time = (times)[0];
  my %TupOpt;
  $TupOpt{-debug} = $debug || undef;
  @TupOpt{'-TupleMin','-TupleMax'} = ($ProgOpt{-TupleSize}[1]) x 2;
  $TupOpt{-symbol} = undef;  # = [ qw(A C G T) ];
  $TupOpt{-strands} = $ProgOpt{-strands} || 2;
  my $iTupLen = $ProgOpt{-TupleSize}[1];
  $debug and printf STDERR "%s. tuple size for resampling: %d\n", &MySub, $iTupLen;
  my $pTupTree = SeqLab::SuffixTrie->new(%TupOpt);

  # grow up tuple tree, index sequence lengths
  my @SeqLen;
  while (my $pSeq = &SeqQueue()) {
    my $sSeq = $$pSeq{sequence};
    $pTupTree->SeqScan($sSeq);
    my $alph = join ('', $pTupTree->alph_ok());  # alphabet, including trash symbol
    if ($sSeq =~ s/[^$alph]//g) {
      printf STDERR "%s. WARNING: sequence %s contains non-alphabet characters, e.g. ASCII %s\n", &MySub, $pSeq->{id}, ord($&);
    }
    push @SeqLen, length($sSeq);
  }
  my $SeqNum = int @SeqLen;
  $bTimer and printf STDERR "%s. tuple analysis: %.3f s\n", &MySub, (times)[0]-$time;

  # fixed sequence length
  my $poPlotSeqlen;
  if ($ProgOpt{-var}{seqlen}) {
    $poPlotSeqlen = Math::Plot2D->new([[0,$ProgOpt{-var}{seqlen}],[1,$ProgOpt{-var}{seqlen}]],-debug=>$dbg2);
  }
  # create look-up object for (freq integral -> sequence length)
  else {
    $time = (times)[0];
    my $ItProb=0;
    my @ProbSeqLen = map{ [($ItProb+=1/$SeqNum),$_] } sort{ $a<=>$b } @SeqLen;
    unshift @ProbSeqLen, [0,$ProbSeqLen[0][1]];
    $poPlotSeqlen = Math::Plot2D->new(\@ProbSeqLen,-debug=>$dbg2);
    $bTimer and printf STDERR "%s. sequence length random look-up object: %.3f s\n", &MySub, (times)[0]-$time;
  }

  # tuple index from suffix trie object
  $time = (times)[0];
  if ($iTupLen>1) { $pTupTree->LevelTransit($iTupLen) }
  my $pTupIdx = $pTupTree->LevelIndex($iTupLen);
  my $TupSum = $pTupTree->LevelSum($iTupLen);
  foreach (values %$pTupIdx) {
    $_->{freq} = $_->{ct} / $TupSum;
  }
  # create look-up object for (freq integral -> tuple)
  my $poPlotTupleProb = do{
    my @TupProb = ([0,0]);
    my $ItProb = 0;
    foreach (sort keys %$pTupIdx) {
      $ItProb += $$pTupIdx{$_}{freq};
      push @TupProb, [$ItProb,$_];
    }
    Math::Plot2D->new(\@TupProb,-debug=>$dbg2);
  };
  $debug and &DataPrint($poPlotTupleProb,-handle=>\*STDERR);
  # create look-up object for (freq integral -> tuple transition)
  my %TupTransit;
  if ($iTupLen > 1) {
    my %TupTransitRaw;
    foreach (keys %$pTupIdx) {
      $TupTransitRaw{substr($_,0,$iTupLen-1)}{substr($_,-1,1)}
        = $$pTupIdx{$_}{transitfreq};
    }
    foreach my $prefix (sort keys %TupTransitRaw) {
      my $ItProb = 0;
      $TupTransit{$prefix} = [
          map{ [$ItProb+=$TupTransitRaw{$prefix}{$_},$_] }
          $pTupTree->alph()
        ];
    }
  }
  $debug and &DataPrint(\%TupTransit,-handle=>\*STDERR);
  $bTimer and printf STDERR "%s. tuple index and random look-up object: %.3f s\n", &MySub, (times)[0]-$time;
  undef $pTupIdx; undef $pTupTree;

  ##########################################################
  # create random sequences

  $time = (times)[0];
  if ($ProgOpt{-var}{nseq}) { $SeqNum = $ProgOpt{-var}{nseq}; }
  for (my $i=0; $i<$SeqNum; $i++) {

    # number of sequences: $SeqNum
    my $RandLen;
    if ($ProgOpt{-var}{seqlenipol}) {
      $RandLen = $poPlotSeqlen->Interpolate(-rand()+1);
    } else {
      $RandLen = $poPlotSeqlen->Bound(-rand()+1)->[1][2];
    }
    $debug and printf STDERR "%s. randomizing to length: %d\n", &MySub, $RandLen;

    # sequences string
    my $prefix = $poPlotTupleProb->Bound(-rand() + 1)->[1][2];
    my $sSeq = $prefix;
    substr ($prefix, 0, 1) = '';
    my $j=$iTupLen;
    if ($iTupLen == 1) {
      for (; $j<$RandLen; ++$j) {
        $sSeq .= $poPlotTupleProb->Bound(-rand() + 1)->[1][2];
      }
    } else {
      for (; $j<$RandLen; ++$j) {
        my $r = -rand() + 1;
        for (my $k=0; $k<int(@{$TupTransit{$prefix}}); ++$k) {
          if ($TupTransit{$prefix}[$k][0] > $r) {
            my $symb = $TupTransit{$prefix}[$k][1];
            $sSeq .= $symb;
            $prefix = substr($prefix,1,$iTupLen-2) . $symb;
            last;
          }
        }
      }
    }
    $ProgParam{store}{queue}{SeqOut}->Push(
      { id=>sprintf('rand%d',$i), sequence=>$sSeq } 
      );
  }
  $bTimer and printf STDERR "%s. sequence randomization: %.3f s\n", &MySub, (times)[0]-$time;
}

# $Id: SeqMotif.pl,v 1.72 2018/06/05 18:02:56 szafrans Exp $
