#! /usr/local/bin/perl
################################################################################
#
#  Testing and Temporary Code
#
#  copyright (c)
#    Fritz Lipmann Institute Jena, Genome Analysis Group, 2013
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 1998-2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - See function &usage for command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments
#   &usage
#   &AddSwitch
#
# - the rest is temporary!, See function &usage or &AddSwitch
#
################################################################################


# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
use strict; #use warnings;  # OK 20040813
use Cwd qw(realpath);
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
  my ($mypath) = realpath(__FILE__)=~m/(^.*)\//;
  push @INC, $mypath;
}
use MainLib::StrRegexp;
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline qw(&GetoptsNArgs);
use MainLib::File;
use MainLib::FileTmp qw(&PathTmpdir &PathUnique);
use MainLib::Graphics;
use MainLib::Misc;
use Math::kCalc;
use Math::Statist;
use Math::kRandom;
use Math::Plot2D;
use database::DbPlain;
use SeqLab::SeqBench;
use SeqLab::SeqFormat;
use SeqLab::SeqAnalysis;
use SeqLab::SeqComp;
use SeqAlign::EstGenome;
use ReadWatch::Library;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{call}{DATagReduce} = $CorePath{call}{PerlScript}  .'/'. 'SeqHandle.pl -TagReduce';

# working desk
$ProgParam{store} = undef;


# manage I/O #####################################

# organiz I/O handles
&Unbuffer();

# organiz tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
@ProgArg = &GetoptsNArgs();

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log} ne '1')? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}


# work flow manifold #############################
# we don't test for presence of arguments in this
# program (highly fluctuating code)

# chain to subprogram
if (0) { }
elsif ($ProgMode eq '') {  # there is no default ModeSwitch!
  print STDERR "ERROR: missing mode switch\n";
  &usage();
}
elsif ($ProgMode =~ m/^arch$/i) {
  &TestArch (@ProgArg);
}
elsif ($ProgMode =~ m/^BinomBefore$/i) {
  &TestBinomBefore (@ProgArg);
}
elsif ($ProgMode =~ m/^BlockParse$/i) {
  &TestBlockParse ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^boolean$/i) {
  &TestBoolean();
}
elsif ($ProgMode =~ m/^color$/i) {
  &TestColorFormat();
}
elsif ($ProgMode =~ m/^DataXML$/i) {
  &TestDataXML (@ProgArg);
}
elsif ($ProgMode =~ m/^EstGenome$/i) {
  &TestEstGenome (@ProgArg);
}
elsif ($ProgMode =~ m/^float$/i) {
  &TestFloat();
}
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }
elsif ($ProgMode =~ m/^match$/i) {
  &TestMatch (@ProgArg);
}
elsif ($ProgMode =~ m/^MatrixSimrel$/i) {
  &TestMatrixSimrel (@ProgArg);
}
elsif ($ProgMode =~ m/^motif$/i) {
  &TestMotifHMM (@ProgArg);
}
elsif ($ProgMode =~ m/^MyLocal$/i) {
  &TestMyLocal();
}
elsif ($ProgMode =~ m/^PathUniq$/i) {
  &TestPathUniq (@ProgArg);
}
elsif ($ProgMode =~ m/^plot$/i) {
  &TestPlotInterpol();
}
elsif ($ProgMode =~ m/^RandStr$/i) {
  print '', &RandStr ($ProgArg[0]||80), "\n";
}
elsif ($ProgMode =~ m/^SeqExper$/i) {
  foreach (@ProgArg) {
    &TestSeqExper ($_);
  }
}
elsif ($ProgMode =~ m/^SeqPure$/i) {
  &TestSeqPure (@ProgArg);
}
elsif ($ProgMode =~ m/^SeqRange$/i) {
  &TestSeqRange2 (@ProgArg);
}
elsif ($ProgMode =~ m/^SeqRangeGapped$/i) {
  &TestSeqRangeGapped ();
}
elsif ($ProgMode =~ m/^system(call)?$/i) {
  &TestSystem();
}
elsif ($ProgMode =~ m/^table$/i) {
  &TestTabHIH (@ProgArg);
}
elsif ($ProgMode =~ m/^TgtSpecif$/i) {
  &TestTgtSpecif ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^UcVsTr$/i) {
  &TestUcVsTr();
}
else {
  print STDERR "ERROR: unknown program mode or switch '$ProgMode'\n";
}

# exit script successfully
# cf. END blocks!
exit 0;


################################################################################
# usage help, command line arguments
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 Temporary demo and test routines. Sorry, program documentation is minimal
 for it is not meant to do something really useful. You may find some
 interface documentation inline, associated with function headers.

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...] <Arg1> [<Arg2> ...]

ModeSwitch (case-insensitive)
-----------------------------
-arch             functions concerning program architecture, functions
                  hierarchy etc. Module MainLib::Misc.
-BinomBefore      trace the probability of an event A (prob pa) before
                  the occurrence of an event B (prob pb)
-BlockParse       some tricky recursive parsing of a nested expression
-boolean          some boolean logics
-color            function \&MainLib::Graphics::ColorFormat
-DataXML          function \&MainLib::Data::DataFromXML
-EstGenome        function \&SeqLab::EstGenome::EstGenome
-float            float calculation, possibly BigFloat.pm
-GaussQuant       function \&Math::Statist::GaussQuant
-h(elp)           output command line syntax description and exit
-match            test one of &Match... functions in Misc.pm
-MatrixSimrel     test one of &SeqLab::SeqComp::MatrixSimrelCalc
-motif            test functions in MotifRE.pm
-MyLocal          what's the difference between 'my' and 'local'?
-PathUniq         function \&MainLib::FileTmp::PathUnique
-plot             plot object handling
-RandStr          function \&Math::kRandom::RandStr
-SeqExper         Experiment file format functions in module
                  \&SeqLab::SeqFormat.pm
-SeqPure          test of \&SeqLab::SeqBench::SeqPure function
-SeqRange         test of \&SeqLab::SeqFormat::SeqRange function
-SeqRangeGapped   test of \&SeqLab::SeqFormat::SeqRange function
-system           function 'system'
-table            table operations
-TgtSpecif        target specificity functions in ReadWatch::Library.pm
-UcVsTr           analyse time performance of 'uc' as compared with 'tr//'.

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

-debug(=N)        print debug protocol to STDERR (sometimes STDOUT). Keep
                  temporary files.
                  N           debug depth value
-log(=S)          redirect STDOUT and STDERR to LOG file
                  S           log file path, default path worked out
                              automatically from built-in directives.
-v(erbose)        print extended progress report to STDOUT.

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.
END_USAGE
  print "\n";
  exit 0;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global data:
#   $ProgMode
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my $switch = shift;

  # optional switches
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^log(=(.*))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^v(erbose)?$/i) {
    $ProgOpt{verbose} = 1;
    return;
  }

  # switch 'mode'
  if (defined $ProgMode) {
    printf STDERR "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
    exit 1;
  }
  else {
    $ProgMode = $switch;
  }
}


################################################################################
# all program functions
################################################################################


# functions concerning program architecture, functions hierarchy etc.
#
sub TestArch {
  printf "my function: %s\n", &MySub;
  printf "my memory usage: %s kByte\n", &MyMem;
}


# trace the probability of an event A (prob pa) before the occurrence
#   of an event B (prob pb)
#
sub TestBinomBefore {
  my ($ProbA, $ProbB, $ProbCutoff) = @_;
  my ($ProbPlus, $ProbNeg, $ProbLeft, $CtI);

  # function parameters
  $ProbA ||= 0.1;
  $ProbB ||= 0.05;
  $ProbCutoff ||= 0.01;

  # endless loop
  $ProbLeft = 1;
  while ($ProbLeft > $ProbCutoff) {
    $CtI ++;

    # calculate probabilities after next elementary event
    $ProbNeg  += $ProbLeft * $ProbB;
    $ProbLeft *= (1 - $ProbB);
    $ProbPlus += $ProbLeft * $ProbA;
    $ProbLeft *= (1 - $ProbA);

    # output
    printf "%s\n", join ("\t", $CtI, $ProbPlus, $ProbNeg, $ProbLeft);
  }
}


sub TestBlockParse {
  my ($str) = @_;
  my ($before, $block, $left);

  # first matching
  $str =~ m/^[^()]*/;
  $before = $&;
  if (substr($',0,1) eq '(') {
    ($block, $left) = &BlockParse (substr ($',1));
    print "before: $before\n";
    print "block: ($block)\n";
    print "left: $left\n";
  }
  else {
    print "there's no block at all!\n";
  }
}


sub BlockParse {
  my ($str) = @_;
  my ($done, $block, $left);

  # matching
  BlockParseStart: {
    $str =~ m/^[^()]*/;
    $done .= $&;
    if (substr ($',0,1) eq '(') {
      ($block, $left) = &BlockParse (substr ($',1));
      $done .= "($block)";
      $str = $left;
      redo BlockParseStart;
    }
    elsif (substr ($',0,1) eq ')') {
      $left = substr ($',1);
      return ($done, $left);
    }
    else {
      print "ERROR: premature end of string\n";
      exit 1;
    }
  }
}


# test some boolean logics
#
sub TestBoolean {

  # some boolean logics
  printf "%d (false?)\n", 1 and not (0 or 1);
  # is equivalent to:
  # (printf "%d (false?)\n", 1) and not (0 or 1);
  printf "%d (false?)\n", 1 and (not (0 or 1));
  printf "%d (false?)\n\n", 1 && (not (0 or 1));

  printf "%d (false?)\n", 13 and (not (0 and 1));
  printf "%d (false?)\n\n", (13 and (not (0 and 1)));

  printf "%d (false?)\n", (1 and not (0 or 1));
  printf "%d (true?)\n", (1 and not (0 and 1));
}


# test &MainLib::Graphics::ColorFormat
#
sub TestColorFormat {
  my ($color);

  $color = [0, 0, 0];
  printf "color input: %s\n", '('. join(',',@$color) .')';
  printf "  color returned: %s\n",
    map{ '('. join(',',@{$_||[]}) .')' } scalar &ColorFormat($color);

  $color = 'blue';
  printf "color input: %s\n", $color;
  printf "  color returned: %s\n",
    map{ '('. join(',',@{$_||[]}) .')' } scalar &ColorFormat($color);

  foreach $color (@ProgArg) {
  printf "custom color input: %s\n",
    ref($color)?'('.join(',',@$color).')':$color;
  printf "  color returned, array: %s\n",
    map{ '('. join(',',@{$_||[]}) .')' } scalar &ColorFormat($color);
  printf "  color returned, hex: %s\n",
    &ColorFormat($color, -format=>'hex');
  }
}


sub TestDataXML {
  my ($file) = @_;
  my ($pXmlTree, $pEntry);
  my (@bib, $pDatBib);

  # read XML
  $pXmlTree = &DataFromXML ($file);
  unless ($pXmlTree) { exit 1 }
  if ($$pXmlTree{label} ne 'DataFromXmlRoot') {
    printf STDERR "%s. ERROR: wrong root label - %s\n", &MySub, $$pXmlTree{value};
    exit 1;
  }
  &DataPrint ($pXmlTree, -handle=>\*STDOUT);
}


sub TestEstGenome {
  my ($file) = @_;

  # work
  &EstGenome ($file, '', '-TestParse');
}


sub TestFloat {
  my ($FloatA, $FloatB);

  # constants
  $FloatA = ' 1.3e-15';
  $FloatB = '2.5e-104';

  # the situation
  print  "\n";
  printf "printf %s: \$FloatA '%s', \$FloatB '%s'\n", '%s', $FloatA, $FloatB;
  printf "printf %s: \$FloatA %f, \$FloatB %f\n", '%f', $FloatA, $FloatB;
  printf "printf %s: \$FloatA %E, \$FloatB %E\n", '%E', $FloatA, $FloatB;
  print  "\n";

  # comparison
  printf "compare normal:\n".
         "  \$FloatA <=> \$FloatB  %2d\n".
         "  \$FloatB <=> \$FloatA  %2d\n".
         "  \$FloatA  <  \$FloatB  %2d\n".
         "  \$FloatA  >  \$FloatB  %2d\n".
         "  \$FloatB  <  \$FloatA  %2d\n".
         "  \$FloatB  >  \$FloatA  %2d\n".
         '',
         $FloatA <=> $FloatB,
         $FloatB <=> $FloatA,
         $FloatA < $FloatB,
         $FloatA > $FloatB,
         $FloatB < $FloatA,
         $FloatB > $FloatA,
         ;
  print  "\n";
}


# test some of &Match... functions in Misc.pm
#
sub TestMatch {
  my (@arg) = @_;
  my ($debug, $bTimer, $time, $MatchMin);
  my ($StrTempl, $StrRegexp, $pMatch);

  # function constants
  $bTimer = 1;
  $MatchMin = 10 - 1;

  # function parameters
  $debug = $ProgOpt{-debug};

  # template/regexp definition
  $bTimer and $time = (times)[0];
  $StrTempl = &ReadFile ($arg[0]);
  $StrRegexp = $arg[1] || sprintf ('([ACGT])(\1\n?){%d,}', $MatchMin);
  $bTimer and printf STDERR "%s. CPU time for template/regexp definition: %.3f\n", &MySub, (times)[0]-$time;

  # test
  $bTimer and $time = (times)[0];
  $pMatch = &MatchIdx (\$StrTempl, $StrRegexp);
  $bTimer and printf STDERR "%s. CPU time for search: %.3f\n", &MySub, (times)[0]-$time;
  printf "matches (&MatchIdx):\n";
    &DataPrint ($pMatch, -space=>2);
}


# test some of &Match... functions in Misc.pm
#
sub TestMatchOldA {
  my (@arg) = @_;
  my ($debug, $bTimer, $time, $cycles, $CtCycle);
  my ($StrTempl, $StrRegexp, $CtMatch);

  # function parameters
  $bTimer = 1;
  $cycles = 25;

  # template/regexp definition
  $debug = $ProgOpt{-debug};
  $bTimer and $time = (times)[0];
  $StrTempl = &ReadFile ($arg[0]);
  $StrRegexp = $arg[1] || '([ACGT])\1{4,}';
  $bTimer and printf STDERR "%s. CPU time for template/regexp definition: %.3f\n", &MySub, (times)[0]-$time;

  # method I - inline
  $bTimer and $time = (times)[0];
  for $CtCycle (1..$cycles) {
  $CtMatch = do { $_ = 0; while ($StrTempl =~ m/$StrRegexp/mog) { $_ ++; }; $_; };
  $debug and print  STDERR "matches (inline): $CtMatch\n";
  }
  $bTimer and printf STDERR "%s. CPU time for search: %.3f\n", &MySub, (times)[0]-$time;

  # method II - &MatchCt
  $bTimer and $time = (times)[0];
  for $CtCycle (1..$cycles) {
  $CtMatch = &MatchCt (\$StrTempl, $StrRegexp);
  $debug and print  STDERR "matches (&MatchCt): $CtMatch\n";
  }
  $bTimer and printf STDERR "%s. CPU time for search: %.3f\n", &MySub, (times)[0]-$time;
}


# test &SeqLab::SeqComp::MatrixSimrelCalc
sub TestMatrixSimrel {
  &MatrixSimrelCalc(-debug=>1);
}


# modules SeqLab::Motif*
#
sub TestMotifLib {
  require SeqLab::MotifLib;
  require SeqLab::MotifRE; SeqLab::MotifRE->import();
  my ($PathSeq, @ArgLib) = @_;
  my ($pMtfLib, $pMtfHits, $pSeq);

  # read motif library
  $pMtfLib = SeqLab::MotifLib->new();
  $pMtfLib->AddSwitch(-debug=>1);
  $pMtfLib->Load(@ArgLib);
  $pMtfLib->Statist(\*STDOUT);
  printf "%s. paths of files loaded:\n%s", &MySub, join('', map{"  $_\n"} $pMtfLib->Paths());

  # read sequence
  printf "%s. searching on first 3 sequences\n", &MySub;
  foreach $pSeq (@{ &SeqarrFromFFmt ($PathSeq) }[0..2]) {

    # perform motif searches
    $pMtfHits = $pMtfLib->Search ($$pSeq{sequence});
    foreach (@$pMtfHits) {
      printf "%s\n", join ("\t", $$pSeq{id}, @{$_}{qw(MotifID orient offset score instance)});
    }
  }

  # query motif library
  $pMtfLib->Statist(\*STDOUT);
}


# module SeqLab::MotifHMMer.pm
#
sub TestMotifHMM {
  require SeqLab::MotifHMMer; SeqLab::MotifHMMer->import('&HmmsearchParse');
  my ($PathHmmReport) = @_;

  my $pHit = &HmmsearchParse ($PathHmmReport);
  &DataPrint ($pHit);
}

# module SeqLab::MotifHMMer.pm
#
sub TestMotifHMM_old {
  require SeqLab::MotifHMMer;
  my ($PathHMM, $PathSeq) = @_;
  my ($pMtf, $pMtf2, $PathOut, $pSeq, $pHit);

  # read motif
  $pMtf = SeqLab::MotifHMMer->new($PathHMM);
  $pMtf->AddSwitch(-debug=>1);

  # reverse-complement motif
  unless ($pMtf2 = $pMtf->RevCompl()) {
    printf "%s. ERROR in SeqLab::MotifHMMer->RevCompl(), source object %s\n", &MySub,
      $pMtf->ID();
    exit 1;
  }
  $PathOut = $ProgParam{TmpManag}->Create(-touch=>1);
  $pMtf2->DefFile($PathOut);
  printf "%s. conversion done: %s -> %s\n", &MySub, $PathHMM, $PathOut;

  # search using motif
  $pSeq = &SeqentryPopFasta ($PathSeq, -pure=>1);
  $pHit = $pMtf->Search($$pSeq{sequence}, -isPure=>1);
  &DataPrint ($pHit);
}


# what's the difference between 'my' and 'local'?
# Still don't know.
#
sub TestMyLocal {
  printf "playing around with program mode variable, \$main::ProgMode\n";
  printf "  initial:                     global %s\n", $main::ProgMode;
  {
  local $ProgMode=$ProgMode;
  printf "  local scoping, value copied: global %s, local %s\n", $main::ProgMode, $ProgMode;
  $ProgMode .= '.modified';
  printf "  assignment (modification):   global %s, local %s\n", $main::ProgMode, $ProgMode;
  }
  printf "  left area of scoping:        global %s\n", $ProgMode;
}


# test function &PathUnique
#
sub TestPathUniq {

  # manage file paths
  my (%file);
  $file{source} = $_[0];
  $file{TgtFull} = &PathUnique (-name=>sprintf("%s_psi#.0",&PathSplit($file{source})->{nameroot}),
    -dir=>&PathSplit($file{source})->{dir}, -CtStart=>1);
  printf "%s. target file for contig export: %s\n", &MySub,
    $file{TgtFull}||"''";
  exit;

  # try features of &PathUnique
  printf "%s. \n", &MySub, &PathUnique (-touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'hallo', -touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'hallo.world', -touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'hallo###abc', -touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'hallo###abc', -CtHex=>1, -touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'hallo###abc', -CtHexL=>1, -touch=>1);
  printf "%s. \n", &MySub, &PathUnique (-name=>'../perl/extlib/dirs-ltr', -touch=>1);
}


# temporary table operations
#
# INTERFACE
# - argument 1: path of table file
#
sub TestPlotInterpol {
  my ($PathTable) = @_;

  # initialize plot object
  my $pTable = &PlainToTable ($PathTable, -TabType=>'A1', -comments=>1);
  my $time = &Sum ((times)[0,2]);
  my $pPlot = Math::Plot2D->new($pTable, -TabType=>'A1y', -debug=>1);
  printf "%s. CPU time for table conversion: %.3f\n", &MySub, &Sum((times)[0,2])-$time;

#  # save table as AA type (was A1y)
#  $PathTable =~ s/\./_plot./;
#  open (OUTTAB, ">$PathTable");
#  foreach (@{ &TableConvert ('A1y', 'AA', $pTable) }) {
#    printf OUTTAB "%s\n", join ("\t", @$_);
#  }
#  close OUTTAB;
#
#  # debug table data in object
#  &DataPrint ($pPlot->{pdata});

  # test interpolation
  print  "interpolating:\n";
  foreach (-1, 6.3, 13.2, 2.7) {
    my $ValInterpol = $pPlot->Interpolate ($_, -debug=>1);
    printf "%3.1f  %f\n", $_, $ValInterpol;
  }

  # test integration
  my @interval = (6.3, 8.2, 2.7, 5.2);
  print  "integrating:\n";
  while (my($x,$y) = splice(@interval,0,2)) {
    printf "interval %3.1f to %3.1f:  %f\n", $x, $y, $pPlot->Integral($x,$y,-debug=>1);
  }

  # test smoothening
  my $ValMax = $pPlot->Xmax();
  my $StepSize = 1.5;
  $PathTable =~ s/\./_smooth./;
  open (OUTTAB, ">$PathTable");
  print  "smoothening:\n";
#  for (my $CtVal=0; $CtVal<=$ValMax; $CtVal+=$StepSize) {
#    printf "$CtVal -> %f\n", $pPlot->SmoothVal ($CtVal, -window=>3.5, -debug=>1);
#    printf OUTTAB "%s\n", join ("\t", $CtVal, $pPlot->SmoothVal ($CtVal, -window=>3.5, -debug=>1));
#  }
  foreach (@{ $pPlot->SmoothPlot ($StepSize, -window=>3.5, -debug=>1) }) {
    printf OUTTAB "%s\n", join ("\t", @$_);
  }
  close OUTTAB;
}


# compare fastA and Experiment versions of dicty.JC2 and according
# Experiment files
#
sub TestSeqExper {
  require SeqLab::SeqStreamIn;
  require ReadWatch::ReadIndex;
  my ($poSeqQueue, $poSeqIndex, $pSeqFasta, $PathExper, $pSeqExper);
  my ($equal, $FileOut, $CtDissim);

  # initialize sequence queue for fastA
  $poSeqQueue = SeqLab::SeqStreamIn->new('/gen/links/dicty/DictyDB/dicty.JC2');
  $poSeqQueue->AddSwitch(-pure=>1);

  # initialize sequence index
  $poSeqIndex = ReadWatch::ReadIndex->new();

  # start report
  print "\tExperFound\tSeqIdent\n";

  # loop over sequences
  $CtDissim = 0;
  while ($pSeqFasta=$poSeqQueue->GetNext() and $CtDissim<50) {
    if ($PathExper = ( $poSeqIndex->Retrieve ('ExperFull', $$pSeqFasta{id}) )[0]) {
      $pSeqExper = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($PathExper,
        -ClipQuality=>1, -ClipUnk=>1, -pure=>1, -debug=>0);
      $equal = int ($$pSeqFasta{sequence} eq $$pSeqExper{sequence});
      printf "%s\t%d\t%d\t%s\n", $$pSeqFasta{id}, 1, $equal, $PathExper;
      unless ($equal) {
        $FileOut = &PathTmpdir() . "/$$pSeqFasta{id}" .'_fa';
        open (OUTSEQ, ">$FileOut");
        print OUTSEQ &SeqentryToFasta ($pSeqFasta, -debug=>0);
        close (OUTSEQ);
        $FileOut = &PathTmpdir() . "/$$pSeqFasta{id}" .'_ex';
        open (OUTSEQ, ">$FileOut");
        print OUTSEQ &SeqentryToFasta ($pSeqExper, -debug=>0);
        close (OUTSEQ);
        $CtDissim ++;
      }
    } else {
      printf "%s\t%d\n", $$pSeqFasta{id}, 0;
    }
  }
}


# test -pure switch in load of annotated sequences
#
sub TestSeqPure {
  my ($PathExper) = @_;
  my ($pSeq);

  # load method
  $pSeq = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($PathExper,
    -ClipQuality=>1, -ClipUnk=>0, -pure=>0, -debug=>0);

  # purify 1
  &SeqPure ($pSeq, -debug=>1);

  # add complex-range annotation
  push @{$$pSeq{annot}}, {
    type  => 'COMM',
    range => 'complement(165..174)',
    };

  # purify 2
  &SeqPure ($pSeq, -debug=>1);
}


# test &SeqCplxRange with/without option -PurePos
#
sub TestSeqRange {
  my ($PathExper, $RangeArg) = @_;
  my ($pSeq, $pSeqSub);

  # load sequence
  $pSeq = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($PathExper,
    -pure=>0, -debug=>0);

  # range original
  $pSeqSub = &SeqCplxRange ($pSeq, $RangeArg);
  print &SeqentryToFasta ($pSeqSub);

  # range purified
  $pSeqSub = &SeqCplxRange ($pSeq, $RangeArg, -PurePos=>1, -TrackPos=>1);
  print  &SeqentryToFasta ($pSeqSub);
  printf "%s\n", join (' ', @{$$pSeqSub{PosArr}});
}


# test annotation range information of contig in directed assembly
#
sub TestSeqRange2 {
  my ($PathExper) = @_;
  my ($pSeq, $file, $pSeqTag, $pSeqCons);
  my ($pTag, $SeqSlice);

  # load sequence entries
  $pSeq = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($PathExper,
    -pure=>0, -debug=>0);
  printf "%s. sequence %sloaded from file %s, ID %s, length %d\n", &MySub,
    $pSeq ? '': 'not ', $PathExper, $$pSeq{id}, length $$pSeq{sequence};

  $file = &PathSplit ($$pSeq{SrcPath})->{dir} . '/ContigTags';
  $pSeqTag  = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($file,
    -MatchId=>[$$pSeq{id}], -debug=>0);
  printf "%s. sequence %sloaded from file %s, ID %s\n", &MySub,
    $pSeqTag ? '': 'not ', $file, $$pSeqTag{id};

  $file = &PathSplit ($$pSeq{SrcPath})->{dir} . '/ContigConsensi';
  $pSeqCons = &{$SeqFFmtGet{Experiment}{FuncEntry}} ($file,
    -MatchId=>[$$pSeq{id}], -debug=>0);
  printf "%s. sequence %sloaded from file %s, ID %s, length %d\n", &MySub,
    $pSeqCons ? '': 'not ', $file, $$pSeqCons{id}, length $$pSeqCons{sequence};

  # loop over sequences
  foreach $pTag (grep { $_->{type} eq 'ENZ8' } grep { $_->{layer} eq 'cons' } @{$$pSeqTag{annot}}) {
    $SeqSlice = substr ($$pSeqCons{sequence}, $$pTag{offset}-1, $$pTag{end}-$$pTag{offset}+1);
    printf "%s. annotation, type ENZ8, pos %d..%d, seq %s\n", &MySub,
      $$pTag{offset}, $$pTag{end}, $SeqSlice;
  }
}


# test &SeqRangeGapped
#
sub TestSeqRangeGapped {
  require Math::Range;
  my ($pRange, $pRange2);
  my $sSeqPure = 'ATTCTGTAGTtatatataTATAGGTCGTTAGTACCTGCATGAGT';
  my $sSeqOrig = 'ATTCT-G-TAGT--tatatat-a--TATAGGTCGTTAGTACCTGCATGAGT';

  # load sequence
  $pRange = Math::Range->new([11,18]);
  printf "range in purified seq string: %s = %d..%d\n", $pRange, $pRange->lower(), $pRange->upper();

  # range original
  $pRange2 = &SeqRangeGapped ($sSeqPure, $sSeqOrig, $pRange);
  printf "range in gapped seq string: %s\n", $pRange2;
}


sub TestSystem {
  my ($DirFofn) = @_;
  my ($call);

  # do system call
  if (-d $DirFofn) { chdir $DirFofn; }
  $call = join (' ', $ProgParam{call}{DATagReduce}, "-fofn=fofn");
  system ($call);
#  system ($ProgParam{call}{DATagReduce}, "-fofn=fofn");
}


# temporary table operations
#
# INTERFACE
# - argument 1: path of table file
#
sub TestTabHIH {
  my ($PathTable) = @_;
  my ($debug, $dbg2);
  my ($pTable, $pColumn);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # read table
  ($pTable, $pColumn) = &PlainToTable ($PathTable, -TabType=>'HIH', -comments=>1, -debug=>$dbg2);
  unless ($pTable) {
    printf "ERROR: unable to read entries from table file %s\n", $PathTable||"''";
    exit 1;
  } else {
    printf "%s. read %d entries from table file %s\n", &MySub, int keys %$pTable, $PathTable||"''";
  }
  printf "%s. column labels: %s\n", &MySub, join (' ', @$pColumn);

  # check parsing
  &DataPrint ($pTable, -handle=>\*STDOUT);
}


# test sequence library specificity functions in ReadWatch::Library.pm
#
sub TestTgtSpecif {
  my ($lib, $tgt);

  # start table
  printf "# read distribution on sequencing targets\n";
  printf "#\n# column labels:\n# %s\n", join ("\t", 'library', 'read_sum',
    map { "reads_${_}" } qw(Chr1 Chr2 Chr3 Chr45 Chr6));

  # loop over libraries
  foreach $lib (qw(JAX4 JC1 JC2 JC3)) {
    printf "%s\n", join ("\t", $lib, &TgtspecifLibgrpNum ($lib, 'Read'),
      map { &TgtspecifTgtExpectLib($_,$lib,'Read') } qw(Chr1 Chr2 Chr3 Chr45 Chr6));
  }
}


# analyse time performance of uc versus tr//
#
sub TestUcVsTr {
  my $StrA = 'ABCDEFGHIKLMNPQRSTUVWXYABCDEFGHIKLMNPQRSTUVWXYABCDEFABCDEFGHIKLMNPQRSTUVWXY';
  my $CtLimit = 1000000;
  my ($StrB, $time, $CtI);

  # null loop for tr//
  $time = (times)[0];
  for ($CtI=0; $CtI<$CtLimit; $CtI++) {
    $StrB = $StrA;
  }
  printf "CPU time for tr// null loop: %.03f s\n", (times)[0]-$time;

  # tr//
  $time = (times)[0];
  for ($CtI=0; $CtI<$CtLimit; $CtI++) {
    $StrB = $StrA;
    $StrB =~ tr/a-z/A-Z/;
  }
  printf "CPU time for tr//: %.03f s\n", (times)[0]-$time;

  # null loop for uc
  $time = (times)[0];
  for ($CtI=0; $CtI<$CtLimit; $CtI++) {
  }
  printf "CPU time for uc null loop: %.03f s\n", (times)[0]-$time;

  # tr//
  $time = (times)[0];
  for ($CtI=0; $CtI<$CtLimit; $CtI++) {
    $StrB = uc $StrA;
  }
  printf "CPU time for uc: %.03f s\n", (times)[0]-$time;
}

# $Id: Test.pl,v 1.36 2018/06/05 18:02:56 szafrans Exp $
