#! /usr/local/bin/perl
################################################################################
#
#  Sequence Read Handling in the Dictyostelium Genome Project
#  Things beside the ReadWatch Concept
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 1999-2004,
#    szafrans@imb-jena.de
#
################################################################################
#
#  DESCRIPTION
#
# - this program performs some kind of special REAP and SCOPE actions
#   in the context of the Dictyostelium Genome Project.  ReadWatcher.pl
#   is somehow related to the present program, covering the more specific
#   functions of read data processing.
#
# - See function &usage for description of command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments
#   &usage
#   &AddSwitch
#
# - basic elementaries
#   &NiceName
#   &ItemCopy
#   &ItemCopyEd
#   &ItemStoreEd
#   &ItemLink
#   &ItemZip
#   &TraceRootChk
#
# - raw data elementaries
#   &SampleIterat
#   &SampleIterDir
#   &SampleDirRegist
#   &SampleDirUnzip
#   &SampleDirNameTab
#   &SampleDirNameMove0
#   &SampleDirAsampleBackup
#   &SampleDirAsampleRead
#   &SampleDirAsampleWrite
#   &SampleName
#   &SampleReport
#   &SampleStoreNice
#
# - raw data actions
#   &ProgAutosample
#   &ProgRename3700
#   &ProgSampleImport
#   &ProgSampleSort
#   &ProgReap
#
# - experimental data elementaries
#   &ExperIterat
#   &ExperIterDir
#   &ExperIterRead
#   &ExperDirRegist
#
# - experimental data actions
#   &ProgReadIndex
#   &ProgExperList
#   &ProgExperListStat
#   &ProgExperSampleIdx
#   &ProgExperStoreEd
#   &ProgExperProvide
#
# - manage sample plate and sheet formats, do sequencing experiments
#   &PlateToReads
#   &ProgSortRead
#    &SampleSheet
#   &ProgSampleSheet
#   &ProgSeqExtra
#   &ProgSeqPrimer
#
# - read pool handling
#   read pool data structure
#    &GetReadpoolStruct
#    &GetIds
#   &ProgSeqPairs
#   &ProgReadMiss
#   &ProgReadFilter
#
# - mixed actions
#   &ProgFolderCmp
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - global data like
#     $ProgParam{path}{AutoSample}  path of AutoSample file
#     $ProgParam{store}{curr}{SampleDir}  path of Sample folder
#     $ProgParam{store}{curr}{ExperDir}  path of Experiment folder
#     $ProgParam{store}{sample}     list of Sample names (AutoSample list)
#   may be organised via a data object reflecting a sequencing result. This way,
#   parallel handling of several different sequencing results at a time would
#   become possible.
#
# - make functions applicable GSCJ-wide, no longer dicty-specific
#   status quo:
#   - raw data elementaries
#     &SampleIterat                     GSCJ?
#     &SampleIterDir                    GSCJ?
#     &SampleDirRegist                  elementary for ABI folder structure
#     &SampleDirUnzip                   GSCJ: elementary for ABI folder structure
#     &SampleDirNameTab                 GSCJ: elementary for ABI folder structure
#                                       calls dicty-specific &SampleStoreNice
#     &SampleDirNameMove0               GSCJ: elementary for ABI folder structure
#     &SampleDirAsampleBackup           GSCJ: elementary for ABI folder structure
#     &SampleDirAsampleRead             GSCJ: elementary for ABI folder structure
#     &SampleDirAsampleWrite            GSCJ: elementary for ABI folder structure
#     &SampleName                       GSCJ: elementary for ABI folder structure
#     &SampleReport                     GSCJ: elementary for ABI folder structure
#     &SampleStoreNice                  'nice' is defined in dicty's sense
#   - raw data actions
#     &ProgRename3700                   GSCJ
#     &ProgSampleImport                 GSCJ, but volatile and problem-specific
#     &ProgSampleSort                   GSCJ [checked 20030806]
#     &ProgReap                         GSCJ
#   - experimental data elementaries
#     &ExperIterat                      GSCJ, defaults to dicty [checked 20040414]
#     &ExperIterDir                     GSCJ REAP+CONVERGE [checked 20040414]
#     &ExperIterRead                    GSCJ
#     &ExperDirRegist                   GSCJ
#   - experimental data actions
#     &ProgReadIndex                    dicty-specific, due to $CorePath{ReadWatch}{ExperTarget}
#     &ProgExperList                    GSCJ, defaults to dicty (certain "groups") [checked 20040419]
#     &ProgExperListStat                GSCJ, defaults to dicty
#     &ProgExperSampleIdx               GSCJ
#     &ProgExperProvide                 GSCJ, defaults to dicty (through sample indices) [checked 20040419]
#   - manage sample plate and sheet ...
#     &PlateToReads                     GSCJ
#     &ProgSortRead                     GSCJ
#      &SampleSheet
#     &ProgSampleSheet                  GSCJ
#     &ProgSeqExtra                     dicty, need sample indices
#     &ProgSeqPrimer                    dicty, need sample indices
#   - read pool handling
#     read pool data structure
#      &GetReadpoolStruct               GSCJ
#      &GetIds                          GSCJ
#     &ProgSeqPairs                     GSCJ
#     &ProgReadMiss                     GSCJ
#     &ProgReadFilter                   dicty, highly specific
#   - mixed actions
#     &ProgFolderCmp                    GSCJ, defaults to dicty [20040226]
#
# - some functions may move to ReadWatcher.pl in the future:
#   all of "manage sample plate and sheet formats, do sequencing experiments"
#   all of "read pool handling"
#
# - look also for notes in the header of each function block
#
################################################################################


# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
}
#use strict; use warnings;  # OK 20040813
use MainLib::StrRegexp;
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline qw(&GetoptsNArgs &QueryConfirm);
use MainLib::File;
use MainLib::FileTmp qw(&PathUnique);
use MainLib::Misc;
use Math::Calc;
use database::DbPlain;
use SeqLab::SeqFormat qw(%reSeqid);
use SeqLab::SeqStreamIn;
use ReadWatch::Read;
use ReadWatch::ReadIndex;
use ReadWatch::Plate;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{call}{tag} = {
  rept => "$CorePath{call}{BlastWrap} -SeqAnnot=REPT -program=blastn2 -db=dicty.mask  -MaskPoly=14 -param=DictyMask -ThreshId=0.885",
  scer => "$CorePath{call}{BlastWrap} -SeqAnnot=SPSQ -program=blastn2 -db=scerevisiae -MaskPoly=14 -param=DictyMask -ThreshId=0.895",
  trna => "$CorePath{call}{BlastWrap} -SeqAnnot=EXON -program=blastn2 -db=dicty.trna               -param=DictyMask -ThreshId=0.910",
  };
$ProgParam{call}{TagStd} = [ 'rept', 'trna' ];
$ProgParam{call}{TagStdStr} = join (',', @{$ProgParam{call}{TagStd}});
$ProgParam{default}{ProgMode}  = 'provide';
$ProgParam{default}{PlateSize} = 64;
# Experiment item files
$ProgParam{exper}{ExperCateg} = {
  exper => { FileSuffix=>'',      FileSlc=>'\.[prstw]\d$' },
  qual  => { FileSuffix=>'.qual', FileSlc=>'\.qual$' },
  scf   => { FileSuffix=>'SCF',   FileSlc=>'SCF$' },
  };
# REAP status categories
$ProgParam{exper}{ReapCateg} = [ 'exp', 'failed', 'passed' ];

# working desk
$ProgParam{action} = undef;
$ProgParam{filter} = undef;
$ProgParam{store}  = undef;
$ProgParam{time}   = undef;


# manage I/O #####################################

# organise I/O handles
&Unbuffer();

# organise tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches, default subprogram
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
unshift @ProgArg, &GetoptsNArgs();
$ProgMode ||= $ProgParam{default}{ProgMode};

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log}!=1) ? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}

# chain to program mode which is optionally argument-less
if (0) { }
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }
elsif ($ProgMode =~ m/^ReadIndex$/i) {
  &ProgReadIndex();
  exit 0;
}
elsif ($ProgMode =~ m/^ListExper$/i) {
  &ProgExperList();
  exit 0;
}
elsif ($ProgMode =~ m/^ListExpStat$/i) {
  &ProgExperListStat();
  exit 0;
}

# ensure input argument(s)
unless (@ProgArg) {
  print STDERR "ERROR: input arguments missing\n";
  exit 1;
}

# chain to program mode (with input argument(s))
my ($arg);
if (0) { }
elsif ($ProgMode =~ m/^AutoSample$/i) {
  &ProgAutosample (@ProgArg);
}
elsif ($ProgMode =~ m/^FilterExper$/i) {
  &ProgReadFilter (@ProgArg);
}
elsif ($ProgMode =~ m/^FolderCmp$/i) {
  foreach $arg (@ProgArg) {
    &ProgFolderCmp ($arg);
    $ProgParam{store} = undef;
  }
}
elsif ($ProgMode =~ m/^ListSample$/i) {
  foreach $arg (@ProgArg) {
    &SampleDirUnzip ($arg);
    $ProgParam{action}{SamplePure} = \&SampleReport;
    &SampleIterDir ($arg);
    $ProgParam{action}{SamplePure} = \&ItemZip;
    &SampleIterDir ($arg);
  }
}
elsif ($ProgMode =~ m/^PlateToReads$/i) {
  &PlateToReads (@ProgArg);
}
elsif ($ProgMode =~ m/^(provide|proj)$/i) {
   unless (exists $ProgOpt{-edited}) {
     $ProgOpt{-edited} = 1;
   }
   &ProgExperProvide (@ProgArg);
}
elsif ($ProgMode =~ m/^ReadMiss$/i) {
  &ProgReadMiss (@ProgArg);
}
elsif ($ProgMode =~ m/^Reap(Call)?$/i) {
  &ProgReap (@ProgArg);
}
elsif ($ProgMode =~ m/^Rename3700$/i) {
  foreach $arg (@ProgArg) {
    &ProgRename3700 ($arg);
  }
}
elsif ($ProgMode =~ m/^SampleIdxFromExper$/i) {
  &ProgExperSampleIdx ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^(SampleImport|ReorgBaylor)$/i) {
  foreach $arg (@ProgArg) {
    &ProgSampleImport ($arg);
  }
}
elsif ($ProgMode =~ m/^SampleSheet$/i) {
  &ProgSampleSheet ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^SampleSort$/i) {
  &ProgSampleSort ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^SeqExtra$/i) {
  &ProgSeqExtra ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^SeqPairs$/i) {
  &ProgSeqPairs ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^SeqPrimer$/i) {
  &ProgSeqPrimer ($ProgArg[0]);
}
elsif ($ProgMode =~ m/^SortRead$/i) {
  &ProgSortRead (@ProgArg);
}
elsif ($ProgMode =~ m/^StoreExper$/i) {
  &ProgExperStoreEd (@ProgArg);
}
elsif ($ProgMode =~ m/^tmp$/i) {
  print  map { "$_\n" } @ProgArg;
}
elsif ($ProgMode =~ m/^unzip$/i) {
  foreach $arg (@ProgArg) {
    &SampleDirUnzip ($arg);
  }
}
elsif ($ProgMode =~ m/^zip$/i) {
  $ProgParam{action}{SamplePure} = \&ItemZip;
  foreach $arg (@ProgArg) {
    &SampleIterDir ($arg);
  }
}
else {
  printf STDERR "ERROR: unknown program mode or switch %s\n", $ProgMode||"''";
  exit 1;
}

# exit script successfully
# cf. END blocks!
exit 0;


################################################################################
# usage help, command line arguments
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 $ProgFile is desired for handling sequencing result folders (ABI output)
 and Experiment items at the GSCJ, especially in the Dictyostelium Genome
 Project.

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...] <Arg1> [<Arg2> ...]

Arguments
---------
 See function descriptions.

path arguments:
 Relative paths will be resolved according to the pwd. Prefixes "~" and "~uid"
 are resolved to the home directories. Path "-" resolves to STDIN or STDOUT,
 depending on the context.

ModeSwitch (case-insensitive)
-----------------------------
<none>            default ModeSwitch -$ProgParam{default}{ProgMode} if program arguments are given.
                  Otherwise like ModeSwitch -help.
-AutoSample       prepare an Autosample sheet containing reniced read names
                  of sample files.
                  Arg1+         target path of sample folder(s)
-FilterExper      filter list of Experiment file paths according to
                  contamination lists
                  Arg1          output filename stem
                  Arg2          input file
-FolderCmp        compare complete Sample and Experiment folder structures
                  of the Dictyostelium Genome Project
                  Arg1+         target subfolder: AX4, Chr2, etc.
                  -TraceRoot    specify root of an Experiment file tree
                                (source), default:
                                $CorePath{ReadWatch}{ExperGroupDir}
                                The Sample file root directory will be derived
                                from the same option argument.
-h(elp)           output command line syntax description and exit
-ListExper        list full paths of Experiment items, print to STDOUT
                  Arg1*         specify root of an Experiment directory
                                subtree, default:
                                $CorePath{ReadWatch}{ExperGroupDir}
                  -recursive    no effect, recursion is a fixed fashion of this
                                program mode
                  -SlcID        select items according to name
                  -TraceRoot    specify root of an Experiment file tree
                                (source), default:
                                $CorePath{ReadWatch}{ExperGroupDir}
-ListExpStat      statistics on Experiment data
                  Arg1*         specify root of an Experiment file tree
                                (source), default:
                                $CorePath{ReadWatch}{ExperGroupDir}
                  -recursive    no effect, recursion is a fixed fashion of this
                                program mode
                  -SlcID        select items according to name
                  -TraceRoot    same as Arg1
-ListSample       report file information for a sample folder.
                  Arg1+         path(s) of Sample folder(s)
-PlateToReads     prepare list of read IDs from a plate ID
                  Arg1          primer specification, single letter
                  Arg2+         list of plate IDs
-provide          provide Experiment items (copies Experiment files and SCF
                  links) in current working folder
                  Arg1          path of file of read IDs contributing to
                                the request. FOFN entries are applied in
                                RegExp/substring searches.
                  Arg2*         optional: path of target folder, default: ./
                  -edited       non-standard default: 1.
                  -FullMatch    recommended
                  -recursive    do it the slow way, default: do not
                  -SlcID        does not work here
                  -tag          recommended if you want to use the Experiment
                                data for an assembly
                  -TraceRoot    specify root of an Experiment file tree
                                (source), default:
                                $CorePath{ReadWatch}{ExperGroupDir}
-ReadIndex        create index for all available Experiment items and print to
                  STDOUT.
                  No arguments.
                  -SlcID        select items according to name
-ReadMiss         prepare report on missing reads on available clones. Read
                  suffix digit will be 2 or higher.
                  List of reads (new unique ID) is output to STDOUT.
                  Arg1          file containing list of IDs of existing reads
                  Arg2          optional: file containing list of IDs of clones
                                to restrict analysis on
                  -RcReadIndex  specify read index rc file(s), default: ...
-reap             call REAP on list of Sample folders
                  Arg1*         paths of source sample folders
-Rename3700       renaming of folder and files coming out of ABI 3700. The
                  program may ask for user interaction.
-SampleIdxFromExper  create Sample file index based on Experiment file
                  structure
                  Arg1          Experiment root directory, project name
-SampleImport     import Sample data, typically Baylor College ABI/SCF
                  Arg1          trace directory
                  -InSuffix     file suffix which has to be removed to get the
                                trace ID.
                  -var=fchg,B   change files, default: 1
-SampleSheet      prepare a sample sheet from file of read names
                  Arg1          file of read names
                  sample sheet is automatically saved as input file + '_sht'.
-SampleSort       sort Sample files in Sample folder into appropriate target
                  folders
                  Arg1          path of source sample folder
-SeqExtra         prepare a sorted list, well plate scheme and sample sheet
                  from file of read names, cmp. -SampleSheet.
                  Arg1          file of read names
                  NOTE: this function generates a large bunch of files!
-SeqPairs         prepare a listing of existing read pairs in a fastA
                  read database.
                  argument 1    sequence source file for read IDs
-SeqPrimer        prepare a sorted list, well plate scheme and sample sheet
                  from file of clone/primer pairs.
                  Arg1          space-delimited table of clone/primer pairs
                  NOTE: this function generates a large bunch of files!
-SeqXML           prepare a trace archive XML for a batch of Experiment files
                  argument 1    file of filenames
-SortRead         sort read list logically, output to STDOUT.
                  Arg1          file of read names
-StoreExper       store read files in a folder into indexed folder structure
                  Arg1          path of target folder
-unzip            turn sample folder unzipped
                  Arg1+         target path(s) of sample folder(s)
-zip              zip unzipped sample files in a sample folder.
                  Arg1+         target path(s) of sample folder(s)

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.


-debug(=N)        print debug protocol to STDERR (sometimes STDOUT). Keep
                  temporary files.
                  N           debug depth value
-edited(=B)       regard edited file versions if available, default: do not.
-fofn=S           supply list of command arguments in a file. The entries of the
                  file will be appended to the argument list. Multiple -fofn
                  switch statements are allowed.
-FullMatch        perform exact string matching during select operations
                  instead of RegExp matching
-InSuffix=S       file suffix for input
-log(=S)          redirect STDOUT and STDERR to LOG file
                  S           log file path, default path worked out
                              automatically from built-in directives.
-OutFail=S        file path for output of failed data (e.g. list of read names)
-OutPass=S        file path for output of passed data (e.g. list of read names)
-OutStump=S       path stump for multi-file output. A default is derived from
                  input file names in most cases.
-PlateSize        specify plate format (not implemented yet)
-RcReadIndex=S    use read index rc file(s) (comma-delimited switch arguments)
                  instead of default $ReadWatch::ReadIndex::LibGlob{default}{index}
-recursive        do full recursion Experiment file structure, do not use a
                  read index.
-SlcDescr=S       specify a list of sequence description keywords (RegExps
                  in a file) which shall be used to select entries from the
                  sequence source.
-SlcID=S          specify a RegExp which shall be used for sequence ID
                  selection. There are magic ID selectors:
                  dicty       any regular read name from the Dictyostelium
                              Genome Project
                  gscj        any regular read name from the GSCJ part of
                              the Dictyostelium Genome Project
-SlcReap=S        select reads according to REAP processing status. Possible
                  values:
                  exp         all Experiment files
                  failed      Experiment files which failed the REAP procedure
                  passed      Experiment files which passed the REAP procedure
-tag(=S)          tag Experiment files. This works in combination with ModeSwitch
                  -provide.
                  S           tag procedure sets as a comma-delimited list,
                              default: $ProgParam{call}{TagStdStr}
-timer            print time-performance protocol to STDERR
-TraceRoot=S      specify a root of read/sample folders, default:
                  $CorePath{ReadWatch}{ExperGroupDir} /
                  $CorePath{ReadWatch}{RawGroupDir}.
-var=S,X          do assignment for function-specific variable.
                  S           variable name, case-sensitive
                  X           value
-v(erbose)        print extended action protocol to STDERR or STDOUT

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.
END_USAGE
  print "\n";
  #'
  exit 0;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global options:
#   -debug       [STD]
#
# - global data:
#   $ProgMode
#   @ProgArg     may be pre-filled here beside it's typically used to store
#                return value from &GetoptsNArgs via &AddSwitch (option
#                -fofn)
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my ($switch) = shift;
  my ($debug, $SwitchArg);
  my ($pTable);

  # function parameters
  $debug = $ProgOpt{-debug};

  # optional switches
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = length($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^edited(=(\d+))?$/i) {
    $ProgOpt{-edited} = length($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^fofn=(.+)$/i) {
    $SwitchArg = ($1 eq '-') ? $1 : &PathExpand($1);
    if ($pTable = &LoadFoid($SwitchArg)) {
      if ($$pTable[0] =~ m/\bResults$/) {
        $pTable = &LoadFoid ($SwitchArg, -delimit=>{line=>$reEndl,col=>'\t'});
      }
      push @ProgArg, @$pTable;
      $debug and printf STDERR "%s. %d entries loaded from fofn %s\n", &MySub,
        int @$pTable, $SwitchArg||"''";
    } else {
      printf STDERR "ERROR: unable to read entries from file of filenames %s (-> %s)\n",
        $1, $SwitchArg;
      exit 1;
    }
    return;
  }
  if ($switch =~ m/^FullMatch$/i) {
    $ProgOpt{-FullMatch} = 1;
    return;
  }
  if ($switch =~ m/^InSuffix=(.*)$/i) {
    $ProgOpt{-InSuffix} = $1;
    return;
  }
  if ($switch =~ m/^log(=(.*))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^OutFail=(.+)$/i) {
    $ProgOpt{-OutFail} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^OutPass=(.+)$/i) {
    $ProgOpt{-OutPass} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^OutSt[au]mp=(.+)$/i) {
    $ProgOpt{-OutStump} = &PathExpand ($1);
    return;
  }
  if ($switch =~ m/^PlateSize=(\d+)$/i) {
    $ProgOpt{-PlateSize} = $1;
    return;
  }
  if ($switch =~ m/^RcReadIndex=(.+)$/i) {
    $ProgOpt{-RcReadIndex} = [ split(',',$1) ];
    return;
  }
  if ($switch =~ m/^(recourse|recursive)$/i) {
    $ProgOpt{-recursive} = 1;
    return;
  }
  if ($switch =~ m/^Select/i) {
    print  STDERR "ERROR: selector switches are now spelled \"-Slc*\"\n";
    exit 1;
  }
  if ($switch =~ m/^SlcDescr=(.+)$/i) {
    $ProgOpt{-SlcDescr} = $1;
    return;
  }
  if ($switch =~ m/^SlcID=(.+)$/i) {
    $ProgOpt{-SlcID} = $1;
    if ($ProgOpt{-SlcID} =~ m/^dicty$/i) {
      $ProgOpt{-SlcID} = $reSeqid{DictyRd};
    }
    if ($ProgOpt{-SlcID} =~ m/^gscj$/i) {
      $ProgOpt{-SlcID} = $reSeqid{GscjRd};
    }
    return;
  }
  if ($switch =~ m/^SlcReap=(\w+)$/i) {
    $ProgOpt{-SlcReap} = lc($1);
    unless (grep { $ProgOpt{-SlcReap} eq $_ } @{$ProgParam{exper}{ReapCateg}}) {
      printf STDERR "ERROR: unknown REAP fofn category %s\n", $ProgOpt{-SlcReap};
      exit 1;
    }
    return;
  }
  if ($switch =~ m/^tag(=(.*))?$/i) {
    $ProgOpt{-tag} = $2 ? [ split(/,/,$2) ] : 1;
    if ($2) {
      foreach (@{$ProgOpt{-tag}}) {
        unless (defined $ProgParam{call}{tag}{$_}) {
          printf STDERR "ERROR: tag procedure set %s not available, try one of:\n  %s\n",
            $_||"''", join (', ', keys %{$ProgParam{call}{tag}});
          exit 1;
        }
      }
    }
    return;
  }
  if ($switch =~ m/^timer$/i) {
    $ProgOpt{-timer} = 1;
    $ProgParam{time}{start} = time;
    return;
  }
  if ($switch =~ m/^TraceRoot=(.*)$/i) {
    $ProgOpt{-TraceRoot} = $1;
    return;
  }
  if ($switch =~ m/^var=(\w+)[,=](.+)$/i) {
    $ProgOpt{-var}{$1} = $2;
    return;
  }
  if ($switch =~ m/^v(erbose)?$/i) {
    $ProgOpt{-verbose} = 1;
    return;
  }

  # program mode switches
  if (defined $ProgMode) {
    printf STDERR "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
    exit 1;
  }
  else {
    $ProgMode = $switch;
    return;
  }
}


################################################################################
# basic elementaries
################################################################################


# make nice read names
#
# INTERFACE
# - argument 1: reference to plain text containing read name to check/change
# - return val: number of changes
#
sub NiceName {
  my ($pName) = @_;
  my ($CtChg);

  # upper case of target specifier
  $CtChg += ($$pName =~ s/(jax|Jax|jAx|jaX|jAX|JaX|JAx)(4[a-ex-z]\d{2,3}[a-h]\d{2})/JAX$1/g);  # lower-case
  $CtChg += ($$pName =~ s/(jc|Jc|jC)([12][a-ex-z]\d{2,3}[a-h]\d{2})/JC$2/g);                   # lower-case
  $CtChg += ($$pName =~ s/(JAX)([a-ex-z]\d{2,3}[a-h]\d{2})/${1}4${2}/gi);                      # missing '4'
  $CtChg += ($$pName =~ s/J([12][a-ex-z]\d{2,3}[a-h]\d{2})/JC$1/g);                            # chromosome 'C' missing
  $CtChg += ($$pName =~ s/JC([12])[12]([a-ex-z]\d{2,3}[a-h]\d{2})/JC$1$2/g);                   # chromosome digit doubling
  $CtChg += ($$pName =~ s/JC([a-ex-z]\d{2,3}[a-h]\d{2})/JC2$1/g);                              # chromosome digit missing => 2

  # primer specifier
  $CtChg += ($$pName =~ s/([a-ex-z]\d{2,3})r([a-h]\d{2}\.)t(\d)/${1}${2}r${3}/g); # 'r' moved to the middle
  $CtChg += ($$pName =~ s/([a-ex-z]\d{2,3})t([a-h]\d{2}\.[rs]\d)/${1}${2}/g);     # additional 't' in the middle
#  $CtChg += ($$pName =~ s/([a-ex-z]\d{2,3}[a-h]\d{2}\.)t(\d)/${1}r${2}/g);

  # read number digit
  $CtChg += ($$pName =~ s/([a-ex-z]\d{2,3}[a-h]\d{2}\.[rs])([^\d]|$)/${1}1${2}/g);  # missing final '1'
  $CtChg += ($$pName =~ s/\.s1m$/\.s1/g);

  # exit SUB
  return $CtChg;
}


# copy specified file to target folder
#
# INTERFACE
# - argument 1: full path in Experiment tree (source file)
#
# - options:
#   -debug      [STD]
#   -TgtName    name of target file instead of source name
#
# - return val: success status (boolean)
#
# - global data:
#   $ProgParam{store}{DirFixed}          fixed folder (target)
#
sub ItemCopy {
  my ($PathSrc, %opt) = @_;
  my ($debug);
  my ($FileTgt, $PathTgt, $ret);

  # function parameters
  $debug = $opt{-debug};

  # work out target filename
  $FileTgt = $opt{-TgtName} || &PathSplit ($PathSrc)->{name};
  $PathTgt = ($ProgParam{store}{DirFixed} || '.') .'/'. $FileTgt;
  if ($PathSrc eq $PathTgt) { return }

  # copy file
  $debug and printf STDERR "%s. copying file $PathSrc\n", &MySub;
  unless ($ret = &FileCopy($PathSrc,$PathTgt)) {
    printf STDERR "%s. ERROR: copying of $PathSrc failed: '$!'\n", &MySub;
  }

  # exit SUB
  return $ret;
}


# copy specified file to target folder, *.ed version preferred
#
# DESCRIPTION
# - interface identical to &ItemCopy
#
sub ItemCopyEd {
  my ($PathSrc, %opt) = @_;

  # function parameters
  if (-r "$PathSrc.ed") {
    &ItemCopy ("$PathSrc.ed", %opt, -TgtName=>&PathSplit($PathSrc)->{name});
  }
  else { &ItemCopy (@_) }
}


# move Experiment file from fixed folder in target folder as *.ed
#
# INTERFACE
# - argument 1: full path in Experiment tree (target file)
#
# - options:
#   -debug      [STD]
#
# - return val: success status (boolean)
#
# - global data:
#   $ProgParam{store}{DirFixed}          fixed folder (source)
#
# DEBUG, CHANGES, ADDITIONS
# - for disk space econonomy:
#   Check if Experiment data has really changed vs. original Experiment data in
#   Experiment folder.
#
sub ItemStoreEd {
  my ($PathItem, %opt) = @_;
  my ($bEconomic);
  my ($pSeqQueue, $pSeqOrig, $pSeqNew, $bMove);
  my ($FileItem, $DirSrc, $DirTgt, $ret);

  # function parameters
  $FileItem = &PathSplit ($PathItem)->{name};
  $DirTgt = &PathSplit ($PathItem)->{dir};
  $DirSrc = $ProgParam{store}{DirFixed};
  $bEconomic = 1;

  # disk space econonomy
  # is the newer Experiment file changed in comparison to the original one?
  $bMove = 1;
  if ($bEconomic) {
    $bMove = 0;
    $pSeqQueue = SeqLab::SeqStreamIn->new("$DirSrc/$FileItem", "$DirTgt/$FileItem");
    $pSeqQueue->AddSwitch (-pure=>1, -upper=>1);
    $pSeqNew = $pSeqQueue->GetNext();
    $pSeqOrig = $pSeqQueue->GetNext();
    if ($$pSeqOrig{sequence} ne $$pSeqNew{sequence}) {
      $bMove = 1;
      printf STDERR "Experiment file has changed sequence: $FileItem\n";
    }

    # look for annotation types other than standard
    else {
      if (grep { $_->{type} !~ m/^(EXON|REPT|SPSQ)/ } @{$$pSeqNew{annot}}) {
        $bMove = 1;
        printf STDERR "Experiment file has been annotated: %s, type %s\n",
          $FileItem, join (' ',map{@$_} &DataTreeSlc($$pSeqNew{annot},[[0,'all'],['type']],-unique=>1));
      } else {
        printf STDERR "Experiment file without valuable changes: $FileItem\n";
      }
    }
  }

  # update/create edited Experiment file
  if ($bMove) {
    $ret = &mv ("$DirSrc/$FileItem", "$DirTgt/$FileItem.ed");
    printf STDERR "Experiment file stored: $DirTgt/$FileItem.ed\n";
  } else {
    unlink "$DirSrc/$FileItem";
  }

  # exit SUB
  return $ret;
}


# create symbolic link to file in Experiment folder
#
# INTERFACE
# - argument 1: full path of source file
#               to be linked
#
# - options:
#   -debug      [STD]
#
# - return val: success status (boolean)
#
# - global data:
#   $ProgParam{store}{DirFixed}          fixed folder (target)
#
sub ItemLink {
  my ($FileTgt, %opt) = @_;
  my ($debug);
  my ($FileLink, $ret);

  # function parameters
  $debug = $opt{-debug};

  # work out link name
  $FileLink = ($ProgParam{store}{DirFixed} || '.') .'/'. &PathSplit ($FileTgt)->{name};

  # link file
  $debug and printf STDERR "%s. linking $FileLink -> $FileTgt\n", &MySub;
  unless ($ret = &FileLink ($FileTgt, $FileLink)) {
    printf STDERR "%s. ERROR: linking of $FileTgt failed: '$!'\n", &MySub;
  }

  # exit SUB
  return $ret;
}


# Zip file
#
# INTERFACE
# - argument 1: path of target file
#
sub ItemZip {
  my ($PathTgt, %opt) = @_;

  # zip sample file
  if (system ("$CorePath{call}{gzip} '$PathTgt'")) {
    printf STDERR "%s. ERROR: unable to zip file %s\n", &MySub, $PathTgt||"''";
  }
}


# verify existence of trace file root directory
#
# INTERFACE
# - argument 1: path of directory
# - return val: status (boolean)
#
# DESCRIPTION
# - For the function code, it does not matter at all if the path is a trace
#   root directory or any other directory. Just, that the error message is
#   quite topic-specific.
# - The function will cause a program exit (with error flag) if a directory
#   does not exist.
# - The path argument is expected to represent the Experiment/Sample root
#   directory (default in $CorePath{ReadWatch}{RawHome} /
#   $CorePath{ReadWatch}{ExperHome}) plus the subdirectory name of the
#   so-called "group".
#
sub TraceRootChk {
  my ($PathTrRoot, %opt) = @_;
  if (! -d $PathTrRoot) {
    printf STDERR "%s. trace base directory does not exist: %s\n", (caller(1))[3],
      $PathTrRoot;
    exit 1;
  }
}


################################################################################
# raw data elementaries
################################################################################


# work through Sample folder structure
#
# INTERFACE
# - argument 1: path of target folder
#
# - options:
#   -debug      [STD]
#
sub SampleIterat {
  my ($PathDir, %opt) = @_;
  my ($debug, $dbg2);
  my (@Fname, $DirSample);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # read names of sample folders from directory
  @Fname = &ReadDir ($PathDir, -select=>'^Results \d+', -debug=>$dbg2);
  $debug and printf STDERR "%s. found %d sample folders in directory %s\n", &MySub,
    int @Fname, $PathDir||"''";

  # loop over sample folders
  foreach $DirSample (@Fname) {
    &{$ProgParam{action}{SampleDir}} ($PathDir.'/'.$DirSample, -debug=>$dbg2);
  }
}


# work through Sample folder
#
# INTERFACE
# - argument 1: path of target Sample folder
#
# - global options:
#   -debug      [STD]
#
# - global data:
#   $ProgParam{action}{SamplePure}  code reference for action on sample file already decompressed
#                                   (compressed files will be temporarily decompressed)
#   $ProgParam{action}{SampleZip}   code reference for action on sample.gz file
#                                   (compressed files will be temporarily decompressed)
#   $ProgParam{action}{SampleSeq}   code reference for action on decompressed Sample.Seq file
#                                   (compressed files will be temporarily decompressed)
#
sub SampleIterDir {
  my ($PathTgt, %opt) = @_;
  my ($debug, $dbg2);
  my (@Fname, $PathSource, $PathSourceFull, $PathSourceDecomp);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  if (@Fname = &ReadDir($PathTgt,-select=>'Kopie')) {
    printf STDERR "WARNING: hilarious file%s in Sample folder %s\n%s",
      (@Fname==1)?'':'s', $PathTgt, join('',map{"$_\n"}@Fname);
  }

  ##############################################################################
  # zipped sample files

  if ($ProgParam{action}{SampleZip}) {

    # read names of zipped files from directory
    @Fname = &ReadDir ($PathTgt, -select=>'^Sample \d+\.gz$', -expand=>1, -debug=>$dbg2);
    $debug and printf STDERR "%s. found %d zipped Sample-related files in sample folder %s\n", &MySub,
      int(@Fname), $PathTgt||"''";

    # loop over zipped files
    foreach $PathSource (@Fname) {
      $PathSourceDecomp = $PathSourceFull;
      $PathSourceDecomp =~ s/\.gz$//;
      $debug and printf STDERR "%s. acting on file %s, unzipped %s\n", &MySub,
        $PathSource||"''", $PathSourceDecomp||"''";

      # do work: unzip, action, zip
      if (system ("$CorePath{call}{gunzip} '$PathSource'")) {
        printf STDERR "%s. ERROR: unable to unzip file %s\n", &MySub, $PathSource||"''";
      }
      &{$ProgParam{action}{SampleZip}} ($PathSourceDecomp, -debug=>$dbg2);
      if (system ("$CorePath{call}{gzip} '$PathSourceDecomp'")) {
        printf STDERR "%s. ERROR: unable to zip file %s\n", &MySub, $PathSourceDecomp||"''";
      }
    } # end foreach $PathSource
  }

  ##############################################################################
  # unzipped sample files

  if ($ProgParam{action}{SamplePure}) {

    # read names of unzipped files from directory
    @Fname = &ReadDir ($PathTgt, -select=>'^Sample \d+$', -expand=>1, -debug=>$dbg2);
    $debug and printf STDERR "%s. found %d unzipped Sample files in sample folder %s\n", &MySub,
      int(@Fname), $PathTgt||"''";

    # loop over unzipped files
    foreach $PathSourceDecomp (@Fname) {
      $debug and printf STDERR "%s. acting on file %s\n", &MySub, $PathSourceDecomp||"''";

      # do work
      &{$ProgParam{action}{SamplePure}} ($PathSourceDecomp, -debug=>$dbg2);
    }

  } # end if $ProgParam{action}{SamplePure}
}


# register Sample folder to be present
#
# INTERFACE
# - argument 1: path of target Sample folder
#
# - options:
#   -debug      [STD]
#
sub SampleDirRegist {
  my ($PathTgt, %opt) = @_;
  my ($debug, $dbg2);
  my (@Fname, @SmpArr, $date, $machine);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  if (@Fname = &ReadDir ($PathTgt, -select=>'Kopie')) {
    printf STDERR "WARNING: hilarious file%s in Sample folder %s\n%s",
      (@Fname==1)?'':'s', $PathTgt, join('', map{"$_\n"}@Fname);
  }

  # verify sample files to exist
  if (@SmpArr = &ReadDir($PathTgt,-select=>'^Sample \d+',-debug=>$dbg2)) {

    # split folder name to date/machine
    if ($PathTgt =~ m/(^|\/)Results (\d{2}\.\d{2}\.\d{2}) ([\w.-]+)/) {
      $date = $2;
      $machine = $3;
    } else {
      printf STDERR "%s. ERROR: pattern mismatch for Sample folder '%s'\n", &MySub, $PathTgt;
      return;
    }

    # register folder
    $ProgParam{store}{folder}{$date.'_'.$machine}{raw} = 1;
    $debug and printf STDERR "%s. registering folder %s\n", &MySub, $date .'_'. $machine||"''";
  }

  # no Sample files!
  elsif ($debug) {
    printf STDERR "%s. no Sample files found in Sample folder '%s'\n", &MySub, $PathTgt;
  }

}


# turn Sample folder unzipped
#
# INTERFACE
# - argument 1: path of target Sample folder
#
# - global options:
#   -debug      [STD]
#
sub SampleDirUnzip {
  my ($PathTgt, %opt) = @_;
  my ($debug);
  my (@Fname, $PathCurr);

  # function parameters
  $debug = $ProgOpt{-debug};
  unless (-d $PathTgt) {
    printf STDERR "%s. ERROR: unable to find Sample folder %s\n", &MySub, $PathTgt;
    return;
  }
  if (@Fname = &ReadDir($PathTgt,-select=>'Kopie')) {
    printf STDERR "WARNING: hilarious file%s in Sample folder %s\n%s",
      (@Fname==1)?'':'s', $PathTgt, join('',map{"$_\n"}@Fname);
  }

  # loop over zipped sample files
  @Fname = &ReadDir ($PathTgt, -select=>'\.gz$', -expand=>1);
  $debug and printf STDERR "%s. found %d zipped files in sample folder %s\n", &MySub,
    int(@Fname), $PathTgt||"''";
  foreach $PathCurr (@Fname) {
    if (system ("$CorePath{call}{gunzip} '$PathCurr'")) {
      printf STDERR "%s. ERROR: unable to unzip file %s\n", &MySub, $PathCurr||"''";
    }
  }
}


# backup existing AutoSample file
#
# INTERFACE
# - argument 1: path of target folder
#
# - global options:
#   -debug      [STD]
#
# - return val: there was an AutoSample file (boolean)
#
# - global data:
#   $ProgParam{path}{AutoSample}   path of the AutoSample file even if it doesn't exist.
#                                  It's derived from $PathDir => $ProgParam{store}{curr}{SampleDir}
#   $ProgParam{store}{curr}{SampleDir}
#
# DESCRIPTION
# - zipped AutoSample file or AutoSample backup files are unzipped.
#
sub SampleDirAsampleBackup {
  my ($PathDir, %opt) = @_;
  my ($debug);
  my ($PathDirE, $PathAutoSampleReapold, $PathZip, $PathBakold);

  # function parameters
  $debug = $ProgOpt{-debug};
  unless ($PathDirE = $ProgParam{store}{curr}{SampleDir} = &PathExpand($PathDir)) {
    printf STDERR "ERROR: unable to find target path %s\n", $PathDir||"''";
    exit 1;
  }

  # name of Autosample file
  # - current naming philosophy
  # - naming philosophy of old REAP versions (until 2.4B ?)
  $ProgParam{path}{AutoSample} = $PathDirE .'/Autosample';
  if ($PathDirE =~ m/Results([ \.\w]+)/) {
    $PathAutoSampleReapold = $PathDirE .'/Autosample'. $1;
  } else {
    printf STDERR "ERROR: no RegExp match on specified target path %s (%s)\n",
      $PathDirE, $_[0]||"''";
    exit 1;
  }

  # unzip existing Autosample file and backups
  foreach $PathZip (&ReadDir ($PathDir, -select=>'^Autosample.*\.gz$', -expand=>1)) {
    if (system ("$CorePath{call}{gunzip} '$PathZip'")) {
      printf STDERR "ERROR: unable to unzip file %s\n", $PathZip||"''";
    } elsif ($debug) {
      printf STDERR "%s. unzipping existing Autosample file %s\n", &MySub,
        $ProgParam{path}{AutoSample}||"''";
    }
  }

  # rename existing Autosample file backups in old naming philosophy
  foreach $PathBakold (&ReadDir ($PathDir, -select=>'^Autosample .+\.\d{3}$', -expand=>1)) {
    printf "renaming Autosample backup %s\n", $PathBakold;
    rename $PathBakold, &PathUnique (-name=>$ProgParam{path}{AutoSample}.'.###');
  }
  if (-e $PathAutoSampleReapold) {
    if (-e $ProgParam{path}{AutoSample}) {
      printf "deleting Autosample file %s\n", $PathAutoSampleReapold;
      unlink $PathAutoSampleReapold;
    } else {
      printf "renaming existing Autosample file %s\n", $PathAutoSampleReapold;
      rename $PathAutoSampleReapold, &PathUnique (-name=>$ProgParam{path}{AutoSample}.'.###');
    }
  }
  # rename existing Autosample file
  if (-e $ProgParam{path}{AutoSample}) {
    printf "renaming existing Autosample file %s\n", $ProgParam{path}{AutoSample};
    rename $ProgParam{path}{AutoSample}, &PathUnique (-name=>$ProgParam{path}{AutoSample}.'.###');
    return 1;
  }

  # nothing to do
  return 0;
}


# return list of Sample names
#
# INTERFACE
# - argument 1: path of target folder
#
# - global options:
#   -debug      [STD]
#
# - return val: array of names for Sample files
#               see DESCRIPTION for index numbering
#
# - global data:
#   $ProgParam{path}{AutoSample}  path of the AutoSample file
#                                 That's set by &SampleDirAsampleRead
#   $ProgParam{store}{sample}     list of Sample names
#                                 see DESCRIPTION for index numbering
#   $ProgParam{store}{curr}{SampleDir}
#
# DESCRIPTION
# - first, the functions tries to get current Sample names from an AutoSample
#   file. If it doesn't exist, names are read from the Sample files.
# - array entry number (starting at 0) of Sample name corresponds to Sample
#   number. Array entry number 0 should always be undefined.
#
sub SampleDirNameTab {
  my ($PathDir, %opt) = @_;
  my ($debug);
  my (@name);

  # function parameters
  $debug = $ProgOpt{-debug};
  unless (-d ($ProgParam{store}{curr}{SampleDir} = &PathExpand($PathDir))) {
    printf STDERR "ERROR: unable to find target folder %s\n", $PathDir||"''";
    exit 1;
  }

  # names in AutoSample file?
  if (@name = @{ &SampleDirAsampleRead($PathDir)||[] } ) {
    $debug and printf STDERR "%s. found %s Sample names in AutoSample file\n", &MySub, int @name;
    return @name;
  }

  # names from Sample files (niced)
  &SampleDirUnzip ($PathDir);
  $ProgParam{store}{sample} = [];
  $ProgParam{action}{SamplePure} = \&SampleStoreNice;
  &SampleIterDir ($PathDir);
  return @{$ProgParam{store}{sample}};
}


# return list of Sample names
#
# INTERFACE
# - argument 1: path of target folder
#
# - local/global options:
#   -debug      [STD]
#
# - global data:
#   $ProgParam{store}{sample}     is needed in order to get the number of
#                                     existing Samples. This list is updated
#                                     in respect of up-counting.
#                                     Remember: index [0] is a fake position!
#
# DESCRIPTION
# - Invoke this function if the Sample folder is assumed to contain a
#   Sample #0. All existing Samples will get up-counted.
#
sub SampleDirNameMove0 {
  my ($PathDir, %opt) = @_;
  my ($debug);
  my (@name, $CtI, $suffix);

  # function parameters
  $debug = $opt{-debug} || $ProgOpt{-debug};
  &SampleDirUnzip ($PathDir);

  # renumber sample files if needed
  # this has to be done if a sample file has number 0
  if (glob ("$PathDir/Sample 0*")) {
    $debug and printf STDERR "%s. need to renumber Sample files\n", &MySub;
    $debug and printf STDERR "  Sample folder: %s\n", $PathDir;
    for ($CtI=$#{$ProgParam{store}{sample}}; $CtI>=0; $CtI--) {
      foreach $suffix ('', 'gz', '.Seq', '.Seq.gz') {
        if (-e $PathDir ."/Sample $CtI$suffix") {
          $debug and printf STDERR "%s. renaming %s -> %s\n", &MySub,
            $PathDir ."/Sample $CtI$suffix", $PathDir . sprintf("/Sample %d$suffix", $CtI+1);
          rename $PathDir ."/Sample $CtI$suffix", $PathDir . sprintf("/Sample %d$suffix", $CtI+1);
        }
      }
    }
    unshift @{$ProgParam{store}{sample}}, undef;
  } else {
    printf STDERR "%s. ERROR: nothing to do here\n", &MySub;
  }
}


# return array of Sample names read from AutoSample file
#
# INTERFACE
# - argument 1: path of target folder
#
# - global options:
#   -debug      [STD]
#
# - return val: reference to Sample name list (from AutoSample file)
#               while the list is globally stored at $ProgParam{store}{sample}
#               see comment on index numbering
#
# - global data:
#   $ProgParam{path}{AutoSample}      path of the AutoSample file even if it doesn't exist.
#                                     It's derived from $PathDir => $ProgParam{store}{curr}{SampleDir}
#   $ProgParam{store}{curr}{SampleDir}  path of current Sample folder
#   $ProgParam{store}{sample}         list of Sample names
#                                     see comment on index numbering
#
# DESCRIPTION
# - this function may also be used just to get the proper name of the AutoSample
#   for any folder
# - Valid entries start at index number 1 in order to mirror Sample file
#   numbering. Array entry number 0 is undefined and should be left so.
#
sub SampleDirAsampleRead {
  my ($PathDir, %opt) = @_;
  my ($debug);
  my ($PathDirE, $PathAutoSampleReapold, $PathZip, $PathAsample, $pName);

  # function parameters
  # pre-work
  $debug = $ProgOpt{-debug};
  unless ($PathDirE = $ProgParam{store}{curr}{SampleDir} = &PathExpand($PathDir)) {
    printf STDERR "ERROR: unable to find target path %s\n", $PathDir||"''";
    exit 1;
  }

  # name of Autosample file
  # - current naming philosophy
  # - naming philosophy of old REAP versions (until 2.4B ?)
  $ProgParam{path}{AutoSample} = $PathDirE .'/Autosample';
  if ($PathDirE =~ m/Results([ \.\w]+)/) {
    $PathAutoSampleReapold = $PathDirE .'/Autosample'. $1;
  } else {
    printf STDERR "ERROR: no RegExp match on specified Sample folder %s (was %s)\n",
      $PathDirE, $_[0]||"''";
    exit 1;
  }

  # unzip existing Autosample file
  foreach $PathZip (&ReadDir ($PathDir, -select=>'^Autosample.*\.gz$', -expand=>1)) {
    if (system ("$CorePath{call}{gunzip} '$PathZip'")) {
      printf STDERR "ERROR: unable to unzip file %s\n", $PathZip||"''";
    } elsif ($debug) {
      printf STDERR "%s. unzipping existing Autosample file %s\n", &MySub,
        $ProgParam{path}{AutoSample}||"''";
    }
  }

  # existing Autosample file?
  foreach $PathAsample ($ProgParam{path}{AutoSample}, $PathAutoSampleReapold) {
    if (-e $PathAsample and
        $pName = &PlainToTable ($PathAsample, -TabType=>'A1') and @$pName) {
      unshift @$pName, undef;
      return $ProgParam{store}{sample} = [ @$pName ];
    }
  }

  # no Autosample file
  return $ProgParam{store}{sample} = [];
}


# prepare (fresh) AutoSample table from global data and store it in Sample folder
#
# INTERFACE
# - global options:
#   -debug      [STD]
#
# - global data:
#   $ProgParam{path}{AutoSample}      path of the AutoSample file even if it doesn't exist.
#                                     It was derived from $PathDir => $ProgParam{store}{curr}{SampleDir}
#   $ProgParam{store}{sample}         list of Sample names
#                                     Remember: index [0] is a fake position!
#
# DESCRIPTION
# - existing AutoSample files will be overwritten!
#   Call &SampleDirAsampleBackup to backup existing Autosample files.
#
sub SampleDirAsampleWrite {
  my ($debug);
  my ($succ, $LinkSrc, $LinkTgt);

  # function parameters
  $debug = $ProgOpt{-debug};

  # write Autosample file
  shift @{$ProgParam{store}{sample}};
  $succ = &WriteFile ($ProgParam{path}{AutoSample},
    join ('', map {"$_\n"} @{$ProgParam{store}{sample}} ));
  unshift @{$ProgParam{store}{sample}}, undef;
  $succ or printf STDERR "ERROR: unable to write Autosample file %s\n", $ProgParam{path}{AutoSample}||"''";
}


# return read name retrieved from Sample file
#
# INTERFACE
# - argument 1: path of target file
#
# - options:
#   -debug      [STD]
#
# - return val: - name from Sample file
#               - undef if an error occurs
#
sub SampleName {
  my ($PathSample, %opt) = @_;
  my ($debug);
  my ($SampleName);

  # function parameters
  $debug = $opt{-debug};

  # read sample file
  unless (open (INSAMPLE, "$CorePath{call}{SampleName} '$PathSample' |")) {
    $debug and printf STDERR "%s. ERROR: unable to get read name from file %s\n", &MySub, $PathSample||"''";
    return undef;
  }
  $SampleName = &ReadFile (\*INSAMPLE);
  chop $SampleName;
  $debug and printf STDERR "%s. read name %s in file %s\n", &MySub, $SampleName||"''", $PathSample||"''";

  # return
  return $SampleName;
}


# report read name in raw data sample file
#
# INTERFACE
# - argument 1: path of target file
#
sub SampleReport {
  my ($PathSample, %opt) = @_;
  my ($SampleName);

  # read sample file
  unless ($SampleName = &SampleName ($PathSample)) {
    printf STDERR "%s. ERROR: unable to get read name from file %s\n", &MySub, $PathSample||"''";
    return;
  }

  # do work: report
  printf "file %s, read name %s\n", $PathSample||"''", $SampleName||"''";
}


# enter name of sample file (niced) to global list
#
# INTERFACE
# - argument 1: path of target file
#
# - options:
#   -debug      [STD]
#
# - global data:
#   $ProgParam{store}{sample}     list of Sample names
#                                 Remember: index [0] is a fake position!
#
sub SampleStoreNice {
  my ($PathSample, %opt) = @_;
  my ($debug);
  my ($SampleNmb, $SampleName, $CtChg);

  # function parameters
  $debug = $opt{-debug};

  # sample number
  if ($PathSample =~ m/\bSample (\d+)\b/i) {
    $SampleNmb = $1;
    $debug and printf STDERR "%s. checking Sample file no. %d\n", &MySub, $SampleNmb;
  } else {
    printf STDERR "%s. didn't get number of sample, file %s\n", &MySub, $PathSample||"''";
    return;
  }

  # read read name from sample file
  unless ($SampleName = &SampleName ($PathSample)) {
    printf STDERR "%s. ERROR: unable to get read name from file %s\n", &MySub, $PathSample||"''";
    return;
  }

  # nice read name, store it
  $CtChg = &NiceName (\$SampleName);
  $debug and printf STDERR "%s. made %d changing%s to read ID of sample file %s\n", &MySub,
    $CtChg, ($CtChg==1)?'':'s', $PathSample||"''";
  $ProgParam{store}{sample}[$SampleNmb] = $SampleName;
}


################################################################################
# raw data actions
################################################################################


# create Autosample file
#
# INTERFACE
# - argument 1+: path of target folder
#
sub ProgAutosample {
  my ($arg);

  $ProgParam{action}{SampleZip} =
  $ProgParam{action}{SamplePure} = \&SampleStoreNice;

  foreach $arg (@ProgArg) {

    # action on folder
    if (-d $arg) {
      &SampleDirUnzip ($arg);
      # WARNING:
      # currently, we don't look up any Autosample information that may already exist
      $ProgParam{store}{sample} = [];
      &SampleIterDir ($arg);
      $ProgParam{store}{sample}[0] and &SampleDirNameMove0 ($arg);
      &SampleDirAsampleBackup ($arg);
      &SampleDirAsampleWrite();
    }

    # not a folder
    else {
      printf STDERR "ERROR: folder '%s' not found\n", $arg;
    }
  }
}


# rename output of ABI 3700 machine
#
# INTERFACE
# - argument 1: path of target folder
#
# DESCRIPTION
# - some logging is done to STDOUT
# - the ABI files are expected to have names like:
#   PlateA_B08_JC3b02r_061.ab1
#   plate (on machine)
#          coordinate [not assumed to be any letter case]
#              sample name/plate naming scheme
#                      capillary
#                          std suffix
# - two philosophies:
#   1. all samples have the same name, i.e. the plate name including primer
#      and chemistry specifications. The final sample names are derived from
#      the plate name and the plate coordinate.
#   2. all samples have a unique name, i.e. the final sample name.
#
sub ProgRename3700 {
  my ($PathDir) = @_;
  my ($debug);
  my ($PathDirName, $PathDirNew, $PathIt, $PathSeq);
  my ($mach, $b384, $bDel);
  my (@ABI, $CtAbi, $pAbi, %NameIdx, $NameGlob);
  my ($plate, $prm, $num, $chem);
  my ($mplate, @coo, @capl);
  my ($hOut);

  # function parameters
  $debug = $ProgSwitch{-debug};
  $PathDir =~ s|/*$||;
  printf "name of folder is %s in %s\n",
    &PathSplit($PathDir)->{name}||"''", &PathSplit($PathDir)->{dir};
  $ProgParam{store}{sample} = [];

  # ensure existence of Sample folder
  unless (-d $PathDir) {
    printf STDERR "ERROR: ABI folder %s doesn't exist\n", $PathDir||"''";
    return;
  }

  ##############################################################################
  # analyse files in 3700 folder
  # - sample read files/names

  # get read names from sample files in directory
  foreach $PathIt (glob ("$PathDir/*.ab1")) {
    my ($fname, $coo, $capl, $name);
    $fname = &PathSplit($PathIt)->{name};

    # get information fields from Sample filename
    # - separated by '_': coordinates, naming scheme phrase, capillary number
    # *** bsenf : 25.01.05: new naming scheme ***
    if ($fname =~ m/^([a-zA-Z0-9\-\.]+)_([A-PS][0-9]{2})_([a-zA-Z0-9\.-]+)_([0-9]{3})\.ab1$/) {
      $mplate = $1;
      $coo = $2;
      $capl = int $4;
      $name = $3;
      if (! $b384 and (substr($coo,0,1)=~m/[I-P]/i or int(substr($coo,1,2))>12)) {
        printf STDERR "folder is derived from 384-well plate\n";
        $b384 = 1;
      }
    } else {
      printf STDERR "ERROR: ABI filename %s doesn't match to naming scheme (step 1)\n", $fname||"''";
      printf STDERR "  folder not touched\n";
      return;
    }

    # empty or faked sample name
    if ($name =~ m/^(empty|fake|leer|none)$/i) {
      unless ($bDel) {
        printf STDERR "WARNING: some ABI files contain empty or faked read phrases (e.g. %s)\n", $fname||"''";
        printf STDERR "  Delete them and continue? ";
        $bDel = &QueryConfirm() or return;
      }
      unlink "$PathDir/$fname";
    }

    # enter sample information into array
    # - formerly, the sample number reflected the capillary number. But, as
    #   this does not work with the 3730 (48 capillary) machines where the
    #   capillary number is not unique.
    else {
      push @ABI, {
        coo   => $coo,
        capl  => $capl,
        name  => $name,
        fname => $fname,
        };
      $debug and printf STDERR "%s. entered sample: name %s, coord. %s, capillary %d\n", &MySub,
        $name, $coo, $capl;
    }
  }

  ##############################################################################
  # naming philosophy and according actions

  # determine naming philosophy
  %NameIdx = ();
  map { $NameIdx{$_->{name}} ++; } grep { $_ and %$_ and $_->{name} } @ABI;

  # naming philosophy 1
  # One single naming phrase. Read IDs are generated by iteration over plate
  # coordinates
  if (int (keys %NameIdx) == 1) {
    $debug and printf STDERR "%s. philosophy 1\n", &MySub;

    # get information fields from naming scheme phrase:
    # - plate
    # - primer letter, possibly preceded by dot
    # - read number (associated with primer letter) - optional
    # - chemistry - optional
    $NameGlob = (%NameIdx)[0];
    if ($NameGlob =~ m/^(.*?)(\.?([prstw])((\d)([a-z])?)?)?$/) {
      $plate = $1;
      $prm = $3 || 's';
      $num = $5 || 1;
      $chem = $6;
    } else {
      printf STDERR "ERROR: name template %s doesn't match to naming scheme (step 2): %s\n",
        $NameGlob||"''";
      printf STDERR "  folder not touched\n";
      return;
    }

    # derive read names from plate / coordinate
    # *** bsenf : 25.01.05: new naming scheme ***
    foreach $pAbi (grep { $_ and %$_ and $_->{name} } @ABI) {
      # A-B-C-D-E
      if ( $$pAbi{name} =~ m/([\w]+)-([\w]+)-([\w]+)-([\w]+)-([\w]+)/) {
        $$pAbi{name} = sprintf ("%s-%s-%s-%s%s.%s",$1,$2,$3,$4,lc($$pAbi{coo}),$5);
      } 
      # A-B-C-D.E
      elsif ( $$pAbi{name} =~ m/([\w]+)-([\w]+)-([\w]+)-([\w]+)\.([\w]+)/) {
        #
        #  do not change name
        #
      }
      else { 
        $$pAbi{name} = sprintf ("%s%s.%s%s%s",$plate,lc($$pAbi{coo}),$prm,$num,$chem);
      }
    }
  }

  # naming philosophy 2
  # Unique read IDs provided
  elsif (int(keys %NameIdx) == int(grep{ $_ and %$_ and $_->{name} }@ABI)) {
    $debug and printf STDERR "%s. philosophy 2\n", &MySub;
  }

  # unknown naming philosophy
  # something in between philosophy 1 and 2
  else {
    printf STDERR "%s. do not know which naming philosophy underlies\n", &MySub;
    printf STDERR "  no. of valid ABI files: %d\n", int (grep { $_ and %$_ and $_->{name} } @ABI);
    printf STDERR "  no. of unique names: %d\n", int (keys %NameIdx);
    printf STDERR "  duplicated names: %s\n", join (' ', grep { $NameIdx{$_} > 1 } keys %NameIdx);
    return;
  }

  ##############################################################################
  # rename files, write Autosample & coordinate index, rename folder

  # rename Sample files
  # create Autosample list, number according to primary name sampling
  foreach $pAbi (grep { $_ and %$_ and $_->{name} } @ABI) {
    $CtAbi++;

    # rename Sample file
    rename "$PathDir/$$pAbi{fname}", "$PathDir/Sample $CtAbi";
    # enter Sample name to global array (for Autosample file)
    $ProgParam{store}{sample}[$CtAbi] = $$pAbi{name};
    # enter capillary to array (for capillary index file)
    $capl[$CtAbi] = $$pAbi{capl};
    # enter coordinate to array (for coordinate index file)
    $coo[$CtAbi] = $$pAbi{coo};

    # delete "Sample *.seq" file, occasionally
    $PathSeq = &PathChgSuffix ($$pAbi{fname}, 'seq', -last=>1);
    if (-f "$PathDir/$PathSeq") {
      unlink "$PathDir/$PathSeq";
    }
  }

  # get information field from Sample directory: machine name
  $PathDirName = &PathSplit($PathDir)->{name};
  $PathDirName =~ m/^Run_(\w{2})/;
  $mach = $1 || 'xy';
  substr($mach,0,1) = uc (substr($mach,0,1));
  substr($mach,1,1) = lc (substr($mach,1,1));

  # rename Sample directory
  # do the job by: rename ABI directory
  if ($PathDirName =~ m/\d{2}(\d{2})-(\d{2})-(\d{2})_(\w+)/) {
    $PathDirNew = &PathSplit($PathDir)->{dir} .'/'. "Results $1.$2.$3 $mach$4";
    rename $PathDir, $PathDirNew;
    printf "renaming folder to %s\n", $PathDirNew||"''";
    $PathDir = $PathDirNew;
  } else {
    printf STDERR "ERROR: folder name %s doesn't match to naming scheme (step 2)\n", $PathDir||"''";
    return;
  }

  # save plate name to file
  $hOut = FileHandle->new("$PathDir/plate",'w');
  printf $hOut "%s\n", $mplate;
  # save Sample list to (new) AutoSample file
  if ($ProgParam{store}{sample} and @{$ProgParam{store}{sample}}) {
    &SampleDirAsampleBackup ($PathDir);
    &SampleDirAsampleWrite();
  }
  # save coordinate list to file
  $hOut = FileHandle->new("$PathDir/capillary",'w');
  shift @capl;
  print  $hOut map{ "$_\n" } @capl;
  $hOut = FileHandle->new("$PathDir/coordinate",'w');
  shift @coo;
  print  $hOut map{ "$_\n" } @coo;
}


# reorganise Baylor's Sample files
#
# INTERFACE
# - argument 1: path of source folder
#
# - global options:
#   -debug      [STD]
#   -InSuffix   file suffix which has to be removed to get the trace ID.
#               Default: '.scf'
#
# DESCRIPTION
# - Formerly, this applied to trace files (ABI) provided by Baylor College 2001-02-23
# - Now, this applies to trace files (SCF) provided by Baylor College 2001-04-19
# - This also works with trace files (SCF) provided by Sanger Centre 2001-07-15
#
sub ProgSampleImport {
  my ($SrcDir) = @_;
  my ($debug, $dbg2, $bFileChg, $SrcSuffix, $RegexpSuffix);
  my (%path, %ReapIdx);
  my ($ItTrace, $TraceName, $pNameField);
  my ($ItDir, @fofn, $CtSample, $ItSample);

  ##############################################################################
  # pre-work

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $bFileChg = (exists($ProgOpt{-var}{fchg}) and defined($ProgOpt{-var}{fchg})) ?
    $ProgOpt{-var}{fchg} : 1;
  if (-d ($SrcDir = &PathExpand($SrcDir))) {
    printf "%s. working on Baylor trace source folder %s\n", &MySub, $SrcDir||"''";
  } else {
    printf STDERR "ERROR: unable to find folder %s\n", $_[0]||"''";
    return;
  }
  $SrcSuffix = $ProgOpt{-InSuffix} || '.scf';
  $RegexpSuffix = $SrcSuffix ? &RegexpEncode($SrcSuffix).'$' : undef;
  $path{TgtRoot} = $CorePath{ReadWatch}{RawGroupDir};

  ##############################################################################
  # move SCF files to Sample data file structure

  # loop over source files
  foreach $ItTrace (&ReadDir ($SrcDir, -select=>$RegexpSuffix)) {
    $TraceName = $ItTrace;
    $TraceName =~ s/$RegexpSuffix//;

    # split name into fields
    # formerly inline: $ItTrace =~ m/^(\w{3})(\w)(\w{3})(\d{2,3})(\d{2})\./;
    #   ($$pNameField{tgt}, $$pNameField{lib0}, $$pNameField{AbiSfx}, $$pNameField{plt}, $$pNameField{coo}) = ($1, $2, $3, $4, $5);
    $pNameField = ReadidToFields ($TraceName, -clone=>0);
    if (! $pNameField or $$pNameField{stx} =~ m/minimal/) {
      printf STDERR "ERROR: regexp matching failed for trace %s, name %s\n",
        $ItTrace, $TraceName;
      next;
    }
    $dbg2 and printf STDERR "%s. determined naming syntax %s for trace %s, name %s\n", &MySub,
      $$pNameField{stx}, $ItTrace, $TraceName;

    # derive gel directory, confirm existence
    if ($$pNameField{stx} eq 'Baylor') {
      $$pNameField{AbiSfx} = $$pNameField{chm} . $$pNameField{num} . $$pNameField{prm};
    } else {
      $$pNameField{AbiSfx} = '';
    }
    if ($$pNameField{tgt} =~ m/II[BD]/) {
      $path{TgtDir} = "$path{TgtRoot}/$$pNameField{tgt}$$pNameField{lib0}";
      $path{TgtDirAbiGlob} = $path{TgtDir} .'/'
        . "Results*$$pNameField{lib0}$$pNameField{plt}$$pNameField{AbiSfx}";
    } else {
      $path{TgtDir} = "$path{TgtRoot}/$$pNameField{tgt}";
      $path{TgtDirAbiGlob} = $path{TgtDir} .'/'
        . "Results*$$pNameField{lib0}$$pNameField{plt}$$pNameField{AbiSfx}";
    }
    $path{TgtDirAbiTpl} = $path{TgtDirAbiGlob}; $path{TgtDirAbiTpl} =~ s/\*/ %s /;
    if ($dbg2) {
      printf STDERR "%s. derived name fields for read %s\n", &MySub, $ItTrace;
      printf STDERR "  full name: %s\n", $$pNameField{full}||"''";
      printf STDERR "  project: %s\n", $$pNameField{tgt}||"''";
      printf STDERR "  plate:   %s\n", $$pNameField{plt}||"''";
      printf STDERR "  coords:  %s\n", $$pNameField{coo}||"''";
      printf STDERR "%s. derived target path information\n", &MySub;
      printf STDERR "  ABI folder suffix: %s\n", $$pNameField{AbiSfx}||"''";
      printf STDERR "  target base folder: %s %s\n",
        $path{TgtDir}||"''", (-d $path{TgtDir})?'exists':'does not exist';
      printf STDERR "  target ABI folder glob: %s %s\n",
        $path{TgtDirAbiGlob}||"''", (int(glob($path{TgtDirAbiGlob}))) ? 'matches':'does not match';
      printf STDERR "  target ABI folder new (sprintf template): %s\n", $path{TgtDirAbiTpl}||"''";
    }

    # derive target directory, confirm existence
    unless (-d "$path{TgtRoot}/$$pNameField{tgt}") {
      mkdir ("$path{TgtRoot}/$$pNameField{tgt}");
    }

    # load REAP index
    unless ($ReapIdx{$$pNameField{tgt}}) {
      $path{TgtIndexFile} = $path{TgtDir} .'/'. $CorePath{GSCJ}{RawReapSub} .'/'
        . "$$pNameField{tgt}$CorePath{GSCJ}{RawReapList}";
      $debug and printf STDERR "%s. reading REAP index file %s\n", &MySub,
        $path{TgtIndexFile};
      unless (defined ($ReapIdx{$$pNameField{tgt}}
      = &PlainToTable ($path{TgtIndexFile}, -TabType=>'HIA', -ColIdx=>'2', -delimit=>'SpaceRet', -debug=>$dbg2))) {
        printf STDERR "WARNING: no REAP index found for target %s\n", $$pNameField{tgt}||"''";
        $ReapIdx{$$pNameField{tgt}} = {};
      }
    }
    # read in REAP index? => skip
    if ($ReapIdx{$$pNameField{tgt}}{$$pNameField{full}}) {
      $debug and printf STDERR "%s. found read %s in REAP index - skipped\n", &MySub,
        $$pNameField{'$pNameField'}||"''";
      next;
    }

    ############################################################################
    # locate/create ABI folder, copy trace files to ABI folder

    # find existing gel directory
    if (($path{TgtDirAbi},) = grep { -d $_ } glob($path{TgtDirAbiGlob})) {
      $debug and printf STDERR "%s. found existing target directory: %s\n", &MySub,
        $path{TgtDirAbi};
      $path{TgtDirAbi} =~ m/ (\d{2}.\d{2}.\d{2}) (\w+)/;
      $$pNameField{date} = $1;

      # Sample files exist, rename them back to SCF files
      # glob() instead of &ReadDir() doesn't work here. Why?
      if (int (&ReadDir ($path{TgtDirAbi}, -select=>'^Sample'))) {
        printf STDERR "%s. WARNING: target Sample folder contains %d Sample files!\n", &MySub,
          int (&ReadDir ($path{TgtDirAbi}, -select=>'^Sample'));

        # unzip complete folder, load AutoSample file
        &SampleDirUnzip ($path{TgtDirAbi});
        if (@fofn = @{ &SampleDirAsampleRead($path{TgtDirAbi})||[] }) {
          printf STDERR "%s. target Sample folder contains Autsample file with %d Sample names!\n", &MySub,
            int (grep { $_ } @fofn);

          # rename Sample files
          # existing unconverted source files (test suffix) have priority!
          if ($bFileChg) {
            for ($CtSample=0; $CtSample<@fofn; $CtSample++) {
              unless ($ItSample = $fofn[$CtSample]) { next }
              if (-e "$path{TgtDirAbi}/$ItSample.scf") {
                $debug and printf STDERR "%s. new SCF file gets priority: %s\n", &MySub,
                  "$path{TgtDirAbi}/$ItSample.scf";
                unlink ("$path{TgtDirAbi}/Sample $CtSample", "$path{TgtDirAbi}/Sample $CtSample.Seq");
              } else {
                printf STDERR "%s. renaming Sample %d\n", &MySub, $CtSample;
                rename ("$path{TgtDirAbi}/Sample $CtSample", "$path{TgtDirAbi}/$ItSample$SrcSuffix");
                unlink ("$path{TgtDirAbi}/Sample $CtSample.Seq");
              }
            }
          }
        }

        # AutoSample file error
        else {
          printf STDERR "%s. ERROR: empty AutoSample file - any old Sample information will be lost now\n", &MySub;
        }
      }
    }

    # create new gel directory
    else {
      $$pNameField{date} = &TimeStr (-time=>&ftime ("$SrcDir/$ItTrace"), -format=>'%y.%m.%d');
      $path{TgtDirAbi} = sprintf ($path{TgtDirAbiTpl}, $$pNameField{date});
      $debug and printf STDERR "%s. creating target directory: %s\n", &MySub,
        $path{TgtDirAbi};
      if ($bFileChg) {
        mkdir ($path{TgtDirAbi});
        $path{TgtDirTouched}{$path{TgtDirAbi}} = 1;
      }
    }

    # copy SCF files to Sample file structure
    if ($bFileChg) {
      if (-e "$path{TgtDirAbi}/$ItTrace") {
        if (-s("$path{TgtDirAbi}/$ItTrace") == -s("$path{TgtDirAbi}/$ItTrace")) {
          printf STDERR "%s. WARNING: target file %s exists, same size as source file\n", &MySub,
            "$path{TgtDirAbi}/$ItTrace";
          unlink "$path{TgtDirAbi}/$ItTrace";
          &FileCopy ("$SrcDir/$ItTrace", "$path{TgtDirAbi}/$ItTrace");
          $path{TgtDirTouched}{$path{TgtDirAbi}} = 1;
        } else {
          printf STDERR "%s. WARNING: target file %s exists and is different from source file - skipped\n", &MySub,
            "$path{TgtDirAbi}/$ItTrace";
        }
      } else {
        &FileCopy ("$SrcDir/$ItTrace", "$path{TgtDirAbi}/$ItTrace");
        $path{TgtDirTouched}{$path{TgtDirAbi}} = 1;
      }
    }
  }

  ##############################################################################
  # refine Sample data file structure

  # do post-work on Sample folders
  $debug and printf STDERR "%s. %d directories touched by recent action\n", &MySub,
    int keys %{$path{TgtDirTouched}};
  if ($bFileChg) {
    foreach $ItDir (sort keys %{$path{TgtDirTouched}}) {

      # rename SCF files to Sample files, create AutoSample file
      $debug and printf STDERR "%s. renaming source traces to Sample files in gel folder %s, %d files\n", &MySub,
        $ItDir, int (grep { $_ !~ m/sample/i } &ReadDir ($ItDir, -select=>$RegexpSuffix||undef));
      $CtSample = 0; @fofn = ();
      foreach (sort grep { $_ !~ m/sample/i } &ReadDir ($ItDir, -select=>$RegexpSuffix||undef)) {
        $ItTrace = $_;
        $CtSample ++;
        rename ("$ItDir/$_", "$ItDir/Sample $CtSample");
        $ItTrace =~ s/$RegexpSuffix//;
        push @fofn, $ItTrace;
      }

      # write Autosample file
      $ProgParam{store}{sample} = [ undef, @fofn ];
      &SampleDirAsampleBackup ($ItDir);
      &SampleDirAsampleWrite();
    }
  }
}


# sort Sample files in Sample folder into appropriate target folders
#
# INTERFACE
# - argument 1: path of source folder
#
# - global options:
#   -debug      [STD]
#
# - global data:
#   $ProgParam{path}{AutoSample}  path of AutoSample file
#   $ProgParam{store}{sample}     list of Sample names (AutoSample list)
#                                 see &SampleDirAsampleRead DESCRIPTION for index numbering
#
sub ProgSampleSort {
  my @TgtPossib = (
    { regexp => '^13364',
      group  => 'Dictyostelium',
      tgt    => 'DY3850',
      },
    { regexp => '^ePCR',
      group  => 'Dictyostelium',
      tgt    => 'transpos',
      },
    { regexp => '^H3',
      group  => 'Dictyostelium',
      tgt    => 'transpos',
      },
    { regexp => '^JAX4',
      group  => 'Dictyostelium',
      tgt    => 'AX4',
      },
    { regexp => '^JC1',
      group  => 'Dictyostelium',
      tgt    => 'Chr1',
      },
    { regexp => '^JPCR([^a]|a[^2]|a2[^0-3])',
      group  => 'Dictyostelium',
      tgt    => 'PCR',
      },
    { regexp => '^JTR',
      group  => 'Dictyostelium',
      tgt    => 'transpos',
      },
    { regexp => '^My',
      group  => 'ribo',
      tgt    => 'Agaric',
      },
    { regexp => '^R2',
      group  => 'ribo',
      tgt    => 'R2',
      },
    );
  my ($PathSrc) = @_;
  my ($debug);
  my (%TgtOrig, @sample, $ItSample, $ItTgt);
  my ($PathCurr);

  # function parameters
  $debug = $ProgOpt{-debug};

  ##############################################################################
  # pre-work: analyse path argument, read/construct list of samples
  #
  # list of samples is array of hashes containg fields
  # id    Sample name
  # nmb   Sample number, referring to Sample filename
  # tgt   Sample target, reference to data structure (hash)
  {
  
    # analyse path argument = raw machine output (ABI folder)
    $PathSrc =~ s|/*$||;
    unless (-d $PathSrc) {
      printf STDERR "ERROR: source folder '%s' doesn't exist\n", $PathSrc;
      exit 1;
    }
    my @tmp = split (/\//, &PathExpand($PathSrc));
    @TgtOrig{'group','tgt','machraw'} = @tmp[-3,-2,-1];
    $TgtOrig{mach} = $TgtOrig{machraw};
    $TgtOrig{mach} =~ s/^Results //;
    $TgtOrig{mach} =~ s/ /_/;
    $debug and printf STDERR "%s. original target project: .../%s/%s, machine: %s\n", &MySub,
      @TgtOrig{'group','tgt','mach'};
    unless ($TgtOrig{machraw}) { exit 1 }
    @TgtPossib = grep { $_->{group} ne $TgtOrig{group} or
                        $_->{tgt} ne $TgtOrig{tgt} } @TgtPossib;

    # get Sample names
    # 1st AutoSample file
    # 2nd Sample files
    my $CtSample = 0;
    @sample = grep { $_->{id} ne '' and $_->{id}!~m/^(fake|missing_sample_file)$/ }
      map { { id=>$_, nmb=>$CtSample++ } } &SampleDirNameTab($PathSrc);
    unless (@sample) {
      printf STDERR "ERROR: unable to get list of Sample names\n";
      exit 1;
    }
    printf "%d Sample%s in source folder '%s'\n",
      int(@sample), (@sample==1)?'':'s', &PathPhysical($PathSrc);
    if ($debug) {
      printf STDERR "%s. Sample names:\n", &MySub;
      print  STDERR join ('', map { sprintf("  %3d  %s\n",$_->{nmb},$_->{id}) } @sample);
    }
  }

  ##############################################################################
  # sort Samples into target folders
  {

    # loop over possible targets
    # sort Sample files into targets
    foreach $ItTgt (@TgtPossib) {
      map { $_->{id}=~m/$$ItTgt{regexp}/ and $_->{tgt}=$ItTgt } @sample;
    }
    map { $_->{tgt} ||= \%TgtOrig } @sample;
    if (my @TgtMiss = grep { $_->{tgt} eq \%TgtOrig } @sample) {
      printf STDERR "%d Sample%s remain in source folder\n",
        int(@TgtMiss), (@TgtMiss==1)?'':'s';
      print  STDERR join ('', map { "$_->{id}\n" } @TgtMiss);
    }
    if ($debug) {
      printf STDERR "%s. found targets for Samples:\n", &MySub;
      print  STDERR join ('', map { "  $_->{nmb}\t$_->{id}\t$_->{tgt}{group}/$_->{tgt}{tgt}\n" }
                              grep { $_->{tgt} } @sample);
    }
  }

  ##############################################################################
  # move Samples
  { my ($PathTgt, %PathHad, @SampleCurr, $pASampleTgt);

    # loop over target folders to move Samples
    my %tmp = map { ($_->{tgt} => $_->{tgt} ) } @sample;
    my @TgtTouched = values %tmp;
    foreach $ItTgt (@TgtTouched) {
      $debug and printf STDERR "%s. iterating over target: .../%s/%s\n", &MySub,
        @{$ItTgt}{'group','tgt'};
      $PathTgt = "$CorePath{GSCJ}{RawHome}/$$ItTgt{group}/$$ItTgt{tgt}/$TgtOrig{machraw}";

      # pre-work for target folder
      # - check for existing target (if not source directory) => backup or remove
      # - create target, go on
      if ($ItTgt ne \%TgtOrig) {
        if (-e $PathTgt and ! $PathHad{$PathTgt}) {
          printf STDERR "WARNING: target folder '%s' already exists - target skipped\n", $PathTgt;
          print  STDERR "  either delete/renew or\n  backup this folder? (enter full path) ";
          my $PathNew;
          chop ($PathNew=<STDIN>);
          if ($PathNew) {
            if (-e $PathNew or ! -w $PathNew) {
              printf STDERR "WARNING: backup path '%s' exists, using default '%s'\n",
                $PathTgt, ($PathNew.='.bak');
            }
            mv ($PathTgt, $PathNew);
          } else {
            system "$CorePath{call}{rm} -fR '$PathTgt'";
          }
        }
        mkdir ($PathTgt);
      }
      $PathHad{$PathTgt} = 1;

      # collect samples for current target, write AutoSample file
      @SampleCurr = grep { $_->{tgt} eq $ItTgt } @sample;
      $pASampleTgt = &SampleDirAsampleRead ($PathTgt);
        # sets name of the AutoSample file globally
      map { $$pASampleTgt[$_->{nmb}]=$_->{id} } @SampleCurr;
      $debug and printf STDERR "%s. writing AutoSample file: '%s'\n", &MySub,
        $ProgParam{path}{AutoSample};
      &SampleDirAsampleWrite();

      # report and pre-work for target folder
      # - check for being source directory => nothing left to do
      # - report moving of files
      if ($ItTgt eq \%TgtOrig) {
        printf "leaving %d Sample%s in source folder '%s'\n",
          int(@SampleCurr), (@SampleCurr==1)?'':'s', $PathTgt;
        next;
      } else {
        printf "moving %d Sample%s to target folder '%s'\n",
          int(@SampleCurr), (@SampleCurr==1)?'':'s', $PathTgt;
      }

      # move files
      foreach $ItSample (grep { $_->{tgt} eq $ItTgt } @sample) {
        foreach (&ReadDir ($PathSrc, -select=>"^Sample $$ItSample{nmb}\\b", -expand=>1)) {
          $debug and printf STDERR "%s. moving file %s\n", &MySub, $_||"''";
          &mv ($_, $PathTgt);
          unlink $_;
        }
      }
    }
  }

  ##############################################################################
  # delete source folder, Experiment folder, index entries

  # delete source folder if there're no Samples left
  unless (&ReadDir ($PathSrc, -select=>'^Sample')) {
    print  "removing source folder\n";
    # 'rm -fR' is fast and effective
    system "$CorePath{call}{rm} -fR '$PathSrc'";
    if (-e $PathSrc) {
      print  STDERR "ERROR: failed to remove source folder\n";
    }
  }

  # delete Experiment folder corresponding to source folder
  $PathCurr = "$CorePath{GSCJ}{ExperHome}/$TgtOrig{group}/$TgtOrig{tgt}/$TgtOrig{mach}";
  print  "removing Experiment folder $PathCurr";
  if (-e $PathCurr) {
    # 'rm -fR' is fast and effective
    system "$CorePath{call}{rm} -fR $PathCurr";
    if (-e $PathCurr) { print  " - failed\n"; }
    else              { print  " - OK\n"; }
  }
  else                { print  " - not found\n"; }

  # delete REAP index entries
  $PathCurr = $CorePath{GSCJ}{RawHome} .'/'. $TgtOrig{group} .'/'. $TgtOrig{tgt} .'/'.
    $CorePath{GSCJ}{RawReapSub} .'/'. $TgtOrig{tgt} . $CorePath{GSCJ}{RawReapList};
  if (-e $PathCurr) {
    system "$CorePath{call}{grep} -v $TgtOrig{mach} $PathCurr > $PathCurr.new";
    printf "removing %d entries from index %s\n", &wc_l ($PathCurr) - &wc_l ("$PathCurr.new"), $PathCurr;
    mv ("$PathCurr.new", $PathCurr);
  } else {
    printf STDERR "WARNING: there's no index file %s\n", $PathCurr;
  }
}


# produce REAP call *.ini files for list of ABI folders
#
# INTERFACE
# - argument 1*: paths of Sample folders (ABI folders)
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - this function depends on a personal .reap.rc file
#
sub ProgReap {
  my $ShellWordMax = 1000;
  my (@SrcDir) = @_;
  my ($debug, $dbg2, $OutStump);
  my ($ItSrc, $SrcPath, %SrcIdx, %SrcTree);
  my ($ItGrp, $ItPrj, $StrFold);
  my (@ReapTpl, $ReapTplIns, $ReapIni, @ReapLst, $ReapLog, $pCall);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $OutStump = $ProgOpt{-OutStump} || "ReapCall${$}-";
  $OutStump =~ m|/| or $OutStump = './'. $OutStump;

  # loop over arguments (ABI folders)
  foreach $ItSrc (@SrcDir) {
    $SrcPath = &PathExpand ($ItSrc);
    unless (-d $SrcPath) {
      printf STDERR "ERROR: folder %s doesn't exist\n", $SrcPath;
      next;
    }

    # split name into fields
    # '/' characters have been reduced to minimal number (&PathExpand)
    unless ($SrcPath =~ m|^/gen/\w+/raw/(\w+)/(\w+)/Results ([^/]+)/*$|) {
      printf STDERR "ERROR: regexp matching failed for entry %s\n", $SrcPath;
      next;
    }
    $ItGrp = $1;
    $ItPrj = $2;
    $StrFold = $3;

    # check validity of ABI folder name
    if ($StrFold !~ m|^(\d{2}\.\d{2}\.\d{2} [\w.-]+)$|) {
      printf STDERR "ERROR: irregular ABI folder name: %s\n", $StrFold;
      next;
    }

    # enter ABI folder, take each folder once only
    $SrcIdx{$ItGrp}{$ItPrj} ||= {};
    $SrcIdx{$ItGrp}{$ItPrj}{$StrFold} and next;
    $SrcIdx{$ItGrp}{$ItPrj}{$StrFold} = 1;
    $SrcTree{$ItGrp}{$ItPrj} ||= [];
    push @{$SrcTree{$ItGrp}{$ItPrj}}, $StrFold;
  }

  # loop over group/project combinations
  foreach $ItGrp (keys %SrcTree) {
    foreach $ItPrj (keys %{$SrcTree{$ItGrp}}) {
      while (@{$SrcTree{$ItGrp}{$ItPrj}}) {

        # read REAPegn *.ini file template
        @ReapTpl = grep { $_ !~ m/set +group=/ and $_ !~ m/set +project=/ and $_ !~ m/set +resultlist=/ }
          map {m/(.+)\s*\n$/} &ReadFile(&PathExpand('~/.reap.rc'));

        # enter variable parameters: group, project, folders
        push @ReapTpl, "set group=$ItGrp";
        push @ReapTpl, "set project=$ItPrj";
        $ReapTplIns = '';
        while (@{$SrcTree{$ItGrp}{$ItPrj}} and
          12 + length($ReapTplIns) + 2 + length($SrcTree{$ItGrp}{$ItPrj}[0]) < $ShellWordMax) {
          $ReapTplIns .= ($ReapTplIns ? ' ;':'') . shift (@{$SrcTree{$ItGrp}{$ItPrj}});
        }
        unless (length $ReapTplIns) { last }
        push @ReapTpl, "set resultlist=(\"$ReapTplIns\")";

        # write parameter file
        $ReapIni = &PathUnique (-name=>"$OutStump##.ini");
        &WriteFile ($ReapIni, join ('', map { "$_\n" } @ReapTpl));
        printf "$ReapIni\n";
        push @ReapLst, $ReapIni;
      }
    }
  }

  # call REAP program iteratively
  foreach $ReapIni (@ReapLst) {
    $ReapLog = $ReapIni;
    $ReapLog =~ s/\.ini$/.log/;
    $pCall = &CallExtClosure ($CorePath{call}{ReapEgn},
      $ReapIni, "> $ReapLog", '2>&1');
    unless (&$pCall()) {
      printf STDERR "ERROR: REAP call failed:\n  call: %s\n", &$pCall ('WhatRUCalling');
    }
    printf "$ReapLog\n";
  }
}


################################################################################
# experimental data elementaries
################################################################################


# full recursion on Experiment folder structure
#
# INTERFACE
# - argument 1: path of source folder
#
# - options:
#   -debug      [STD]
#   -recursive  do full file structure recursion on all subfolders
#               This function performs recursion on the whole Experiment file
#               tree and looks for defined subfolders which are recognized by
#               their naming scheme (B.Drescher's convention).
#               In order to avoid endless loops you may give a switch
#               value of <1. Then, the first occurence of project folder
#               file structure schemes will block further recursion.
#
#   additional options may be handed over to subfolder action routines
#
# - global data:
#   $ProgParam{action}                     for standard Experiment folder actions and options see &ExperIterDir
#   $ProgParam{action}{ExperBlast}
#   $ProgParam{action}{ExperDb}
#   $ProgParam{action}{ExperDir}
#   $ProgParam{action}{ExperProj}
#   $ProgParam{action}{ExperProjroot}
#
# DESCRIPTION
# - code that's meant to work on the many different file/folder types
#   encountered has to be referenced in the global hash
#   $ProgParam{action}.
#
sub ExperIterat {
  my ($PathSrcDir, %opt) = @_;
  my ($debug, $dbg2);
  my ($CurrEntryName, $CurrEntryPath, $CurrEntryReapbase);
  my ($bProjRoot);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $PathSrcDir = &PathExpand ($PathSrcDir);
  $debug and printf STDERR "%s. processing directory %s\n", &MySub, $PathSrcDir||"''";

  # work through folder
  foreach $CurrEntryName (sort &ReadDir($PathSrcDir)) {
    $CurrEntryPath = "$PathSrcDir/$CurrEntryName";

    # actions on non-empty subdirectories
    if (-d $CurrEntryPath and int &ReadDir($CurrEntryPath)) {
      if (0) {
      } elsif ($CurrEntryName eq '.blast') {
        $bProjRoot ++;
        if ($ProgParam{action}{ExperBlast}) {
          &{$ProgParam{action}{ExperBlast}} ($CurrEntryPath, -debug=>$dbg2);
        }
      } elsif ($CurrEntryName eq '.db') {
        $bProjRoot ++;
        if ($ProgParam{action}{ExperDb}) {
          &{$ProgParam{action}{ExperDb}} ($CurrEntryPath, -debug=>$dbg2);
        }
      } elsif ($CurrEntryName eq '.project') {
        $bProjRoot ++;
        if ($ProgParam{action}{ExperProj}) {
          &{$ProgParam{action}{ExperProj}} ($CurrEntryPath, -debug=>$dbg2);
        }
      } elsif ($CurrEntryName =~ m|^(\d{2}\.\d{2}\.\d{2})_([\w.-]+)/*$|) {
        $CurrEntryReapbase = $1 . $2;
        $PathSrcDir=~m|([\w-]+)/*$| and $CurrEntryReapbase.=$1;
        $CurrEntryReapbase .= 'fn';
        unless (-e "$CurrEntryPath/experiment_files" or (
          -e "$CurrEntryPath/$CurrEntryReapbase" and
          -e "$CurrEntryPath/$CurrEntryReapbase.exp"
        )) {
          $debug and printf STDERR "%s. WARNING: Experiment folder $CurrEntryPath"
            . " contains neither CONVERGE nor REAP files, skipped\n", &MySub;
          next;
        }
        $bProjRoot ++;
        if ($ProgParam{action}{ExperDir}) {
          $dbg2 and printf STDERR "%s. inspecting Experiment folder $CurrEntryPath\n", &MySub;
          &{$ProgParam{action}{ExperDir}} ($CurrEntryPath, -debug=>$dbg2);
        }
      }

      # recursion?
      elsif ($opt{-recursive}>=1 or ($opt{-recursive} and !$bProjRoot)) {
        &ExperIterat ($CurrEntryPath, %opt);
      }
    }

    # actions on files?
    # => not yet
  }

  # action following recursion of project root folder
  if ($bProjRoot and $ProgParam{action}{ExperProjroot}) {
    &{$ProgParam{action}{ExperProjroot}} ($PathSrcDir, -debug=>$dbg2);
  }

  # exit SUB
}


# iteration step for Experiment folder
#
# INTERFACE
# - argument 1: path of Experiment folder
#
# - options:
#   -debug      [STD]
#
# - global options:
#   $ProgOpt{-SlcReap}                read REAP Experiment fofn to get list
#                                        Experiment entries, default: NONE (find files by suffix)
#
# - return val: success status (boolean)
#
# - global data:
#   $ProgParam{action}{ExperItem}{exper}
#   $ProgParam{action}{ExperItem}{qual}
#   $ProgParam{action}{ExperItem}{scf}
#   $ProgParam{exper}{ExperCateg}        available data categories in Experiment folder
#   $ProgParam{filter}{read}             filter for the Experiment entries
#   $ProgParam{store}{curr}{expern}      current Experiment item (read name)
#   $ProgParam{store}{curr}{experp}      current Experiment file path
#   $ProgParam{store}{curr}{foid}        current REAP fofn as an A1 table data structure
#                                        $ProgOpt{-SlcReap} takes effect here
#   $ProgParam{store}{curr}{ExperDir}    current Experiment folder
#   $ProgParam{store}{curr}{reap}        current REAP pathname stem (complete path)
#   $ProgParam{store}{treated}{*}        reference to array of treted Experiment file entries
#
# DESCRIPTION
# - this function represents the iteration node for items of an Experiment
#   folder (leaf in the Experiment file tree), especially iteration over the
#   different Experiment file categories.
# - for convention how to hand over code references see &ExperIterat.
# - there're two concepts to direct actions according to the REAP status of
#   reads:
#   selection      means that an action will only be performed for the
#                  specified REAP status. This is set via global switch
#                  $ProgOpt{-SlcReap}. Selection automatically
#                  includes registration for the specified status.
#   registration   means that
#                  1. the current fofn according to the REAP status will
#                     be loaded to $ProgParam{store}{table}{$status}
#                  2. treated files will be added to the list
#                     $ProgParam{store}{$FileType}{$status}
#                  This feature is set via function switch -ListReap and the
#                  switch value may be one of @{$ProgParam{exper}{ReapCateg}}
#                  in a comma delimited list.
# - a filename filter for the Experiment / SCF / quality files may be
#   referenced by $ProgParam{filter}{read}.
#
sub ExperIterDir {
  my ($PathDir, %opt) = @_;
  my ($debug, $dbg2, %ExperCateg);
  my ($BaseReap, $ReapSuffix);
  my ($ItCateg, @ListExper, $NameExper);

  # function constants
  %ExperCateg = %{$ProgParam{exper}{ExperCateg}};

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $ProgParam{store}{curr}{ExperDir} = $PathDir = &PathExpand($PathDir);
  unless (-e $PathDir) { return undef }
  $debug and printf STDERR "%s. processing Experiment folder %s\n", &MySub, $PathDir||"''";

  # is CONVERGE Experiment folder, preferred
  if (-e "$PathDir/trace_files") {
    $ProgParam{store}{curr}{reap} = undef;

    # read CONVERGE fofn file
    $debug and printf STDERR "%s. loading CONVERGE fofn %s\n", &MySub, $_;
    $ProgParam{store}{curr}{foid} = &LoadFoid ("$PathDir/experiment_files");
  }

  # is REAP Experiment folder
  else {

    # split target argument to path fragments
    if ($PathDir =~ m|/([\w-]+)/+(\d{2}\.\d{2}\.\d{2})_([\w.-]+)/*$|) {
      $BaseReap = $2 . $3 . $1 . 'fn';
      $ProgParam{store}{curr}{reap} = $PathDir .'/'. $BaseReap;
    } else {
      $debug and printf STDERR "%s. ERROR: regexp match failure on Experiment folder %s\n", &MySub, $PathDir||"''";
      return undef;
    }

    # read REAP fofn file
    if ($ReapSuffix = $ProgOpt{-SlcReap}) {
      $_ = "$PathDir/$BaseReap.$ReapSuffix";
      $debug and printf STDERR "%s. loading REAP fofn %s\n", &MySub, $_;
      $ProgParam{store}{curr}{foid} = &LoadFoid ($_);
    }
  }

  # action on all types of Experiment files
  foreach $ItCateg (grep { ref($ProgParam{action}{ExperItem}{$_}) eq 'CODE' }
    keys %ExperCateg
  ) {

    # list of Experiment entries
    if ($ProgOpt{-SlcReap}) {
      @ListExper = @{$ProgParam{store}{curr}{foid}};
    } else {  # this is currently not in use!!!
      # no fofn: list of files of appropriate category
      foreach (&ReadDir ($PathDir,-select=>$ExperCateg{$ItCateg}{FileSlc},-debug=>$dbg2)) {
        push @ListExper, substr($_,0,-length($ExperCateg{$ItCateg}{FileSuffix}));
      }
    }

    # loop over possible target reads
    foreach $NameExper (@ListExper) {

      # again some selection
      # - filter according to filter fofn
      if ($ProgParam{filter}{read} and
          ! int (grep {$NameExper=~m/$_/} @{$ProgParam{filter}{read}})) { next }

      $ProgParam{store}{curr}{expern} = $NameExper;
      $ProgParam{store}{curr}{experp} = "$PathDir/$NameExper$ExperCateg{$ItCateg}{FileSuffix}";

      # action
      &{$ProgParam{action}{ExperItem}{$ItCateg}} ($ProgParam{store}{curr}{experp}, -debug=>$dbg2);
      push @{$ProgParam{store}{treated}{$ItCateg}}, $NameExper;
    }
  }

  # target folder done
  return 1;
}


# iteration step on Experiment folder for a single target read
#
# INTERFACE
# - argument 1: path of read (Experiment file)
#
# - options:
#   -debug      [STD]
#   -ListReap   see conditional action according to REAP status (registration
#               concept)
#
# - global options:
#   $ProgOpt{-SlcReap}                  act only on Experiment files that're
#                                          listed in these REAP fofns
# - return val: success status (boolean)
#
# - global data:
#   $ProgParam{action}{ExperItem}{exper}
#   $ProgParam{action}{ExperItem}{qual}
#   $ProgParam{action}{ExperItem}{scf}
#   $ProgParam{exper}{ExperCateg}          available data categories in Experiment folder
#   $ProgParam{filter}{read}
#   $ProgParam{store}{table}{$ReapSuffix}  store current REAP fofn as an A1 table data structure
#   $ProgParam{store}{treated}{*}          reference to array of treted Experiment file entries
#
# DESCRIPTION
# - this function does much the same job as &ExperIterDir, but it applies only
#   to one single Experiment item.
# - for handing over of code references see &ExperIterat.
#
sub ExperIterRead {
  my ($ArgExper, %opt) = @_;
  my ($debug, $dbg2, %ExperCateg, $reReapCateg);
  my ($PathDir, $NameExper);
  my ($BaseReap, @ReapCategList, $ReapSuffix, $ItCateg);

  # function constants
  %ExperCateg = %{$ProgParam{exper}{ExperCateg}};
  $reReapCateg = '^('. join('|',@{$ProgParam{exper}{ReapCateg}}) . ')$';

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $ArgExper = &PathExpand ($ArgExper);
  unless (-e $ArgExper) { return undef }
  $debug and printf STDERR "%s. targeting at Experiment item %s\n", &MySub, $ArgExper;

  # split target argument to path fragments
  if ($ArgExper =~ m|/([^/]+)/(\d{2}\.\d{2}\.\d{2})_([\w.-]+)/|) {
    $PathDir = $` . $&;
    $NameExper = $';
    $BaseReap = $2 . $3 . $1 . 'fn';
  } else {
    $debug and printf STDERR "%s. ERROR: Experiment argument doesn't match to regexp\n", &MySub;
    return undef;
  }

  # read REAP files
  @ReapCategList = grep { -e "$PathDir/$BaseReap.$_" } grep { m/$reReapCateg/o }
    $ProgOpt{-SlcReap}, split(/,/,$opt{-ListReap});
  $debug and printf STDERR "%s. located %d REAP foid files\n", &MySub, int(@ReapCategList);
  foreach $ReapSuffix (@ReapCategList) {
    $debug and printf STDERR "%s. loading REAP file %s\n", &MySub, "$PathDir/$BaseReap.$ReapSuffix";
    $ProgParam{store}{table}{$ReapSuffix} = &ReadFile ("$PathDir/$BaseReap.$ReapSuffix");
  }

  # filter steps
  if (int(@ReapCategList) and $ReapSuffix=$ProgOpt{-SlcReap} and
      index($ProgParam{store}{table}{$ReapSuffix},$NameExper) < 0
  ) {
    $debug and printf STDERR "%s. skipping Experiment according to -SlcReap\n", &MySub;
    push @{$ProgParam{store}{read}{$ReapSuffix}}, $NameExper;
    return 1;
  }
  # $ProgParam{filter}{read} is not applied here!

  # action on all types of Experiment files
  foreach $ItCateg (grep { $ExperCateg{$_} } keys %{$ProgParam{action}{ExperItem}}) {
    &{$ProgParam{action}{ExperItem}{$ItCateg}} ("$PathDir/$NameExper$ExperCateg{$ItCateg}{FileSuffix}", -debug=>$dbg2);
    push @{$ProgParam{store}{treated}{$ItCateg}}, $NameExper;
  }

  # register REAP status
  foreach $ReapSuffix (@ReapCategList) {
    if ($ProgParam{store}{table}{$ReapSuffix} =~ m/^$NameExper/m) {
      push @{$ProgParam{store}{read}{$ReapSuffix}}, $NameExper;
    }
  }

  # target done
  return 1;
}


# register Experiment folder to be present
#
# INTERFACE
# - argument 1: path of Experiment folder
#
# - options:
#   -debug      [STD]
#
sub ExperDirRegist {
  my ($PathTgt, %opt) = @_;
  my ($debug, $dbg2);
  my (@ReapArr);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # verify sample files to exist
  if ((@ReapArr = &ReadDir($PathTgt,-select=>'\.exp$',-debug=>$dbg2))
      and -s "$PathTgt/$ReapArr[0]") {

    # truncate folder name to "date_machine"
    $PathTgt =~ s|/+$||;
    $PathTgt =~ s|.*/||;

    # register folder
    $ProgParam{store}{folder}{$PathTgt}{exper} = 1;
    $debug and printf STDERR "%s. registering folder %s\n", &MySub, $PathTgt||"''";
  }
}


################################################################################
# experimental data actions
################################################################################


# create combined index of available reads in the Sample file structure
#
# INTERFACE
# - global options:
#   -debug       [STD]
#   -SlcID       [STD]
#   -timer       [STD]
#
# DESCRIPTION
# - this function makes use of REAP's read index (resource for duplicate
#   check) which resides in the raw data file structure
#
# DEBUG, CHANGES, ADDITIONS
# - argument 1*: path(s) of raw data folder(s), default
#                map { "$CorePath{ReadWatch}{RawGroupDir}/$_" } @{$CorePath{ReadWatch}{ExperTarget}}
#                *** not implemented ***
#
sub ProgReadIndex {
  my ($debug, $dbg2, $bTimer, $time);
  my ($ItTarget, $PathList);
  my ($pData);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $bTimer = $ProgOpt{-timer};

  # loop over project folders
  foreach $ItTarget (@{$CorePath{ReadWatch}{ExperTarget}}) {

    # load REAP index
    $PathList = "$CorePath{ReadWatch}{RawGroupDir}/$ItTarget/$CorePath{GSCJ}{RawReapSub}/$ItTarget$CorePath{GSCJ}{RawReapList}";
    $debug and printf STDERR "%s. reading REAP index file %s\n", &MySub, $PathList;
    unless (defined ($pData = &PlainToTable ($PathList, -delimit=>'SpaceRet', -debug=>$dbg2))) {
      printf STDERR "ERROR: unable to read REAP index for target %s\n", $ItTarget||"''";
    }

    # select reads and write into global index
    $bTimer and $time = (times)[0];
    foreach (@$pData) {
      if ($ProgOpt{-SlcID} and $$_[2] !~ m/$ProgOpt{-SlcID}/o) { next }
      $$_[0] =~ s/^Results_//;
      $$_[0] = $ItTarget .'/'. $$_[0];
      print join ("\t", $$_[2], $$_[0], $$_[1]), "\n";
    }
    $bTimer and printf STDERR "%s. CPU time for selecting and writing: %.3f\n", &MySub, (times)[0]-$time;
  }
}


# list available reads in the Experiment file structure
#
# INTERFACE
# - argument 1*: root search path, default:
#                $CorePath{ReadWatch}{ExperGroupDir}
#
# - global options:
#   -debug       [STD]
#   -edited      [STD]
#   -SlcID       [STD]
#   -SlcReap     [STD]
#
# - global arguments:
#   $ProgArg[0]  root of Experiment file structure
#
# DESCRIPTION
# - this function makes use of REAP's read index (resource for duplicate
#   check) which resides in the raw data file structure
#
sub ProgExperList {
  my ($debug);
  my ($PathRoot);

  # function parameters
  $debug = $ProgOpt{-debug};
  $PathRoot = $ProgArg[0] || $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  &TraceRootChk ($PathRoot);

  # loop over project folders
  $debug and printf STDERR "%s. working on folder %s\n", &MySub, $PathRoot;
  $ProgOpt{-SlcReap} ||= 'exp';

  # define recursion behaviour
  $ProgParam{action}{ExperDir} = sub {
    # no file actions here, but load REAP fofn according to $ProgOpt{-SlcReap}
    &ExperIterDir (@_);
    my ($ExperDir) = @_;

    # select according to $ProgOpt{-SlcID}
    # print remaining list of Experiment entries
    foreach (@{$ProgParam{store}{curr}{foid}}) {
      if ($ProgOpt{-SlcID} and !m/$ProgOpt{-SlcID}/o) { next }
        # regexp should never change during execution
      if ($ProgOpt{-edited} and -e "$ExperDir/$_.ed") {
        print "$ExperDir/$_.ed\n";
      } else {
        print "$ExperDir/$_\n";
      }
    }
  };

  # do recursive iteration
  &ExperIterat ($PathRoot, -recursive=>0.5, -debug=>$debug);
}


# do statistics about available reads in the Experiment file structure
#
# INTERFACE
# - argument 1*: root search path, default:
#                $CorePath{ReadWatch}{ExperGroupDir}
#
# - global options:
#   -debug       [STD]
#   -SlcID       [STD]
#   -SlcReap     [STD]
#
# DESCRIPTION
# - this function makes use of REAP's read index (resource for duplicate
#   check) which resides in the raw data file structure
#
sub ProgExperListStat {
  my ($debug);
  my ($PathRoot);

  # function parameters
  $debug = $ProgOpt{-debug};
  $PathRoot = $ProgArg[0] || $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  &TraceRootChk ($PathRoot);

  # loop over project folders
  $debug and printf STDERR "%s. starting at root folder %s\n", &MySub, $PathRoot;
  $ProgOpt{-SlcReap} ||= 'exp';

  # define recursion behaviour
  $ProgParam{action}{ExperDir} = sub {
    $ProgParam{store}{curr}{CtDir} ++;
    &ExperIterDir (@_);
    $ProgParam{store}{curr}{CtExper} += int grep {
      ! $ProgOpt{-SlcID} or m/$ProgOpt{-SlcID}/o
      } @{$ProgParam{store}{curr}{foid}};
  };
  $ProgParam{action}{ExperProjroot} = sub {
    printf "summary for project root %s:\n  Experiment folders: %d\n  %sExperiment files%s: %d\n",
      $_[0],
      $ProgParam{store}{curr}{CtDir},
      $ProgOpt{-SlcID} ? 'selected ' : '',
      ($ProgOpt{-SlcReap} and $ProgOpt{-SlcReap} ne 'exp') ? " ($ProgOpt{-SlcReap})" : '',
      $ProgParam{store}{curr}{CtExper};
    delete $ProgParam{store}{curr}{CtDir};
    delete $ProgParam{store}{curr}{CtExper};
  };

  # do recursive iteration
  &ExperIterat ($PathRoot, -recursive=>0.5, -debug=>$debug);
}


# create Sample index from Experiment file structure
#
# INTERFACE
# - argument 1: Experiment root directory + project name + folder
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - all Experiment folders in the specified directory structure will be
#   indexed
#
# DEBUG, CHANGES, ADDITIONS
# - index only single specified Experiment folders
#
sub ProgExperSampleIdx {
  my ($ArgExper) = @_;
  my ($debug);
  my ($PathExper, $NameGroup, $NameProj, $PathIndexDir, $PathIndex);
  my ($PathDir, $ReapDate, $ReapMachine, $PathReapbase);
  my ($pRead, $CtI, $pLine);

  # function parameters
  $debug = $ProgOpt{-debug};
  $PathExper = &PathExpand ($ArgExper);
  if ($PathExper =~ m|/+(\w+)/+(\w+)/*$|) {
    $NameGroup = $1;
    $NameProj = $2;
  } else {
    printf STDERR "ERROR: unable to extract project group/name from argument %s\n", $ArgExper||"''";
    exit 1;
  }

  # create target index
  $PathIndexDir = "$CorePath{GSCJ}{RawHome}/$NameGroup/$NameProj/$CorePath{GSCJ}{RawReapSub}";
  $PathIndex = $PathIndexDir . "/$NameProj$CorePath{GSCJ}{RawReapList}";
  printf STDERR "%s. writing index %s (%s)\n", &MySub,
    $PathIndex||"''", (-e $PathIndex) ? 'exists':'does not exist';
  if (! -e $PathIndex) { mkdir ($PathIndexDir) }
  open (OUTINDEX, ">$PathIndex");

  # loop over project folders
  foreach $PathDir (&ReadDir ($PathExper, -select=>'^\d{2}\.\d{2}\.\d{2}_', -expand=>1)) {

    # load REAP fofn
    if ($PathDir =~ m|(\d{2}\.\d{2}\.\d{2})_([\w.-]+)/*$|) {
      $ReapDate = $1;
      $ReapMachine = $2;
      $PathReapbase = $ReapDate . $ReapMachine . $NameProj . 'fn';
    } else {
      printf STDERR "%s. WARNING: unusual Experiment-like subfolder: %s\n", &MySub,
        $PathDir;
      next;
    }
    unless (-e "$PathDir/$PathReapbase" and -e "$PathDir/$PathReapbase.exp") {
      printf STDERR "%s. WARNING: Experiment folder doesn't contain REAP files, skipped\n", &MySub;
      printf STDERR "  checked: %s, %s\n", "$PathDir/$PathReapbase", "$PathDir/$PathReapbase.exp";
      next;
    }
    unless ($pRead = &LoadFoid ("$PathDir/$PathReapbase.exp")) {
      printf STDERR "%s. ERROR: unable to read REAP foid %s\n", &MySub,
        "$PathDir/$PathReapbase.exp";
      next;
    }

    # reformat reads list and write to index
    for ($CtI=0; $CtI<@$pRead; $CtI++) {
      $$pRead[$CtI] or next;
      $pLine = [
        'Results_' . $ReapDate .'_'. $ReapMachine,
        'Sample_' . ($CtI+1),
        $$pRead[$CtI],
        ];
      printf OUTINDEX  "%s\n", join ("   ", @$pLine);
    }
  }

  # tidy up
  close (OUTINDEX);
}


# move edited Experiment files to Experiment file structure
#
# INTERFACE
# - argument 1: path of source directory keeping edited Experiment files
#
# - global options:
#   -debug      [STD]
#   -timer      [STD]
#   -TraceRoot  Experiment file tree root (source), default:
#               $CorePath{ReadWatch}{ExperGroupDir}
#
# - global data:
#   $ProgParam{filter}{read}             constructed here
#   $ProgParam{store}{DirFixed}          fixed folder (source)
#   $ProgParam{store}{ReadIndex}         we keep it there
#
sub ProgExperStoreEd {
  my ($PathSrc) = @_;
  my ($debug, $dbg2, $PathExperBase, @RcReadIndex, $time);
  my (%nonred, @PathExper, $PlainReadsTreated);

  ##############################################################################
  # pre-work

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  $PathExperBase = $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  &TraceRootChk ($PathExperBase);
  @RcReadIndex = exists($ProgOpt{-RcReadIndex}) ? @{$ProgOpt{-RcReadIndex}} : ();
  $PathSrc = $ProgParam{store}{DirFixed} = &PathExpand($PathSrc||'.');

  # non-redundant list of ID selectors (LOID)
  $ProgParam{filter}{read} = [ &ReadDir ($PathSrc, -debug=>$dbg2) ];
  unless (@{$ProgParam{filter}{read}}) {
    printf STDERR "ERROR: unable to read entries from source directory: %s\n", $PathSrc||"''";
    exit 1;
  }
  $debug and printf STDERR "found %d entr%s in source directory: %s\n",
    int(@{$ProgParam{filter}{read}}), (@{$ProgParam{filter}{read}}==1) ? 'y':'ies',
    $PathSrc;
  $ProgParam{filter}{read} = &DataTreeSlc ($ProgParam{filter}{read}, [[0,'all']], -unique=>1);

  ##############################################################################
  # Experiment file tree actions

  # start handling of timer
  $ProgParam{time}{started} = &Sum ((times)[0,2]);

  # define file actions (mode 'get all into GAP4 project folder')
  $ProgParam{action}{ExperDir} = \&ExperIterDir;
  $ProgParam{action}{ExperItem}{exper} = \&ItemStoreEd;

  # do recursion on the Experiment file tree old-fashioned
  if ($ProgOpt{-recursive}) {
    $time = &Sum ((times)[0,2]);
    # $ProgOpt{-SlcReap} will influence &ExperIterat
    &ExperIterat ($PathExperBase, -recursive=>1, -debug=>$dbg2);
    $ProgParam{time}{ExperIterat} = &Sum((times)[0,2]) - $time;
  }

  # locate files via ReadIndex
  else {

    # initialise read index
    $time = &Sum ((times)[0,2]);
    $ProgParam{store}{ReadIndex} = ReadWatch::ReadIndex->new(@RcReadIndex);
    $ProgParam{store}{ReadIndex}->{switch}{-FullMatch} = $ProgOpt{-FullMatch};
    $ProgParam{store}{ReadIndex}->{base}{exper} = $PathExperBase;
    $ProgParam{time}{IndexLoad} = &Sum((times)[0,2]) - $time;

    # select entries from read index
    $time = &Sum ((times)[0,2]);
    @PathExper = $ProgParam{store}{ReadIndex}->Retrieve('ExperFull',@{$ProgParam{filter}{read}});
    if ($debug) {
      printf STDERR "%s. found %d Experiment files via read index (%d selector%s)\n", &MySub,
        int @PathExper, int @{$ProgParam{filter}{read}},
        (@{$ProgParam{filter}{read}}>1) ? 's':'';
      printf STDERR "  %s\n", join ("\n  ", @PathExper);
    }
    $ProgParam{time}{IndexLocate} = &Sum((times)[0,2]) - $time;

    # perform iteration step on Experiment folder
    $time = &Sum ((times)[0,2]);
    foreach (@PathExper) {
      &ExperIterRead ($_, -ListReap=>'failed', -debug=>$dbg2);
    }
    $ProgParam{time}{ExperIterat} = &Sum((times)[0,2]) - $time;
  }
}


# move reads to GAP4 project folder (Staden Experiment and SCF file)
#
# INTERFACE
# - argument 1: path of file of read IDs
# - argument 2: optional: path of target folder
#
# - global options:
#   -debug      [STD]
#   -edited     [STD], default = 1
#   -FullMatch  use full string match in index look-up
#   -SlcReap    [STD]
#   -recursive  [STD]
#   -timer      [STD]
#   -TraceRoot  Experiment file tree root (source), default:
#               $CorePath{ReadWatch}{ExperGroupDir}
#   -verbose    output verbosely to STDERR
#
# - global data:
#   $ProgParam{filter}{read}             constructed here
#   $ProgParam{store}{DirFixed}          fixed folder (target)
#   $ProgParam{store}{read}{$ReapStatus} filled by &ExperIterRead and used here
#   $ProgParam{store}{ReadIndex}         we keep it there
#   $ProgParam{store}{treated}{*}        reference to array of treated trace items
#
# DESCRIPTION
# - the target directory for storage of Experiment files and creation
#   of new links is cwd
#
sub ProgExperProvide {
  my ($PathFoid, $PathTgt) = @_;
  my ($debug, $dbg2, $verbose, $dbgvb, $PathExperBase, @RcReadIndex, $time);
  my (@PathExper, $PlainReadsTreated);
  my ($PathFoidBlast, @CallTag, $call);

  ##############################################################################
  # pre-work

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $verbose = $ProgOpt{-verbose};
  $dbgvb = $debug || $verbose;

  $PathExperBase = $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  &TraceRootChk ($PathExperBase);
  @RcReadIndex = (exists $ProgOpt{-RcReadIndex}) ?
    @{$ProgOpt{-RcReadIndex}} : ();
  $PathTgt = $ProgParam{store}{DirFixed} = &PathExpand($PathTgt||'.');

  # non-redundant list of ID selectors (LOID)
  $ProgParam{filter}{read} = &LoadFoid ($PathFoid);
  unless (@{$ProgParam{filter}{read}}) {
    printf STDERR "ERROR: unable to read entries from file of read IDs: %s\n", $PathFoid||"''";
    exit 1;
  }
  $dbgvb and printf STDERR "found %d entr%s in file of read selectors: %s\n",
    int(@{$ProgParam{filter}{read}}), (@{$ProgParam{filter}{read}}==1)?'y':'ies',
    $PathFoid;
  $ProgParam{filter}{read} = &DataTreeSlc ($ProgParam{filter}{read}, [[0,'all']], -unique=>1);
  if ($dbgvb) {
    printf STDERR "%d non-redundant entr%s entered to list of read IDs (LOID)\n",
      int(@{$ProgParam{filter}{read}}), (@{$ProgParam{filter}{read}}==1)?'y':'ies';
    printf STDERR "target folder is %s\n", &PathExpand($PathTgt);
    $ProgOpt{-recursive} and print STDERR "doing recursion on Experiment file tree old-fashioned\n";
  }
  print STDERR  "\n";

  # confirm existence of target directory
  unless (-d &PathLinkResol ($ProgParam{store}{DirFixed})) {
    unless (mkdir ($ProgParam{store}{DirFixed})) {
      printf STDERR "ERROR: unable to create target directory %s\n", $ProgParam{store}{DirFixed};
      exit 1;
    }
    $dbgvb and printf STDERR "creating target directory %s\n", $ProgParam{store}{DirFixed};
  }

  ##############################################################################
  # Experiment file tree actions

  # start handling of timer
  $ProgParam{time}{started} = &Sum ((times)[0,2]);

  # define file actions (mode 'get all into GAP4 project folder')
  $ProgParam{action}{ExperDir} = \&ExperIterDir;
  $ProgParam{action}{ExperItem}{exper} = $ProgOpt{-edited} ?
    \&ItemCopyEd : \&ItemCopy;
  $ProgParam{action}{ExperItem}{scf} = \&ItemLink;

  # recursion on Experiment file tree old-fashioned
  if ($ProgOpt{-recursive}) {
    $time = &Sum ((times)[0,2]);
    # $ProgOpt{-SlcReap} will influence &ExperIterat
    &ExperIterat ($PathExperBase, -recursive=>1, -debug=>$dbg2);
    $ProgParam{time}{ExperIterat} = &Sum((times)[0,2]) - $time;
  }

  # locate files via ReadIndex
  else {

    # initialise read index
    $time = &Sum ((times)[0,2]);
    $ProgParam{store}{ReadIndex} = ReadWatch::ReadIndex->new(@RcReadIndex);
    $ProgParam{store}{ReadIndex}->{switch}{-FullMatch} = $ProgOpt{-FullMatch};
    $ProgParam{store}{ReadIndex}->{base}{exper} = $PathExperBase;
    $ProgParam{time}{IndexLoad} = &Sum((times)[0,2]) - $time;

    # select entries from read index
    $time = &Sum ((times)[0,2]);
    @PathExper = $ProgParam{store}{ReadIndex}->Retrieve('ExperFull',@{$ProgParam{filter}{read}});
    if ($debug) {
      printf STDERR "%s. found %d Experiment files via read index (%d selector%s)\n", &MySub,
        int @PathExper, int @{$ProgParam{filter}{read}},
        (@{$ProgParam{filter}{read}}>1) ? 's':'';
      printf STDERR "  %s\n", join ("\n  ", @PathExper);
    }
    $ProgParam{time}{IndexLocate} = &Sum((times)[0,2]) - $time;

    # iteration step on Experiment folder
    $time = &Sum ((times)[0,2]);
    foreach (@PathExper) {
      &ExperIterRead ($_, -ListReap=>'failed', -debug=>$dbg2);
    }
    $ProgParam{time}{ExperIterat} = &Sum((times)[0,2]) - $time;
  }

  ##############################################################################
  # summary, report

  # non-redundant lists of treated items
  if ($ProgParam{action}{ExperItem}{exper}) {
    my %nonred = map { ($_=>1) } @{$ProgParam{store}{treated}{exper}};
    $ProgParam{store}{treated}{exper} = [ keys %nonred ];
  }

  map { $$_ ||= []; }
    \$ProgParam{filter}{read},
    \$ProgParam{store}{FilterFail},
    \$ProgParam{store}{treated}{exper},
    \$ProgParam{store}{read}{failed},
  ;

  # FOFN entries that failed to match
  if ($ProgParam{action}{ExperItem}{exper}) {
    $PlainReadsTreated = join ('', map { "$_\n" } @{$ProgParam{store}{treated}{exper}});
    foreach (@{$ProgParam{filter}{read}}) {
      if ($PlainReadsTreated !~ m/$_/) {
        push @{$ProgParam{store}{FilterFail}}, $_;
      }
    }
  } elsif ($ProgParam{action}{ExperItem}{scf}) {
    $PlainReadsTreated = join ('', map { "$_\n" } @{$ProgParam{store}{treated}{scf}});
    foreach (@{$ProgParam{filter}{read}}) {
      if ($PlainReadsTreated !~ m/$_/) {
        push @{$ProgParam{store}{FilterFail}}, $_;
      }
    }
  }

  # print summary
  print  STDERR "\nPROCESS SUMMARY\n";
  printf STDERR "entries in list of read IDs (LOID): %d\n", int @{$ProgParam{filter}{read}};
  printf STDERR "  LOID entries that failed to match: %d\n", int @{$ProgParam{store}{FilterFail}};
  if ($ProgOpt{-timer}) {
    print  STDERR "Process Times [CPU+system s]:\n";
    printf STDERR "  getting started: %.3f\n", $ProgParam{time}{started};
    unless ($ProgOpt{-recursive}) {
    printf STDERR "  loading read index: %.3f\n", $ProgParam{time}{IndexLoad};
    printf STDERR "  locate files via read index: %.3f\n", $ProgParam{time}{IndexLocate};
    }
    printf STDERR "  iteraion through Experiment file tree: %.3f\n", $ProgParam{time}{ExperIterat};
    printf STDERR "REAL TIME [s]: %.3f\n", time - $ProgParam{time}{start};
  }
  printf STDERR "Trace Items Touched: %d\n",
    int @{$ProgParam{store}{treated}{exper}};
  printf STDERR "  files copied from Experiment folders: %d\n",
    int @{$ProgParam{store}{treated}{exper}};
  printf STDERR "  files in REAP's *.failed file: %d\n",
    int @{$ProgParam{store}{read}{failed}};
  if (@{$ProgParam{store}{FilterFail}}) {
    print  STDERR "\nNON-MATCHING LOID ENTRIES\n";
    print  STDERR map {"$_\n"} sort @{$ProgParam{store}{FilterFail}};
  }
  if ($dbgvb) {
    if (@{$ProgParam{store}{treated}{exper}}) {
      print  STDERR "\nFILES COPIED FROM EXPERIMENT FOLDERS\n";
      print  STDERR map {"$_\n"} sort @{$ProgParam{store}{treated}{exper}};
    }
    if (@{$ProgParam{store}{read}{failed}}) {
      printf STDERR "\nFILES IN REAP'S *.failed FILE%s\n", ($ProgOpt{-SlcReap} =~ m/passed/) ? ' (neglected)':'';
      print  STDERR map {"$_\n"} sort @{$ProgParam{store}{read}{failed}};
    }
  }
  if ($ProgOpt{-OutFail}) {
    &WriteFile ($ProgOpt{-OutFail}, join ('',
      map {"$_\n"} sort @{$ProgParam{store}{read}{failed}}));
  }
  if ($ProgOpt{-OutPass}) {
    &WriteFile ($ProgOpt{-OutPass}, $PlainReadsTreated);
  }

  # perform tagging on copied Experiment files
  if ($ProgOpt{-tag} and $PlainReadsTreated =~ m/\w/) {

    # save file of read IDs, change to target directory
    $PathFoidBlast = $ProgParam{TmpManag}->Create();
    unless (&WriteFile ($PathFoidBlast, $PlainReadsTreated)) {
      print  STDERR "ERROR: writing fofn for tagging process failed\n";
      exit 1;
    }
    chdir $PathTgt;

    # do tagging
    if ($ProgOpt{-tag} != 1) {  # explicit specification of tag types
      @CallTag = @{$ProgOpt{-tag}};
    } else {                       # use standard tag procedure library
      @CallTag = @{$ProgParam{call}{TagStd}};
      if ($PlainReadsTreated =~ m/^13/m) {
        push @CallTag, 'scer';
      }
    }
    foreach (@CallTag) {
      $call = "$ProgParam{call}{tag}{$_} -fofn=$PathFoidBlast" .
        ($dbg2 ? " -debug=$dbg2":'');
      print  STDERR "\nperforming tagging, parameter set $_\n";
      if (open (INTAGPROC, "$call &|")) {
        &FileCopy (\*INTAGPROC, \*STDOUT);
        close INTAGPROC;
        print  STDERR "done\n";
      } else {
        print  STDERR "call '$call' failed\n";
        next;
      }
    }

    # tidy up
    $debug or unlink $PathFoidBlast;
  }
}


################################################################################
# manage sample plate and sheet formats, do sequencing experiments
################################################################################


# prepare list of read IDs from a plate ID
#
# INTERFACE
# - global options:
#   -debug      [STD]
#   -PlateSize  specify plate size
#
# DESCRIPTION
# - read list is printed on STDOUT
#
sub PlateToReads {
  my ($primer, @PlateID) = @_;
  my ($debug, $PlateSize);
  my (@ListId, $ItPlate, $ItRow, $ItCol);

  # function parameters
  $debug = $ProgOpt{-debug};
  $PlateSize = $ProgOpt{-PlateSize} || $ProgParam{default}{PlateSize};

  # chain to code corresponding to plate format
  if ($PlateSize == 64) {
    foreach $ItPlate (@PlateID) {
      foreach $ItRow ('a' .. 'h') {
        foreach $ItCol (1 .. 12) {
          push @ListId, sprintf "%s%s%02d.%s1", $ItPlate, $ItRow, $ItCol, $primer;
        }
      }
    }
  } else {
    print STDERR "unknown plate format %s\n", $PlateSize||"''";
    exit 1;
  }

  # exit SUB
  print join ('', map {"$_\n"} @ListId);
}


# sort read or clone list logically
#
# INTERFACE
# - global options:
#   -debug      [STD]
#
# - return val: - array of IDs
#               - undef if an error occurs
#
# DESCRIPTION
# - starting multi-column list the output will be sinle-column (first
#   column) only
#
# DEBUG, CHANGES, ADDITIONS
# - use of black-box functions inside map/grep etc. is critical!
#
sub ProgSortRead {
  my (@PathID) = @_;
  my ($debug);
  my ($PathCurr, @IdPrim, %IdIdx, @IdFin);
  my ($ItRead, $TrueRead);

  # function parameters
  $debug = $ProgOpt{-debug};

  # load list(s)
  foreach $PathCurr (@PathID) {
    push @IdPrim, @{ &LoadFoid($PathCurr,-FltEmpty=>1) };
  }

  # expand to data structure holding name fields, sort
  foreach $ItRead (@IdPrim) {
    $TrueRead = ($ItRead =~ m/\./) ? $ItRead : "$ItRead.s1";
    $IdIdx{$ItRead} = &ReadidToFields ($TrueRead),
  }
  @IdFin = sort {
    $IdIdx{$a}{lib} cmp $IdIdx{$b}{lib} or
    $IdIdx{$a}{plt} <=> $IdIdx{$b}{plt} or
    $IdIdx{$a}{coo} cmp $IdIdx{$b}{coo} or
    $IdIdx{$a}{prm} cmp $IdIdx{$b}{prm} or
    $IdIdx{$a}{num} <=> $IdIdx{$b}{num} or
                 $a <=> $b } keys %IdIdx;

  # print list
  print map { "$_\n" } @IdFin;
}


# prepare Sample Sheet from file of read names or array of read names
#
# INTERFACE
# - argument 1: - reference to array of read names on well plate (maximum #: 64)
#               - file of read names
#
# - options:
#   -debug      [STD]
#
# - return val: - reference to array of read names on gel
#               - undef if an error occurred
#
# DESCRIPTION
# - sample numbering is in human logics, not in machine logics for better
#   readability
#
sub SampleSheet {
  my (@sort);
  my ($ArgReadname, %opt) = @_;
  my (@Readname, @ReadSorted, $CtI);

  # function parameters
  if (ref($ArgReadname) eq 'ARRAY') {
    @Readname = @$ArgReadname;
  } elsif (-r $ArgReadname) {
    @Readname = @{ &LoadFoid($ArgReadname) };
  }
  int(@Readname) or return undef;
  unshift @Readname, 0;  # fake position to achieve human numbering logics

  # sorting rule
  # number on gel for array of well positions (A1..A8,B1..B8,...)
  @sort = (
    0,
    29, 31, 61, 63, 30, 32, 62, 64,
    25, 27, 57, 59, 26, 28, 58, 60,
    21, 23, 53, 55, 22, 24, 54, 56,
    17, 19, 49, 51, 18, 20, 50, 52,
    13, 15, 45, 47, 14, 16, 46, 48,
     9, 11, 41, 43, 10, 12, 42, 44,
     5,  7, 37, 39,  6,  8, 38, 40,
     1,  3, 33, 35,  2,  4, 34, 36,
    );

  # loop over entries
  # sort reading names into order of gel loading
  for ($CtI=1; $CtI<@sort; $CtI++) {
    $ReadSorted[$sort[$CtI]] = $Readname[$CtI];
  }
  shift @ReadSorted;
     # delete fake position to get back to machine numbering logics

  # exit SUB
  return \@ReadSorted;
}


# prepare Sample Sheets from file of read names
#
# INTERFACE
# - argument 1: file of read names
#
# - global options:
#   -debug      [STD]
#   ...
#
sub ProgSampleSheet {
  my $Linefeed = "\r";
  my ($FileReadname) = @_;
  my ($pSheet, $FileSheet);

  # sort
  # ...

  # get array of names on gel
  $pSheet = &SampleSheet ($FileReadname, -debug=>$ProgOpt{-debug});
  unless (@$pSheet) {
    print STDERR "ERROR: unable to read file of read names\n";
    exit 1;
  }

  # output file
  $FileSheet  = &PathChgSuffix ($FileReadname, '', -last=>1);
  $FileSheet .= '_sht';
  if (open (OUTSHEET, ">$FileSheet")) {
    print "saving sample sheet as $FileSheet\n";
  } else {
    print "ERROR: Unable to save sample sheet as $FileSheet\n";
    exit 1;
  }
  printf OUTSHEET "%s$Linefeed", join ($Linefeed, @$pSheet);
  close OUTSHEET;

  # exit SUB
  return;
}


# prepare plate loading schemes and Sample sheets from file of read names
#
# INTERFACE
# - argument 1: file of read names
#
# - global options:
#   -debug      [STD]
#   -OutStump   [STD]
#   ...
#
sub ProgSeqExtra {
  my $Linefeed = "\n";
  my ($FileReadname) = @_;
  my ($debug);
  my ($pRead, @plate, $pPlate96, %GelMixed, @Plate64, $pSheet);
  my (@Row2, $LineLeft, $LineRight, $CtPlate, $CtWell);
  my (%file);

  # function parameters
  $debug = $ProgOpt{-debug};
  $file{OutStump} = &PathChgSuffix ($FileReadname, '', -last=>1);

  # get array of reads
  unless (@{ $pRead = &LoadFoid ($FileReadname) }) {
    print "ERROR: unable to read file of read names\n";
    exit 1;
  }

  # optionally: sort
  # ...

  # loop over 96-well plates made from list
  @plate = &PlateLoad96 ($pRead);
  while (($pPlate96 = shift (@plate)) or %GelMixed) {
    $CtPlate ++;
    if ($pPlate96) {

      # output plate listing
      $file{list} = $file{OutStump} . sprintf ('_%02d.list', $CtPlate);
      if (open (OUTLIST, ">$file{list}")) {
        print "saving plate listing $file{list}\n";
      } else {
        print STDERR "ERROR while saving plate listing as $file{list}\n";
        next;
      }
      @Row2 = ('A','E','B','F','C','G','D','H');
      while (($LineLeft, $LineRight) = splice (@Row2, 0, 2)) {
        for ($CtWell=0; $CtWell<12; $CtWell++) {
          printf OUTLIST "%s%02d  %15s           %s%02d  %15s\n",
            $LineLeft,  $CtWell+1, $$pPlate96{$LineLeft}[$CtWell],
            $LineRight, $CtWell+1, $$pPlate96{$LineRight}[$CtWell];
        }
        printf OUTLIST "\n";
      }
      close OUTLIST;

#      # output plate loading scheme (nice)
#      $file{load} = $file{OutStump} . sprintf ('_%02d.plt', $CtPlate);
#      if (open (OUTLOAD, ">$file{load}")) {
#        print "saving plate loading scheme $file{load}\n";
#      } else {
#        print STDERR "ERROR: unable to save plate loading scheme as $file{load}\n";
#        next;
#      }
#      printf OUTLOAD "\t%s\n", join ("\t", (1..12));
#      foreach ('A','B','C','D','E','F','G','H') {
#        printf OUTLOAD "$_\t%s\n", join ("\t", @{$$pPlate96{$_}});
#      }
#      close OUTLOAD;

      # output sample sheet for left two-third of plate
      @Plate64 = ();
      foreach ('A','B','C','D','E','F','G','H') {
        push @Plate64, @{$$pPlate96{$_}}[0..7];
      }
      $pSheet = &SampleSheet (\@Plate64, -debug=>$debug);
      $file{sheet} = $file{OutStump} . sprintf ('_%02d.sht', $CtPlate);
      if (open (OUTSHEET, ">$file{sheet}")) {
        print "saving sample sheet $file{sheet}\n";
      } else {
        print STDERR "ERROR: unable to save sample sheet as $file{sheet}\n";
        next;
      }
      printf OUTSHEET "%s$Linefeed", join ($Linefeed, @$pSheet);
      close OUTSHEET;
    }

    # output mixed sample sheet
    if (%GelMixed and @{$GelMixed{A}} and $GelMixed{A}[0]) {
      @Plate64 = ();
      foreach ('A','B','C','D','E','F','G','H') {
        push @Plate64, @{$GelMixed{$_}}, @{$$pPlate96{$_}}[8..11];
      }
      $pSheet = &SampleSheet (\@Plate64, -debug=>$debug);
      $file{sheet} = $file{OutStump} . sprintf ('_%02d_%02d.sht', $CtPlate-1, $CtPlate);
      if (open (OUTSHEET, ">$file{sheet}")) {
        print "saving sample sheet $file{sheet}\n";
      } else {
        print STDERR "ERROR: unable to save sample sheet as $file{sheet}\n";
        next;
      }
      printf OUTSHEET "%s$Linefeed", join ($Linefeed, @$pSheet);
      close OUTSHEET;
      %GelMixed = ();
    }

    # push higher one-third to mixed gel
    elsif (@{$$pPlate96{A}} > 8 and $$pPlate96{A}[8]) {
      foreach ('A','B','C','D','E','F','G','H') {
        push @{$GelMixed{$_}}, @{$$pPlate96{$_}}[8..11];
      }
    }

  } # end while $pPlate96

}


# prepare plate loading schemes and Sample sheets from file of
# clone/primer pairs
#
# INTERFACE
# - argument 1: file of read names
#
# - global options:
#   -debug      [STD]
#   -TraceRoot  Experiment file tree root (source), default:
#               $CorePath{ReadWatch}{ExperGroupDir}
#
sub ProgSeqPrimer {
  my $Linefeed = "\n";
  my ($FileReadname) = @_;
  my ($debug, $dbg2, @RcReadIndex, $PathExperBase, $pGlobalIdx);
  my ($pRead, $ReadCurr, $primer, $number, $ReadType, %ReadTypeNew);
  my (@plate, $pPlate96, %GelMixed, @Plate64, $pSheet);
  my (@RowOrder, $LineLeft, $LineRight, $CtPlate, $CtWell);
  my (%file);

  # function parameters
  $debug = $ProgOpt{-debug};
  @RcReadIndex = (exists $ProgOpt{-RcReadIndex}) ?
    @{$ProgOpt{-RcReadIndex}} : ();
  $PathExperBase = $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  $pGlobalIdx = ReadWatch::ReadIndex->new(@RcReadIndex);
  $pGlobalIdx->{base}{exper} = $PathExperBase;
  $file{OutStump} = &PathChgSuffix ($FileReadname, '', -last=>1);

  # get array of clone/read pairs
  unless (@{ $pRead = &PlainToTable ($FileReadname, -delimit=>{line=>$reEndl,col=>' +|\t' }) }) {
    print "ERROR: unable to read file of clone/primer pairs\n";
    exit 1;
  }

  # prepare readnames from clone/primer pairs
  # select first free name regarding read index and growing list of new reads
  foreach $ReadCurr (@$pRead) {
    $primer = ($ReadCurr->[1]=~m/^([rst])(\d)?$/) ? $1 : 'p';
    $number = $2;
    $ReadType = $ReadCurr->[0] .'.'. $primer;
    if ($ReadTypeNew{$ReadType}) {
      $ReadCurr->[2] = $ReadType . (++$ReadTypeNew{$ReadType});
    } else {
      $ReadCurr->[2] = $pGlobalIdx->FirstExtra ($ReadType, -number=>$number);
      $ReadTypeNew{$ReadType} = int substr ($ReadCurr->[2], -1, 1);
    }
  }

  # optionally: sort
  # ...

  # loop over 96-well plates made from list
  @plate = &PlateLoad96 ($pRead);
  while (($pPlate96 = shift (@plate)) or %GelMixed) {
    $CtPlate ++;
    if ($pPlate96) {

      # output plate listing
      $file{list} = $file{OutStump} . sprintf ('_%02d.list', $CtPlate);
      if (open (OUTLIST, ">$file{list}")) {
        print "saving plate listing $file{list}\n";
      } else {
        print STDERR "ERROR: to save plate listing as $file{list}\n";
        next;
      }
      @RowOrder = ('A','E','B','F','C','G','D','H');
      while (($LineLeft, $LineRight) = splice (@RowOrder, 0, 2)) {
        for ($CtWell=0; $CtWell<12; $CtWell++) {
          printf OUTLIST "%s%02d  %13s  %-7s     %s%02d  %13s  %-7s\n",
            $LineLeft,  $CtWell+1, $$pPlate96{$LineLeft}[$CtWell][0],  $$pPlate96{$LineLeft}[$CtWell][1],
            $LineRight, $CtWell+1, $$pPlate96{$LineRight}[$CtWell][0], $$pPlate96{$LineRight}[$CtWell][1];
        }
        printf OUTLIST "\n";
      }
      close OUTLIST;

      # output read name listing of plate
      $file{ListDbg} = $file{OutStump} . sprintf ('_%02d.read', $CtPlate);
      if (open (OUTLIST, ">$file{ListDbg}")) {
        print "saving plate listing $file{ListDbg}\n";
      } else {
        print STDERR "ERROR: to save plate listing as $file{ListDbg}\n";
        next;
      }
      foreach $LineLeft ('A' .. 'H') {
        for ($CtWell=0; $CtWell<12; $CtWell++) {
          printf OUTLIST "%s%02d  %13s  %-7s  %15s\n",
            $LineLeft,  $CtWell+1, $$pPlate96{$LineLeft}[$CtWell][0],  $$pPlate96{$LineLeft}[$CtWell][1],
            $$pPlate96{$LineLeft}[$CtWell][2];
        }
        printf OUTLIST "\n";
      }
      close OUTLIST;

      # output sample sheet for left two-third of plate
      @Plate64 = ();
      foreach ('A','B','C','D','E','F','G','H') {
        push @Plate64, map { $_->[2] } @{$$pPlate96{$_}}[0..7];
      }
      $pSheet = &SampleSheet (\@Plate64, -debug=>$debug);
      $file{sheet} = $file{OutStump} . sprintf ('_%02d.sht', $CtPlate);
      if (open (OUTSHEET, ">$file{sheet}")) {
        print "saving sample sheet $file{sheet}\n";
      } else {
        print STDERR "ERROR: unable to save sample sheet as $file{sheet}\n";
        next;
      }
      printf OUTSHEET "%s$Linefeed", join ($Linefeed, @$pSheet);
      close OUTSHEET;
    }

    # output mixed sample sheet
    if (%GelMixed and @{$GelMixed{A}} and $GelMixed{A}[0]) {
      @Plate64 = ();
      foreach ('A','B','C','D','E','F','G','H') {
        push @Plate64, map { $_->[2] } @{$GelMixed{$_}}, @{$$pPlate96{$_}}[8..11];
      }
      $pSheet = &SampleSheet (\@Plate64, -debug=>$debug);
      $file{sheet} = $file{OutStump} . sprintf ('_%02d_%02d.sht', $CtPlate-1, $CtPlate);
      if (open (OUTSHEET, ">$file{sheet}")) {
        print "saving sample sheet $file{sheet}\n";
      } else {
        print STDERR "ERROR: unable to save sample sheet as $file{sheet}\n";
        next;
      }
      printf OUTSHEET "%s$Linefeed", join ($Linefeed, @$pSheet);
      close OUTSHEET;
      %GelMixed = ();
    }

    # push higher one-third to mixed gel
    elsif (@{$$pPlate96{A}} > 8 and $$pPlate96{A}[8]) {
      foreach ('A','B','C','D','E','F','G','H') {
        push @{$GelMixed{$_}}, @{$$pPlate96{$_}}[8..11];
      }
    }

  } # end while $pPlate96

}


################################################################################
# read pool handling
################################################################################


# read pool data structure
#
# DESCRIPTION
# - this structure is prepared by function &GetReadpoolStruct
#
# - data structure is an array of hashes. Each read entry hash has following
#   data fields:
#   id         read identifier
#   IdCorrect  (optional) corrected read identifier
#   IdChanges  (optional) changes are made or should be made to identifier
#   field      hash of semantic fields of read identifier as worked out by
#              &ReadWatch::Read::ReadidToFields
#   IdAnti     identifier of anti read (that not neccessarily exists)
#   multiple   identifier occurs several times in the sequence source.
#              Value+1 is the actual number of occurrences of the identifier.
#   NmbAnti    number of existing anti reads
#


# get list of read IDs from a sequence source
#
# INTERFACE
# - ...
#
# - options:
#   -file         path of fastA source file, default: $ProgParam{SrcReads}
#
# - global options:
#   -SlcID     ...
#   -SlcDescr  ...
#
# - return val:   - array of IDs
#                 - undef if an error occurs
#
sub GetIds {
  my (%opt) = @_;
  my ($PathSeq);
  my ($CallGetIds, @ListId);

  # function parameters
  $PathSeq = $opt{-file} || $ProgParam{SrcReads};

  # prepare call to SeqHandle.pl
  $CallGetIds = join (' ', $CorePath{call}{SeqID},
    $ProgOpt{-SlcID} ? '-SlcID='.$ProgOpt{-SlcID} : (),
    $ProgOpt{-SlcDescr} ? '-SlcDescr='.$ProgOpt{-SlcDescr} : (),
    $PathSeq);

  # get list
  unless (open (INIDLIST, "$CallGetIds |")) {
    printf STDERR "ERROR: unable to get list of IDs, call was:\n  %s\n", $CallGetIds;
    exit 1;
  }
  @ListId = map {m/(.+)\s*\n$/} &ReadFile(\*INIDLIST);

  # exit SUB
  return @ListId;
}


# prepare read pool data structure from fastA source file
#
# INTERFACE
# - options:
#   -debug      [STD]
#   -file       path of fastA file to get a list of read identifiers,
#               default: $ProgParam{SrcReads}
#   -IdCorrect  correct read identifiers via &NiceName and enter
#               status of correctness
#   -IdList     supply a list of read identifiers (array reference)
#   -SlcUnique  select for unique read identifiers. A multiple-
#               occurring ID is returned only once.
#
# - return val: - reference to data structure
#               - wantarray:
#                 - reference to data structure
#                 - reference to read index (referrring to data structure)
#               - undef if an error occurs
#
sub GetReadpoolStruct {
  my (%opt) = @_;
  my ($debug, $dbg2);
  my (@ListId, %ReadEntry, @ReadArray, %ReadIndex);
  my ($id, $CtI, $IdPaired);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # get list of IDs
  unless (@ListId = @{$opt{-IdList}}) {
    unless ((@ListId = &GetIds (%opt)) and $ListId[0]) {
      print STDERR "ERROR while retrieving list of sequence IDs\n";
      exit 1;
    }
  }

  # loop over sequence IDs
  foreach $id (@ListId) {
    %ReadEntry = (
      id       => $id,
      multiple => 0,
      NmbAnti  => 0,
      );

    # correct name scheme
    if ($opt{-IdCorrect}) {
      $ReadEntry{IdCorrect} = $_;
      $ReadEntry{IdChanges} = &NiceName (\$ReadEntry{IdCorrect})
        || $id !~ m/$reSeqid{GscjRd}/;
    }

    # split identifier to semantic fields
    $ReadEntry{field} = &ReadidToFields($id) || {full=>$id};

    # unique ID?
    if ($ReadIndex{$id}) {
      $ReadIndex{$id}{multiple} ++;
      if ($opt{-SlcUnique}) { next }
      $ReadEntry{multiple} ++;
    }

    # pairing partner status
    if ($ReadEntry{field}{prd}) {

      # anti read present?
      for ($CtI=3; $CtI>=1; $CtI--) {
        $IdPaired = $ReadEntry{field}{cln}
          .'.'. $ReadidField{prd2prm}{GSCJ}{$ReadidField{prdAnti}{$ReadEntry{field}{prd}}}
          . $CtI;
        if (exists($ReadIndex{$IdPaired}) and defined($ReadIndex{$IdPaired})) {
          $ReadEntry{NmbAnti} ++;
          $ReadIndex{$IdPaired}{NmbAnti} ++;
        }
      }
      $ReadEntry{IdAnti} = $IdPaired;

    } elsif ($debug) {
      printf STDERR "%s. current primer unable to pair\n", &MySub;
      printf STDERR "%s. fields: %s\n", &MySub, join(',', sort keys %{$ReadEntry{field}});
    }

    # push to array
    push @ReadArray, { %ReadEntry };
    $ReadIndex{$id} = $ReadArray[$#ReadArray];
  }

  # exit SUB
  return (wantarray ? (\@ReadArray, \%ReadIndex) : \@ReadArray);
}


# prepare list concerning read identifier scheme and pairing
#
# INTERFACE
# - argument 1: sequence source file
#
sub ProgSeqPairs {
  my ($FileArg) = @_;
  my ($pReadList, $pRead);

  # get list of IDs
  # error handling see GetReadpoolStruct
  $pReadList = &GetReadpoolStruct (-file=>$FileArg, -IdCorrect=>1);

  # print report header
  printf "%s\n", join ("\t", 'id','id_ok','id_unique','anti_reads');

  # loop over sequence IDs
  # report
  foreach $pRead (sort {
    $a->{field}{lib} cmp $b->{field}{lib} or
    $a->{field}{plt} <=> $b->{field}{plt} or
    $a->{field}{coo} cmp $b->{field}{coo} or
    $a->{field}{prm} cmp $b->{field}{prm} or
    $a->{field}{num} <=> $b->{field}{num} or
                  $a <=> $b
  } @$pReadList) {
    printf "%s\n", join ("\t",
      $$pRead{id},
      int (! $$pRead{IdChanges}),
      int (! $$pRead{multiple}),
      int ($$pRead{NmbAnti})
      );
  }
}


# prepare list of missing counter reads
#
# INTERFACE
# - argument 1:  file containing list of read IDs
# - argument 2*: file containing list of clones for selection
#
# - global options
#   -debug      [STD]
#   -TraceRoot  Experiment file tree root (source), default:
#               $CorePath{ReadWatch}{ExperGroupDir}
#
# DESCRIPTION
# - list of suggested extra reads is printed to STDOUT
#
sub ProgReadMiss {
  my ($FileReadpool, $FileCloneslc) = @_;
  my ($debug, @RcReadIndex, $PathExperBase);
  my ($pIdList, $pGlobalIdx, %CloneSlcidx, $pCloneIdx);
  my ($pClone, @miss);

  ##############################################################################
  # pre-work

  # function parameters
  $debug = $ProgOpt{-debug};
  @RcReadIndex = (exists $ProgOpt{-RcReadIndex}) ?
    @{$ProgOpt{-RcReadIndex}} : ();
  $PathExperBase = $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};

  # get list of IDs => read pool
  # prepare index on read pool
  $pIdList = &LoadFoid ($FileReadpool);
  $pCloneIdx = &ReadTreeIndex ($pIdList, -format=>'clone', -idref=>1);
  if ($FileCloneslc and -e $FileCloneslc) {
    %CloneSlcidx = map { ($_=>1) } &LoadFoid ($FileCloneslc);
    foreach (keys %$pCloneIdx) {
      if (! $CloneSlcidx{$_}) { delete $$pCloneIdx{$_}; }
    }
  }
  if ($debug) {
    printf STDERR "%s. constructed following lists/indices:\n", &MySub;
    printf STDERR "  read pool: %d\n", int(@$pIdList);
    printf STDERR "  clone list for selection: %d\n", int(keys %CloneSlcidx);
    printf STDERR "  clone index: %d\n", int keys %$pCloneIdx;
  }

  # we need the global read index to see what's already out there
  #   (all Experiment files)
  $pGlobalIdx = ReadWatch::ReadIndex->new(@RcReadIndex);
  $pGlobalIdx->{switch}{-FullMatch} = 1;
  $pGlobalIdx->{base}{exper} = $PathExperBase;

  ##############################################################################
  # work

  # loop over clones in index
  foreach $pClone (values %$pCloneIdx) {

    # skip completed clones
    # skip empty clones (error?)
    if (exists($$pClone{'1'}) and %{$$pClone{'1'}} and
        exists($$pClone{'-1'}) and %{$$pClone{'-1'}}) { next }
    unless (%{$$pClone{'1'}} or %{$$pClone{'-1'}}) { next }

    # sample successful side of partially sequenced clones
    # get name of first extra (counter) read
    foreach my $ItEnd (qw(1 -1)) {
      unless (exists($$pClone{$ItEnd}) and %{$$pClone{$ItEnd}}) { next }
      push @miss, &ReadidToFields ($pGlobalIdx->FirstExtra ($$pClone{id}
        .'.'. $ReadidField{prd2prm}{GSCJ}{$ReadidField{prdAnti}{$ItEnd}}) );
    }
  }

  # print sorted list of missing anti reads
  foreach (sort {
    $$a{lib} cmp $$b{lib} or
    $$a{plt} <=> $$b{plt} or
    $$a{coo} cmp $$b{coo} or
    $$a{prm} cmp $$b{prm}
  } @miss) {
    print "$_->{full}\n";
  }
}


# filter list of reads / read file paths according to contamination lists
#
# INTERFACE
# - argument 1: output filename stem
# - argument 2: path of input file
#
sub ProgReadFilter {
  my ($PathOutStem, $PathIn) = @_;
  my ($pTab, %FilterQual, %FilterExtra, $FilterId);
  my ($line, $id, $hIn, $hOutAll, $hOut);

  # load files of filter IDs
  $pTab = &LoadFoid ($CorePath{ReadWatch}{FoidQual}, -FltEmpty=>1);
  %FilterQual = map { ($_=>1) } @$pTab;
  $pTab = &LoadFoid ($CorePath{ReadWatch}{FoidExtra}, -FltEmpty=>1);
  %FilterExtra = map { ($_=>1) } @$pTab;
  $FilterId = '\b('. join ('|',
    'JAX4[a-ez]\d{2,3}[a-h]\d{2}\.[rs]\d',   # genome shotgun
    'JC1[a-b]\d{2,3}[a-h]\d{2}\.[rs]\d',     # chromosome 1
    'JC2[a-gx-z]\d{2,3}[a-h]\d{2}\.[rs]\d',  # chromosome 2
    ) .')\b';

  # open input/output files
  $hIn = FileHandle->new($PathIn,'r');
  $hOutAll = FileHandle->new("${PathOutStem}_all",'w');
  open (OUTSCHEM, ">${PathOutStem}_scheme");
  open (OUTCQUAL, ">${PathOutStem}_contamqual");
  open (OUTCXTRA, ">${PathOutStem}_contamextra");
  open (OUTFLT, ">${PathOutStem}_filtered");

  # read list of Experiment paths from STDIN
  while (defined ($line=<$hIn>)) {
    print $hOutAll $line;
    $line =~ m|.*/(.*)$|; $id = $1;

    if (0) {}
    elsif ($id !~ m/$FilterId/o) { $hOut = \*OUTSCHEM; }
    elsif ($FilterQual{$id})     { $hOut = \*OUTCQUAL; }
    elsif ($FilterExtra{$id})    { $hOut = \*OUTCXTRA; }
    else                         { $hOut = \*OUTFLT; }
    print $hOut $line;
  }

  close (OUTSCHEM);
  close (OUTCQUAL);
  close (OUTCXTRA);
  close (OUTFLT);
}


################################################################################
# mixed actions
################################################################################


# compare Sample and Experiment folder structures
#
# INTERFACE
# - argument 1: target subfolder (for both structures, e.g. 'AX4')
#
# - global options:
#   -debug      [STD]
#   -TraceRoot  Experiment file tree root (source), default:
#               $CorePath{ReadWatch}{ExperGroupDir}
#
# DESCRIPTION
# - iterations over Sample and Experiment data structures have been done
#   already via MAIN.
#
sub ProgFolderCmp {
  my ($target) = @_;
  my ($debug, $dbg2, $PathExperBase);
  my ($PathFileIdx, $PathIdx, $pIdx, $pIdxNew);
  my ($LabelFolder, $pFolder);
  my ($bRepet, $bStrange);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $PathExperBase = $ProgOpt{-TraceRoot} || $CorePath{ReadWatch}{ExperGroupDir};
  &TraceRootChk ($PathExperBase);
  &TraceRootChk ($CorePath{GSCJ}{RawHome} .'/'. &PathSplit($PathExperBase)->{name});
  printf "date/time: %s\n", &TimeStr();
  printf "trace base directory: %s\n", $PathExperBase;
  printf "Sample/Experiment target: %s\n", $target||"''";
  printf "comparing Sample and Experiment data\n";

  # make index to available data
  # => %{$ProgParam{store}{folder}}
  $ProgParam{action}{SampleDir} = \&SampleDirRegist;
  &SampleIterat ($CorePath{GSCJ}{RawHome} .'/'. &PathSplit($PathExperBase)->{name} .'/'. $target);
  $ProgParam{action}{ExperDir} = \&ExperDirRegist;
  &ExperIterat ($PathExperBase .'/'. $target, -recursive=>1);

  # check read index file(s) for duplicates
  print "\nDOUBLETTES IN READ INDEX:\n";
  foreach $PathFileIdx ('.all_reads', '.duplicate_reads', '.more_reads') {
    $PathIdx = $CorePath{GSCJ}{RawHome} .'/'. &PathSplit($PathExperBase)->{name}
      .'/'. $target .'/.read_names/'. $target . $PathFileIdx;
    unless (-e $PathIdx and -s $PathIdx) { next }
    # create index data structure (hash) using read names (3rd column) as keys
    $pIdxNew = &PlainToTable ($PathIdx, -TabType=>'HIA', -delimit=>'SpaceRet', -ColIdx=>2, -debug=>$dbg2);
    unless ($pIdx) {
      $pIdx = $pIdxNew;
      next;
    } else {
      foreach (keys %$pIdxNew) {
        if ($$pIdx{$_}) {
          printf "found doublette for read name %s\n", $_;
          $bRepet = 1;
        } else {
          $$pIdx{$_} = $$pIdxNew{$_};
        }
      }
    }
  }
  printf "read indices contain %d non-redundant entries\n", int keys (%$pIdx);
  undef $pIdxNew;
  $bRepet or print "no doublettes\n";
  undef $bRepet;

  # add index file data to index data structure of Sample/Experiment folders
  $PathIdx = $CorePath{GSCJ}{RawHome} .'/'. &PathSplit($PathExperBase)->{name}
    .'/'. $target .'/.read_names/'. $target . '.all_reads';
  $pIdx = &PlainToTable ($PathIdx, -TabType=>'HIA', -delimit=>'SpaceRet', -debug=>$dbg2);
  foreach (keys %$pIdx) {
    $LabelFolder = $_;
    $LabelFolder =~ s/^Results_//;
    $ProgParam{store}{folder}{$LabelFolder} ||= {};
    $ProgParam{store}{folder}{$LabelFolder}{index} = 1;
  }

  # loop over indexed Sample/Experiment folders
  # to verify it's existence in both data structures
  print "\nERROR IN EXISTENCE CROSS-CHECK\n";
  foreach (sort keys %{$ProgParam{store}{folder}}) {
    $pFolder = $ProgParam{store}{folder}{$_};

    # report strange guys
    unless ($$pFolder{raw} and $$pFolder{exper} and $$pFolder{index}) {
      unless ($bStrange) {
        print "\traw\texper\tindex\n";
      }
      $bStrange = 1;
      printf "%s\t%s\t%s\t%s\n", $_,
        $$pFolder{raw}? '+':'-', $$pFolder{exper}? '+':'-', $$pFolder{index}? '+':'-';
    }
  }
  $bStrange or print "*** NONE ***\n";
  undef $bStrange;

  # tidy up
  print "\n";
}
# $Id: GscjRead.pl,v 1.28 2008/06/11 08:44:58 szafrans Exp $
