#! /usr/local/bin/perl
################################################################################
#
#  Perl AlnK Alignment Suite
#  Main Program
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 1998-2002,
#    szafrans@imb-jena.de
#
################################################################################
#
#  DESCRIPTION
#
# - See function &usage for description of command line syntax
#
# - each function comes along with a description at the beginning of the code
#   block
#
# - The code makes use of the Clustal W program. It should be properly
#   installed and accessible at path $CorePath{call}{clustalw}.
#
# - The code makes use of an external text editor. It's path should be
#   specified in $CorePath{call}{edit}.
#
# - A separate documentation of console commands is available under
#   $CorePath{call}{MeInstDoc}/Align.html
#
# - Project filenames cannot contain the "," character because it's the
#   data field delimiter in the project argument.  Additionally, project names
#   (stated in the project header of the project file) cannot contain the
#   "'" character or symbols that have any special meaning in regular
#   expressions. You're on the safer side if you only use word characters.
#
# - For description of the alignment project data structure see section
#   'alignment project data structure' in SeqLab::AlnProj.pm.
#
################################################################################
#
#  FUNCTIONS, DATA
#
# - MAIN
#   %GlobStore
#   $ProgFile,$ProgFstump
#   %ProgParam
#   $ProgMode,%ProgOpt,@ProgArg
#
# - usage help, command line arguments
#   &usage
#   &AddSwitch
#
# - user session: core
#   &SessionMain
#   &SessionDialog
#   &SessionOpen (currently building place)
#   &SessionClose (currently building place)
#   &SessionSave
#   &SessionExit
#   &SessionDismiss
#
# - user session: operation interface
#   &SessionParam
#   &SessionRevcompl
#   &SessionExtensShow
#   &SessionSort
#   &SessionEdit
#   &SessionReport
#   &SessionEntryAdd
#   &SessionSplit
#   &SessionAppend
#
# - BLAST session
#   &BlastsessMain
#   &BlastsessAdd
#
# - extract information
#   &SessionPrint
#   &ExtractFasta
#   &ExtractMatrix
#   &ExtractLogo
#
# - report
#   &ReportAlign
#   &ReportCover
#   &ReportDist
#   &ReportEnds
#   &ReportParam
#   &ReportStatist
#
# - alignment construction
#   &AlnprojEntryAdd
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - no use without input path at the moment
#   session command 'load' or 'open' is missing.
#
# - implement range syntax 'off..end'
#
# - use translator %SeqLab::SeqBench::SeqTypeBasic
#   to determine basic seq type when needed
#
# - look also for notes in the header of each function block
#
################################################################################


# global constants and variables
# also used by modules as a unique global data anchor
our %GlobStore;

# include path(s), includes
BEGIN {
  unshift @INC, grep{$_} split(/:+/,$ENV{KPERLPATH}||$ENV{PERLPATH}||'');
}
#use strict; use warnings;  # OK 20040813
use FileHandle;
use MainLib::StrRegexp;
use MainLib::Data;
use MainLib::Path;
use MainLib::Cmdline;
use MainLib::File;
use MainLib::FileTmp;
use MainLib::Misc;
use Math::Calc;
use Math::Plot2D;
use Math::PlotImg;
use database::Table;
use SeqLab::SeqBench;
use SeqLab::SeqFormat;
use SeqLab::SeqComp qw(&MatrixSimrelCalc);
use SeqLab::Align;
use SeqLab::AlnProj;
use SeqLab::Blast;


# script ID
# - program name as specified on the command line
our $ProgFile = ( split('/',__FILE__) )[-1];
our $ProgFstump=$ProgFile; $ProgFstump=~s/\.\w{1,4}$//;

# global constants (esp. default values)
our %ProgParam;
$ProgParam{Blast}{ThreshQualRel}      = 0.976;
$ProgParam{Blast}{ThreshExpectMultip} = 10000;
$ProgParam{Blast}{ThreshScoreMultip}  = 0.90;
$ProgParam{default}{DbStatist} = "$CorePath{GSCJ}{DictyHome}/$CorePath{GSCJ}{DictyPre}";
$ProgParam{default}{OutImgWidth} = 640;
$ProgParam{default}{ProgMode} = 'session';

# working desk
$ProgParam{store} = undef;


# manage I/O #####################################

# organise I/O handles
&Unbuffer();

# organise tmp files
# - encapsulation in a sub{} allows MainLib::FileTmp to identify the calling
#   package
my $pcFT = sub{ $ProgParam{TmpManag} = MainLib::FileTmp->new(); };
&$pcFT;


# command line interface #########################
# NOTE:
# - &GetoptsNArgs (via &AddSwitch) modifies global variables:
#   $ProgMode @ProgArg %ProgOpt
# - @ProgArg may be pre-filled in &GetoptsNArgs via &AddSwitch (option -fofn)

# arguments, switches, default subprogram
our $ProgMode = undef;
our @ProgArg = ();
our %ProgOpt = ();
@ProgArg = &GetoptsNArgs();
$ProgMode ||= $ProgParam{default}{ProgMode};

# eventually open LOG file
if ($ProgOpt{-log}) {
  $ProgOpt{LogFile} = ($ProgOpt{-log}!=1) ? $ProgOpt{-log} : undef;
  &LogOpen (-file=>$ProgOpt{LogFile}, -stamp=>$ProgFstump, -prog=>"$ProgFile -$ProgMode");
}
END {
  $ProgOpt{-log} and &LogClose();
}


# work flow manifold #############################

# chain to program mode without input argument(s)
if (0) { }
elsif (!@ARGV or $ProgMode=~m/^h(elp)?$/i) { &usage() }

# ensure input argument(s)
unless (@ProgArg) {
  die "ERROR: input arguments missing\n";
}
# validate input argument(s)
# currently, we do not do that because syntax $file,$prj is allowed
# foreach (@ProgArg) {
#   ( -s $_ ) or printf STDERR "WARNING: input file %s does not exist or has zero length\n", $_||"''";
# }

# chain to program mode (with input argument(s))
if (0) { }
elsif ($ProgMode =~ m/^blast$/i) {
  &BlastsessMain (shift @ProgArg);
}
elsif ($ProgMode =~ m/^session$/i) {
  &SessionMain (shift @ProgArg);
}
else {
  die "ERROR: unknown program mode or switch '$ProgMode'\n";
}

# exit script successfully
# cf. END blocks!
exit 0;


################################################################################
# usage help, command line arguments
################################################################################


sub usage {
  print "\n";
  print <<END_USAGE;
DESCRIPTION
 suite for managing sequence alignment projects. In default mode, the
 program functions as a console. A detailed documentation of console commands
 is available under <package>/DocExample/Align.html.

COMMAND LINE SYNTAX
 $ProgFile  -<ModeSwitch> [-<OptionalSwitch> ...] <Arg1> [<Arg2> ...]

Arguments
---------
 Typically, args specify alignment input files.  Accepted formats are:
 internal *.alk or Clustal W format.

path arguments:
 Relative paths will be resolved according to the pwd. Prefixes "~" and "~uid"
 are resolved to the home directories. Path "-" resolves to STDIN or STDOUT,
 depending on the context.

ModeSwitch (case-insensitive)
-----------------------------
<none>            default ModeSwitch -$ProgParam{default}{ProgMode} if program arguments are given.
                  Otherwise like ModeSwitch -help.
-blast            add BLAST match HSPs to alignment project
-h(elp)           output command line syntax description and exit
-session          alignment project session (default mode).

OptionalSwitch (case-insensitive)
---------------------------------
switch argument types: B:=boolean, F:=floating point/scientific, N:=integer,
S:=string, X:=varying type.

-BlastLimit=N     limit BLAST search to first N BLAST hits
-debug(=N)        print debug protocol to STDOUT/STDERR
                  N           debug depth value
-log(=S)          redirect STDOUT and STDERR to LOG file.
                  S           log file path, default path worked out
                              automatically from built-in directives.
-noDB             don't refer to original sequence data from the database.
                  this switch doesn't work with the ModeSwitch -Session.
-OutImgTransp     turn image's background transparent
-OutImgWidth=N    define the output image's pixel width
-script           for mode '-session' this option forces the program to behave
                  like an interpreter rather than a command line console. No
                  prompt is printed out and dialogue functions are suppressed.
-timer            print time-performance protocol to STDOUT.
-v(erbose)        print extended progress report to STDOUT.

Environment Variables
---------------------
 \$PERLPATH        primary search path for Perl package look-up
 \$TEMPPATH        directory for storage of temporary files, default /tmp

Temporary Files
---------------
 The program may generate temporary files. These will be placed either in a
 directory specified by \$ENV{TEMPPATH} or in /tmp.
END_USAGE
  print "\n";
  exit 0;
}


# add program switches to global table (hash)
#
# INTERFACE
# - argument 1:  switch argument without leading '-'
#
# - global data:
#   $ProgMode
#   %ProgOpt  switch data which gets processed here
#
# DESCRIPTION
# - this function gets called by &MainLib::Misc::GetoptsNArgs
# - switch arguments are tested for validity. Arguments are parsed with highest
#   possible tolerance. This way, syntax errors can reported in accordance to
#   the actual switch, rather than reporting ANY syntax error.
#
sub AddSwitch {
  my $switch = shift;

  # optional switches
  if ($switch =~ m/^BlastLimit=(\d+)?$/i) {
    $ProgOpt{-BlastLimit} = $1;
    return;
  }
  if ($switch =~ m/^debug(=(\d+))?$/i) {
    $ProgOpt{-debug} = defined($2) ? int($2) : 1;
    return;
  }
  if ($switch =~ m/^log(=(.*))?$/i) {
    $ProgOpt{-log} = $2 ? &PathExpand($2) : 1;
    return;
  }
  if ($switch =~ m/^noDB$/i) {
    $ProgOpt{-noDB} = 1;
    return;
  }
  if ($switch =~ m/^OutImgTransp(ar)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgTransp} = 1;
    return;
  }
  if ($switch =~ m/^OutImgWidth=(\d+)?$/i) {
    $ProgOpt{-OutImg} ||= '';
    $ProgOpt{-OutImgWidth} = $1;
    return;
  }
  if ($switch =~ m/^script$/i) {
    $ProgOpt{-script} = 1;
    return;
  }
  if ($switch =~ m/^timer$/i) {
    $ProgOpt{-timer} = 1;
    return;
  }
  if ($switch =~ m/^v(erbose)?$/i) {
    $ProgOpt{-verbose} = 1;
    return;
  }

  # program mode switches
  if (defined($ProgMode)) {
    die sprintf "ERROR: multiple specification of program mode or unknown switch, %s and %s\n",
      '-'.($ProgMode||"''"), '-'.($switch||"''");
  }
  else {
    $ProgMode = $switch;
    return;
  }
}


################################################################################
# user session: core
################################################################################


# session for alignment project
#
# INTERFACE
# - argument 1: project descriptor: (path,name) of project
#
# DESCRIPTION
# - program exit door is in SessionDialog. Only if there's no project
#   found this function terminates the program.
#
sub SessionMain {
  my ($Target) = shift;
  my ($PathProject, $NameProj, $pProj);

  # function parameters
  ($PathProject, $NameProj) = split (/,/, $Target);

  # read project
  $pProj = &AlnprojGet ($PathProject, $NameProj,
    -dialog => ! ($ProgOpt{-script} || $ProgOpt{-log}),
    -noDB   => $ProgOpt{-noDB},
    -timer  => $ProgOpt{-timer},
    -debug  => $ProgOpt{-debug},
    );
  unless ($pProj) {
    die sprintf "ERROR: unable to find %s in file %s\n",
      $NameProj ? "project $NameProj" : 'any project', $PathProject||"''";
  }
  &AlnCompress ($$pProj{align});

  # switch to dialogue
  &SessionDialog ($pProj);
}


# dialogue for user session
#
# INTERFACE
# - argument 1: reference to project data
#
# DESCRIPTION
# - function has program exit doors via:
#   - command 'exit'    -> function SessionExit
#   - command 'dismiss' -> function SessionDismiss
#
sub SessionDialog {
  my $pProj = shift;
  my $debug = 0;
  my (%Action, $order, @param);

  # hash of possible user commands and corresponding functions
  %Action = (
    add        => \&SessionEntryAdd,
    append     => \&SessionAppend,
#    close      => \&SessionClose,
    del        => \&AlnprojEntryDel,
    dismiss    => \&SessionDismiss,
    edit       => \&SessionEdit,
    exit       => \&SessionExit,
    fasta      => \&ExtractFasta,
    hide       => \&AlnprojExtensHide,
    logo       => \&ExtractLogo,
    matrix     => \&ExtractMatrix,
    name       => \&AlnprojName,
#    open       => \&SessionOpen,
    param      => \&SessionParam,
    print      => \&SessionPrint,
    report     => \&SessionReport,
    resolveunk => \&SessionResolveUnk,
    revcompl   => \&SessionRevcompl,
    save       => \&SessionSave,
    show       => \&SessionExtensShow,
    sort       => \&SessionSort,
    split      => \&SessionSplit,
    validate   => \&AlnprojExtensValid,
    );

  while (1) { # console loop, anonymous block doesn't work

    # print prompt, get action
    unless ($ProgOpt{-script}) {
      print 'AlnK console> ';
    }
    chop ($order=<STDIN>);

    # parse command
    ($order, @param) = split (/ +/, $order);
    $order =~ tr/A-Z/a-z/;
    unshift @param, $pProj;
    $debug and printf STDERR "%s. got command '$order'\n", &MySub;

    # action
    $order or next;
    if ($Action{$order}) {
      &{$Action{$order}} (@param);
    } else {
      unless ($ProgOpt{-script}) {
        print "unknown command '$order'\n";
      }
    }
  }

}


# change project in session
#
# INTERFACE
# - argument 1: reference to current project data
# - argument 2: (path,name) of new project
#
# - global options:
#   -debug      [STD]
#
sub SessionOpen {
  my ($pProj, $ArgIn) = @_;
  my ($debug, $dbg2);
  my ($PathIn, $NameProj);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # save current project
  if ($pProj and %$pProj) { &SessionClose ($pProj); }

  # work out project directives
  ($PathIn, $NameProj) = split (/,/, $ArgIn);

  # open new project
  # project structure is replaced by the new project
  %$pProj = %{ &AlnprojGet ($PathIn, $NameProj,
    -dialog => ! ($ProgOpt{-script}||$ProgOpt{-log}),
    -noDB   => $ProgOpt{-noDB},
    -timer  => $ProgOpt{-timer},
    -debug  => $dbg2) };
  unless (%$pProj) {
    die sprintf "ERROR: unable to find %s in file %s (arg was %s)\n",
      $NameProj ? "project $NameProj" : 'any project', $PathIn||"''", $ArgIn;
  }

  # nice alignment
  &AlnCompress ($$pProj{align});
}


# close current project of the session
#
# INTERFACE
# - argument 1: reference to project data
#
# - global options:
#   -debug      [STD]
#
sub SessionClose {
  my $pProj = shift;
  my ($debug);

  # work out function parameters
  $debug = $ProgOpt{-debug};

  # save file if changed
  if ($$pProj{changes}) {
    if ($ProgOpt{-script} or $ProgOpt{-log}) {
      &SessionSave ($pProj);
    } else {
      print  "changes in current project, save project?";
      if (&QueryConfirm()) {
        &SessionSave ($pProj);
      }
    }
  } elsif ($debug) {
    print  "no changes in current project\n";
  }

  # clear container of sequence originals
  &SeqOrigClear()
}


# save project
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: output file path, cf. description
#
# - options:
#   -force      overwrite without interaction if file exists
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - without argument 2 (stated as "''") it functions as a 'save', with output
#   file argument it functions as a 'save as'. Note the number of 2 arguments is
#   a MUST if options are specified.
#
sub SessionSave {
  my ($pProj, $PathOut, %opt) = @_;
  my ($debug, $dbg2);

  # work out function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # function parameters
  if (! $PathOut or $PathOut eq "''") {
    if ($debug and ! $$pProj{changes}) {
      print  "no changes in current project\n";
      # the action will always be performed although there may be no changes
      # at all. It's safest this way just a little bit more work which does
      # not hurt anyone.
    }
    $PathOut = $$pProj{Path};
  } else {
    # safe overwrite by user interaction
    if (-e $PathOut and ! $opt{-force} and ! $ProgOpt{-script} and ! $ProgOpt{-log}) {
      print "overwrite existing file?";
      unless (&QueryConfirm()) { return }
    }
  }

  # output file, return from SUB
  if ($debug) {
    printf "%s. saving project to file %s\n", &MySub, $PathOut;
    printf "%s. number of alignment entries: %d\n", &MySub, int @{$$pProj{align}};
  }
  if (&AlnprojToFile ($pProj, $PathOut, -debug=>$dbg2)) {
    $$pProj{Path} = $PathOut;
    $$pProj{changes} = 0;
    return;  # success
  } else {
    print  "an error occurred while saving the project\n";
    $PathOut = $ProgParam{TmpManag}->Create();
    &AlnprojToFile ($pProj, $PathOut, -debug=>$dbg2);
    printf "project saved as %s instead\n", $PathOut||"''";
    return;  # error
  }
}


# exit session
#
# INTERFACE
# - argument 1: reference to project data
#
# - global options:
#   -debug      [STD]
#
sub SessionExit {
  my $pProj = shift;
  my $debug = $ProgOpt{-debug};

  # save file if changed
  if ($$pProj{changes}) {
    if ($ProgOpt{-script} or $ProgOpt{-log}) {
      &SessionSave ($pProj);
    } else {
      print "changes in current project, save project?";
      if (&QueryConfirm()) {
        &SessionSave ($pProj);
      }
    }
  } elsif ($debug) {
    print "no changes in current project\n";
  }

  # exit program
  exit 0;
}


# exit session harshly
#
sub SessionDismiss {
  exit 0;
}


################################################################################
# user session: operation interface
################################################################################


# read or assign project parameter values
#
# INTERFACE
# - argument 1:  reference to project data
#                the project itself is changed if arg3 is specified!
# - argument 2*: string specifying parameter entry, default: parameter root
#                (resulting in output of the complete parameter data tree). The
#                hierarchy delimiter is '::'.
# - argument 3*: optional: parameter value for assignment
#
# - global options:
#   -debug      [STD]
#
sub SessionParam {
  my ($pProj, $ParamSpec, $val) = @_;
  my ($debug, $dbg2);
  my ($AccessNext, $ParamVal, $pParam);

  # work out parameters from arg(s)
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # get reference to parameter entry through recursion
  $debug and printf STDERR "%s. accessing parameter: %s\n", &MySub, join ('->', split (/::/, $ParamSpec));
  $debug and printf STDERR "%s. assigning value: %s\n", &MySub, defined($val) ? $val : 'NONE';
  $ParamVal = defined($val) ? $$pProj{param} : &DataClone($$pProj{param});
  foreach $AccessNext (grep{$_} split(/::/,$ParamSpec||'')) {
    if ($AccessNext =~ m/^\d+$/) {
      $$ParamVal[$AccessNext] ||= {};
      $pParam = \$$ParamVal[$AccessNext];
      $ParamVal = $$ParamVal[$AccessNext];
    } else {
      $$ParamVal{$AccessNext} ||= {};
      $pParam = \$$ParamVal{$AccessNext};
      $ParamVal = $$ParamVal{$AccessNext};
    }
  }

  # action:
  # - do assignment or
  # - print data (tree)
  if (defined ($val)) {
    $$pParam = $val;
  } else {
    &DataPrint ($ParamVal);
  }
}


# change project sequence entries to reverse-complement
#
# INTERFACE
# - argument 1: reference to project data
#               the project itself is changed!
#
# DEBUG, CHANGES, ADDITIONS
# - move switching of 'orient' field to &AlnRevcompl.
#
sub SessionRevcompl {
  my ($pProj) = @_;
  my ($CtI);

  # change alignment, switch values for strandedness
  $$pProj{align} = &AlnRevcompl ($$pProj{align});
  $$pProj{consens} = $$pProj{align}[0];
  for ($CtI=1; $CtI<@{$$pProj{align}}; $CtI++) {
    $$pProj{align}[$CtI]{orient} = ($$pProj{align}[$CtI]{orient} eq 'F') ? 'R' : 'F';
  }
  $$pProj{changes} = 1;
}


# replace 'unknown symbols' in sequence entries to consensus
#
# INTERFACE
# - argument 1: reference to project data
#               the project itself is changed!
#
sub SessionResolveUnk {
  my ($pProj, $ThreshReport) = @_;
  my ($debug, $dbg2);
  my ($pResolve);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # change alignment
  ($$pProj{align}, $pResolve) = &AlnResolveUnk ($$pProj{align},
    -report=>$ThreshReport, -debug=>$dbg2);

  # report vague replacements
  if (@$pResolve) {
    print  "uncertainties in resolving unknown symbols:\n";
    printf "%s\n", join ("\t", 'pos','id','consens','entropy','seqrange');
    foreach (sort { $a->{pos} <=> $b->{pos}; } @$pResolve) {
      printf "%s\n", join ("\t", @{$_}{'pos','id','consens','entropy','seqrange'});
    }
  } else {
    print  "resolving unknown symbols done - no uncertainties found\n";
  }
}


# show stretch extension
#
# INTERFACE
# - argument 1: reference to project data
#               data is explicitly changed.
# - arguments+: - length  length limit for extension. An optional sign
#                         specifies the stretch end to be extended (-: left,
#                         +: right).
#               - end     stretch end to be extended ('left' or 'right').
#               - ids     identifiers of stretches to be extended
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - This is desired for the concept of hiding / showing extensions of stretch
#   ends in divergent SimClusters.
# - The referenced project data is explicitly changed. Changes aren't
#   registered in the flag $$pProj{changes}, as they are regarded as
#   'layout modifications'.
# - offset values of the sequence entries should refer to the validated
#   part of the sequence stretch (big letters).
# - Parameters are represented internally as a hash. $Param{id} is an
#   array of pointers to the sequence structures in @{$$pProj{align}}.
#
sub SessionExtensShow {
  my $LenMax = 100;
  my ($pProj, @PlusArg) = @_;
  my ($debug, $dbg2, %Param);
  my ($LenEff, $StrWork, $StretchStrPos, $temp);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  foreach $temp (@PlusArg) {

    # effective end for stretch extension
    if ($temp eq 'left' or $temp eq 'right') {
      $Param{$temp} = 1;
      next
    }

    # effective length for stretch extension
    if ($temp =~ m/^(-|\+)?(\d+)$/) {
      $Param{length} = $2;
      if ($1 eq '-') { $Param{left}  = 1; }
      if ($1 eq '+') { $Param{right} = 1; }
      next;
    }

    # effective IDs for stretch extension
    foreach (@{$$pProj{align}}) {
      if ($temp eq $$_{id}) {
        push @{$Param{id}}, $_;
        next;
      }
    }
  }

  # work out final parameters
  unless ($Param{id}) {
    @{$Param{id}} = @{$$pProj{align}};
    if ($Param{id}[0]{id} =~ m/$reAlnConsens/io) {
      shift @{$Param{id}};
    }
  }
  unless ($Param{length}) {
    $Param{length} = $LenMax;
  }
  if (! $Param{left} and ! $Param{right}) {
    $Param{left}  = 1;
    $Param{right} = 1;
  }

  # remove current stretch end extensions
  &AlnprojExtensHide ($pProj);

  # extend left end
  if ($Param{left}) {

    # insert left margin space
    foreach (@{$$pProj{align}}) {
      substr ($_->{sequence}, 0, 0) = '-' x  $Param{length};
    }

    # extend stretches
    foreach (@{$Param{id}}) {

      # get stretch extension
      $StrWork = &SeqExtend ($pProj, $_, -1, -debug=>$dbg2);
      $StrWork =~ tr/A-Z/a-z/;

      # extend sequence stretch
      $_->{sequence} =~ m/^-*/;
      $StretchStrPos = length $&;
      $LenEff = &Min (length $StrWork, $Param{length});
      substr ($_->{sequence}, $StretchStrPos - $LenEff, $LenEff) =
        substr ($StrWork, -$LenEff, $LenEff);
      $debug and printf STDERR "%s. extending %s at the left, extension: %s\n", &MySub,
        $_->{id}, $StrWork;
    }
  }

  # extend right end
  if ($Param{right}) {
    foreach (@{$Param{id}}) {

      # delete end space
      $_->{sequence} =~ s/-*$//;

      # get stretch extension
      $StrWork = &SeqExtend ($pProj, $_, 1, -debug=>$dbg2);
      $StrWork =~ tr/A-Z/a-z/;

      # extend sequence stretch
      $_->{sequence} .= substr ($StrWork, 0, $Param{length});
    }
  }

  # nice alignment
  &AlnMargins ($$pProj{align});
}


# sort sequence entries in project
#
# INTERFACE
# - argument 1: reference to project data
#               the project itself is changed!
# - argument X: offset   sort by sequence offset in the alignment
#
sub SessionSort {
  my $SortType = 'id';
  my ($pProj, @PlusArg) = @_;
  my ($ConsEntry);

  # function parameters
  foreach (@PlusArg) {
    if ($_ eq 'offset') {
      $SortType = 'offset';
    } else {
      printf STDERR "%s. WARNING: unknown argument '$_' in session call 'sort'\n", &MySub;
    }
  }

  # sort alignment, excluding consensus sequence
  $ConsEntry = shift @{$$pProj{align}};
  &AlnSort ($$pProj{align}, -SortType=>$SortType);
  unshift @{$$pProj{align}}, $ConsEntry;
  $$pProj{consens} = $$pProj{align}[0];

  # exit SUB
  $$pProj{changes} = 1;
  return;
}


# edit project alignment in external editor
#
# INTERFACE
# - argument 1: reference to project data
# - argument X: uncond   switch off the 'condensed' feature
#               1block   show alignment in one big Clustal W block
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - project is left unchanged if an error occurs in &AlnprojClustalOpen.
#
sub SessionEdit {
  my $bCondens = 1;
  my $b1Block  = 0;
  my ($pProj, @PlusArg) = @_;
  my ($debug, $dbg2, $PathTempfile, $pProjNew);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  foreach (@PlusArg) {
    if ($_ eq 'uncond' or $_ eq 'uncomp') {
      $bCondens = 0;
    } elsif ($_ eq '1block') {
      $b1Block = 1;
    } else {
      print "Unknown argument '$_' in session call 'edit'\n";
    }
  }

  # save temporary Clustal W file
  $PathTempfile = $ProgParam{TmpManag}->Create();
  unless ($hOutClust = &GetWriteHandle($PathTempfile,-HdlOnSucc=>undef)) {
    return;
  }
  print $hOutClust &AlnprojClustalSprint ($$pProj{align},
    -compact    => $bCondens,
    -ConsensNum => 1,
   '-1block'    => $b1Block,
    -debug      => $dbg2);
  close $hOutClust;

  # call editor
  system "$CorePath{call}{edit} $PathTempfile";

  # pick up project from edited file
  $pProjNew = &AlnprojClustalOpen ($PathTempfile, undef,
    -dialog => 1,
    -noDB   => $ProgOpt{-noDB},
    -debug  => $dbg2);
  unless ($pProjNew) {
    printf STDERR "%s. ERROR: edited project is corrupt, project left unchanged\n", &MySub;
    printf STDERR "  edited file left as %s\n", $PathTempfile;
    return;
  }

  # tidy up
  # - compact alignment
  # - redirect consensus reference to updated consensus
  $$pProj{align} = &AlnCompress ($$pProjNew{align}, -debug=>$dbg2);
  $$pProj{consens} = $$pProj{align}[0];
  $$pProj{changes} = 1;
  $debug or unlink $PathTempfile;
}


# sort sequence entries in project
#
# INTERFACE
# - argument 1:  reference to project data
#                the project itself is changed!
# - argument 2*: additional arguments for report function
#
sub SessionReport {
  my %RepCall = (
    align   => \&ReportAlign,
    cover   => \&ReportCover,
    dist    => \&ReportDist,
    diverg  => \&ReportDist,
    ends    => \&ReportEnds,
    param   => \&ReportParam,
    statist => \&ReportStatist,
    );
  my ($pProj, $RepType, @PlusArg) = @_;

  # chain to subroutine
  if (defined $RepCall{$RepType}) {
    &{$RepCall{$RepType}} ($pProj, @PlusArg);
  } else {
    if ($RepType) {
      print  STDERR "ERROR: unknown report type '$RepType'\n";
    } else {
      print  "available report types:\n";
      map { print  "$_\n" } sort keys %RepCall;
    }
  }
}


# add new sequence stretch to alignment
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: identifier of sequence to be added
# - argument 3-...: see &AlnprojEntryAdd.
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - work is done by &AlnprojEntryAdd.
#
sub SessionEntryAdd {
  my ($pProj, $SeqID, @PlusArg) = @_;
  my ($debug, $dbg2);
  my (%PairInfo, $Identity, $bConsensChg);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # check if ID already exists
  foreach (@{$$pProj{align}}) {
    if ($SeqID eq $_->{id}) {
      print "sequence already contributing to project, continue anyway?";
      if (! &QueryConfirm()) { return }
      else { last }
    }
  }

  # move pair info to appropriate structure, do work
  %PairInfo = (
    strand     => $PlusArg[0],
    StretchOff => $PlusArg[1],
    StretchEnd => $PlusArg[2],
    AlnOff     => $PlusArg[3],
    AlnEnd     => $PlusArg[4],
    );
  ($Identity, $bConsensChg) =
    &AlnprojEntryAdd ($pProj, $SeqID, \%PairInfo,
      -dialog => ! ($ProgOpt{-script} || $ProgOpt{-log}),
      -debug  => $dbg2);

  # analyse and report results
  printf "new sequence stretch added, %f rel. identity to consensus sequence\n", $Identity;
  if ($bConsensChg) {
    print "consensus length has changed\n";
  }
}


# split project into two parts - add parts to project file - restart
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: consensus length of the 5' part
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - length value of argument 2 refers to the pure consensus sequence.
# - The split into two parts according to the given 5' end length value.
#   The mother project and the two part projects are written back to
#   a temporary project file and then the file is handed over to
#   AlnprojGet allowing to choose a project to be edited next.
#
# DEBUG, CHANGES, ADDITIONS
# - So far, the function doesn't work in script mode. It's because there's
#   a dialogue in the course of temporary project file reload.
#
sub SessionSplit {
  my ($pProj,$LenCons5end) = @_;
  my $debug = $ProgOpt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  unless ($LenCons5end) {
    printf STDERR "%s. argument ERROR, syntax is:\n", &MySub;
    print  STDERR "  split <Len5primePart>\n";
    return;
  }
  if ($ProgOpt{-script}) {
    printf STDERR "%s. SORRY, ERROR: i don't work in script mode\n", &MySub;
    return;
  }

  # split project, delete empty entries in child projects
  $debug and printf STDERR "%s. calling AlnprojSplit\n", &MySub;
  my ($pProj5end,$pProj3end) = &AlnprojSplit ($pProj, $LenCons5end, -consens=>1, -debug=>$dbg2);
  unless ($pProj5end and $pProj3end) {
    printf STDERR "%s. ERROR in AlnprojSplit: missing at least one project fragment\n", &MySub;
    return;
  }
  &AlnCompress ($$pProj5end{align}, -debug=>$dbg2);
  &AlnCompress ($$pProj3end{align}, -debug=>$dbg2);

  # names for project split files
  $$pProj5end{name} = $$pProj{name} .'_5part';
  $$pProj3end{name} = $$pProj{name} .'_3part';

  # save temporary project file
  my $PathTmp = $ProgParam{TmpManag}->Create();
  $debug and printf STDERR "%s. saving temporary file %s\n", &MySub, $PathTmp||"''";
  unless (open (PROJFILE, ">$PathTmp")) {
    printf STDERR "%s. ERROR: unable to open temporary path %s\n", &MySub, $PathTmp||"''";
    return;
  }
  print PROJFILE $$pProj{DocAnte};
  print PROJFILE &AlnprojSprint($pProj,-debug=>$dbg2);
  print PROJFILE "\n";
  print PROJFILE &AlnprojSprint($pProj5end,-debug=>$dbg2);
  print PROJFILE "\n";
  print PROJFILE &AlnprojSprint($pProj3end,-debug=>$dbg2);
  print PROJFILE $$pProj{DocPost};
  close PROJFILE;

  # reload temporary project file
  my $pProjNew = &AlnprojGet ($PathTmp, $$pProj{name},
    -dialog => ! ($ProgOpt{-script} || $ProgOpt{-log}),
    -debug  => $dbg2);
  unless ($pProjNew) {
    printf STDERR "%s. ERROR, BUG: finally unable to find project '%s' in file '%s'\n", &MySub,
      $$pProj{name}, $PathTmp||"''";
    return;
  }
  $debug or unlink $PathTmp;
  &AlnCompress ($$pProjNew{align});

  # replace project
  $$pProjNew{Path} = $$pProj{Path};
  %$pProj = %$pProjNew;
  $$pProj{changes} = 1;
}


# append project
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: end for append ('5' or '3')
# - argument 3: project descriptor for project to append
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - The joint project will have the project parameters of the mother
#   project (which is the currently active project).
# - The project to be appended will be load from the disk space, although
#   it might be part of the project file of the current project.
#
sub SessionAppend {
  my ($pProj, $End, $ProjDescr) = @_;
  my ($debug, $dbg2);
  my (@Descr, $pProjNew, $pProjJoin);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  @Descr = split (/,/, $ProjDescr);
  if (@Descr < 1 or @Descr > 2) {
    printf STDERR "%s. argument ERROR, syntax is:\n", &MySub;
    print  STDERR "  append <EndNum> <ProjectDescriptor>\n";
    return;
  } elsif (@Descr < 2) {
    push @Descr, undef;
  }
  if ($End ne '5' and $End ne '3') {
    printf STDERR "%s. argument ERROR for end specification\n", &MySub;
    return;
  }
  &AlnprojExtensHide ($pProj);

  # open second project
  $pProjNew = &AlnprojGet (@Descr,
    -dialog => ! ($ProgOpt{-script} || $ProgOpt{-log}),
    -noDB   => $ProgOpt{-noDB},
    -timer  => $ProgOpt{-timer},
    -debug  => $dbg2);
  unless ($pProjNew) {
    printf STDERR "%s. ERROR: unable to find referenced project %s\n", &MySub, join (' / ', @Descr);
    return;
  }
  &AlnCompress ($$pProjNew{align});
  &AlnprojExtensHide ($pProjNew);

  # join projects
  if      ($End eq '5') {
    $pProjJoin = &AlnprojJoin ($pProjNew, $pProj);
  } elsif ($End eq '3') {
    $pProjJoin = &AlnprojJoin ($pProj, $pProjNew);
  }
  unless ($pProjJoin) {
    printf STDERR "%s. ERROR in \&AlnprojJoin\n", &MySub;
    return;
  }

  # replace alignment in current project
  $$pProj{align} = $$pProjJoin{align};
  $$pProj{consens} = $$pProj{align}[0];
  $$pProj{changes} = 1;
}


################################################################################
# BLAST session
################################################################################


# BLAST session
#
# INTERFACE
# - argument 1: project descriptor: (path,name) of project
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - function has the main program exit door
#
sub BlastsessMain {
  my ($target, %opt) = @_;
  my $debug = $ProgOpt{-debug} || 0;
  my $dbg2  = $debug ? $debug-1 : undef;

  # work out and check project directives
  my ($PathProj,$NameProj) = split (/,/,$target);
  if ($ProgOpt{-noDB}) {
    unless (%SeqLab::AlnProj::SeqOrig) {
      %SeqLab::AlnProj::SeqOrig = ( fake=>'' );  # fake to suppress sequence preload
    }
  }

  # read project
  my $pProj = &AlnprojGet ($PathProj, $NameProj,
    -dialog => 0,
    -noDB   => $ProgOpt{-noDB},
    -timer  => $ProgOpt{-timer},
    -debug  => $dbg2);
  unless ($pProj) {
    printf STDERR "ERROR: unable to find project %s in file %s\n", $NameProj||"''", $PathProj||"''";
    exit 1;
  }
  &AlnCompress ($$pProj{align});
  my $NmbEntryAnte = @{$$pProj{align}};
  my $NmbEntryAdd;

  # print protocol
  if ($debug or $ProgOpt{-verbose} or $ProgOpt{-log}) {
    print "starting BLAST search for new alignment entries\n";
    print "  project file: $PathProj\n";
    print "  project name: $NameProj\n";
  }
  if ($debug) {
    print  "  all BLAST parameters:\n";
    foreach (sort keys %{$$pProj{param}{BlastEntry}}) {
      printf "    %s = %s\n", $_, $$pProj{param}{BlastEntry}{$_};
    }
  }
  elsif ($ProgOpt{-verbose}) {
    printf "  BLAST parameters: M=%d, N=%d, W=%d\n", @{$$pProj{param}{BlastEntry}}{'M','N','W'};
    print  "  threshold criteria:\n";
    printf "    %s identity: %f\n", '%', $$pProj{param}{BlastEntry}{ThresholdIdentity};
    if ($$pProj{param}{BlastEntry}{ThresholdScore}) {
      printf "    score:      %d\n", $$pProj{param}{BlastEntry}{ThresholdScore};
    } else {
      printf "    expectancy: %E\n", $$pProj{param}{BlastEntry}{ThresholdExpect};
    }
    if ($$pProj{param}{BlastEntry}{NeglectID}) {
      printf "  IDs to be ignored (RegExp): %s\n", $$pProj{param}{BlastEntry}{NeglectID};
    }
  }
  if ($debug or $ProgOpt{-verbose} or $ProgOpt{-log}) {
    print "\n";
  }

  # do BLAST search, add sequence entries
  &BlastsessAdd ($pProj);

  # print protocol
  $NmbEntryAdd = @{$$pProj{align}} - $NmbEntryAnte;
  printf "added %d %s\n", $NmbEntryAdd, ($NmbEntryAdd==1) ? 'sequence' : 'sequences';
  if ($$pProj{changes}) {
    &SessionSave ($pProj);
  }

  # exit program
  exit 0;
}


# add new sequences by doing BLAST search
#
# INTERFACE
# - argument 1: reference to project data
#
# - global options:
#   -BlastLimit limit BLAST search to first N BLAST hit
#   -debug      [STD]
#   -timer      [STD]
#
# DESCRIPTION
# - addition of new sequences is done by &AlnprojEntryAdd.
# - function returns to calling function without return value.
#
sub BlastsessAdd {
  my ($pProj, %opt) = @_;
  my ($debug, $dbg2, $time);
  my ($cons, $BlastPathQuery);
  my (%BlastOpt, $BlastStrProgDb, $BlastStrParam, $pBlastParamEff, $CallBlast);
  my ($pBlparse, $pBlastMatch, $BlastCtSize);
  my ($NeglectId, $MatchId, $MatchHspNmb, $pMatchHsp, $bThrowout);
  my (%PairInfo, $RelId, $bConsChg, $SeqCons, $SeqNew);

  # work out options and function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # work out BLAST program, BLAST database, parameter set
  BlastsessAddBlast: {
    if ($ProgOpt{-timer}) { $time = &Sum((times)[0,2]) }
    %BlastOpt = (
      -program => $$pProj{param}{BlastEntry}{Program},
      -db      => $$pProj{param}{BlastEntry}{Database},
      -param   => $$pProj{param}{BlastEntry}{ParameterSet},
      -ValE    => $$pProj{param}{BlastEntry}{ThresholdExpect} * $ProgParam{Blast}{ThreshExpectMultip},
      -ValS    => $$pProj{param}{BlastEntry}{ThresholdScore}  * $ProgParam{Blast}{ThreshScoreMultip},
      -ValM    => $$pProj{param}{BlastEntry}{M},
      -ValN    => $$pProj{param}{BlastEntry}{N},
      -ValW    => $$pProj{param}{BlastEntry}{W},
      -ValB    => int (&Max (100, 1.5 * @{$$pProj{align}}, 1.5 * $$pProj{param}{BlastEntry}{SizeReport})),
      -debug   => $dbg2);
    ($BlastStrProgDb, $BlastStrParam, $pBlastParamEff) = &BlastParamStr (%BlastOpt);

    # save consensus sequence as query file
    $BlastPathQuery = $ProgParam{TmpManag}->Create(-touch=>1);
    $cons = $$pProj{param}{BlastEntry}{MaskPoly} ?
      &MaskPoly ($$pProj{consens}{sequence}, -ThreshPoly=>$$pProj{param}{BlastEntry}{MaskPoly}, -SmbMask=>'N', -debug=>$dbg2) :
      $$pProj{consens}{sequence};
    &WriteFile ($BlastPathQuery, &SeqentryToFasta (
      { sequence=>$cons, id=>'consensus' },
      -phrase => "alignment project $$pProj{name}",
      -pure   => 1));

    # do BLAST, parse report, error handling
    $CallBlast = "$BlastStrProgDb $BlastPathQuery $BlastStrParam";
    $pBlparse = &BlastStructBycall ($CallBlast, -WarnFatal=>1, -debug=>$dbg2);
    unless ($pBlparse and %$pBlparse) {
      printf STDERR "%s. ERRROR: unable to start BLAST process or to parse BLAST report\n", &MySub;
      return;
    }
    if (@{$$pBlparse{fatal}}) { return }
    unless (%{$$pBlparse{Match}}) {
      print "no matches found in BLAST report\n";
      if ($debug or $ProgOpt{-verbose}) {
        print "  no fatal errors\n";
        if (@{$$pBlparse{warn}}) {
          print "  warning messages:\n";
          print join '', @{$$pBlparse{warn}};
        } else {
          print "  no warnings\n";
        }
      }
      return;
    }
    $debug or unlink $BlastPathQuery;
    $ProgOpt{-timer} and printf "%s. CPU time for BLAST (incl. subprocess): %.3f s\n", &MySub, &Sum ((times)[0,2])-$time;

    # loop over all BLAST matches
    $pBlastMatch = $$pBlparse{Match};
    $BlastCtSize = 0;
    $NeglectId = $$pProj{param}{BlastEntry}{NeglectID};
    BlastsessAddMatch:
    foreach $MatchId (sort {
        $$pBlastMatch{$b}{score} <=> $$pBlastMatch{$a}{score} or
        $a cmp $b;
        } keys %$pBlastMatch) {
      $BlastCtSize ++;
      if ($ProgOpt{-BlastLimit} and $BlastCtSize > $ProgOpt{-BlastLimit}) {
        last BlastsessAddBlast;
      }

      # check if ID already exists in the project
      foreach (grep { $MatchId eq $_->{id} } @{$$pProj{align}}) {
        if ($debug or $ProgOpt{-verbose}) {
          printf "Matching entry %s already in project\n", $MatchId;
          printf "  rel. identity of HSP0: %f\n", $$pBlastMatch{$MatchId}{HSP}[0]{RelId};
          printf "  high. score: %d\n", $$pBlastMatch{$MatchId}{score};
          printf "  low. expectancy: %E\n", $$pBlastMatch{$MatchId}{expect}
            if ($$pProj{param}{BlastEntry}{ThresholdExpect});
        }
        next BlastsessAddMatch;
      }

      # ignore these IDs
      if ($NeglectId and $MatchId =~ m/$NeglectId/) {
        if ($debug or $ProgOpt{-verbose}) {
          printf "Matching entry %s ignored\n", $MatchId;
          printf "  rel. identity of HSP0: %f\n", $$pBlastMatch{$MatchId}{HSP}[0]{RelId};
          printf "  high. score: %d\n", $$pBlastMatch{$MatchId}{score};
          if ($$pProj{param}{BlastEntry}{ThresholdExpect}) {
          printf "  low. expectancy: %E\n", $$pBlastMatch{$MatchId}{expect};
          }
        }
        next BlastsessAddMatch;
      }
      if ($$pProj{param}{BlastEntry}{problem} and
        grep {$_ eq $MatchId} keys %{$$pProj{param}{BlastEntry}{problem}}
      ) {
        if ($debug or $ProgOpt{-verbose}) {
          printf "Matching entry %s skipped (caused problems previously)\n", $MatchId;
          printf "  rel. identity of HSP0: %f\n", $$pBlastMatch{$MatchId}{HSP}[0]{RelId};
          printf "  high score: %d\n", $$pBlastMatch{$MatchId}{score};
          if ($$pProj{param}{BlastEntry}{ThresholdExpect}) {
          printf "  low. expectancy: %E\n", $$pBlastMatch{$MatchId}{expect};
          }
        }
        next BlastsessAddMatch;
      }

      # loop over all HSPs of the BLAST match entry
      # at the moment add only first (best) HSP
      BlastsessAddHsp:
      foreach $pMatchHsp ($$pBlastMatch{$MatchId}{HSP}[0]) {

        # check threshold criteria against match
        $bThrowout = 0;
        if ($$pMatchHsp{RelId} < $$pProj{param}{BlastEntry}{ThresholdIdentity}) {
          $bThrowout = 1;
        }
        if ($$pProj{param}{BlastEntry}{ThresholdScore} and
            $$pMatchHsp{score} < $$pProj{param}{BlastEntry}{ThresholdScore}) {
          $bThrowout = 1;
        }
        if ($$pProj{param}{BlastEntry}{ThresholdExpect} and
            $$pMatchHsp{expect} > $$pProj{param}{BlastEntry}{ThresholdExpect}) {
          $bThrowout = 1;
        }
        if ($bThrowout) {
          if ($debug or $ProgOpt{-verbose}) {
            printf "%s. matching entry %s fails threshold criteria:\n", &MySub, $MatchId;
            printf "  rel. identity: %f\n", $$pMatchHsp{RelId};
            printf "  high score:    %d\n", $$pMatchHsp{score};
            if ($$pProj{param}{BlastEntry}{ThresholdExpect}) {
            printf "  expectancy:    %E\n", $$pMatchHsp{expect};
            }
          }
          next BlastsessAddHsp;
        }

        # move sequence pair info to appropriate structure, do work
        $bConsChg = 0;
        %PairInfo = (
          strand     => ($$pMatchHsp{orient} > 0) ? 'F' : 'R',
          StretchOff => $$pMatchHsp{MatchBeg},
          StretchEnd => $$pMatchHsp{MatchEnd},
          AlnOff     => $$pMatchHsp{QueryBeg},
          AlnEnd     => $$pMatchHsp{QueryEnd},
          );
        if ($debug) {
          printf "%s. submitting following parameters to \&AlnprojEntryAdd:\n", &MySub;
          print  "  sequence to be added: $MatchId\n";
          print  "  pair info structure:\n";
          foreach (sort keys %PairInfo) {
          printf "    %s = %s\n", $_, $PairInfo{$_};
          }
        }
        $ProgOpt{-timer} and $time = &Sum ((times)[0,2]);
        ($RelId, $bConsChg, $SeqCons, $SeqNew) = &AlnprojEntryAdd (
          $pProj, $MatchId, \%PairInfo,
          -ThreshStrange => $$pMatchHsp{RelId} * $ProgParam{Blast}{ThreshQualRel},
          -debug         => $dbg2);
        $ProgOpt{-timer} and printf "%s. CPU time for retrieving and adding sequence stretch (incl. subprocess): %.3f s\n", &MySub, &Sum((times)[0,2])-$time;
        unless (defined $RelId) {
          printf STDERR "%s. unable to add new sequence stretch %s, ERROR in \&AlnprojEntryAdd\n", &MySub, $MatchId||"''";
          $$pProj{param}{BlastEntry}{problem}{$MatchId} = sprintf ("error in \&AlnprojEntryAdd%s",
            $MainLib::File::LibGlob{LogFile} ? ", see $MainLib::File::LibGlob{LogFile}" : '',
            );
          $$pProj{changes} = 1;
          &AlnprojEntryDel ($pProj, $MatchId, -debug=>$dbg2);
          next BlastsessAddMatch;
        }

        # analyse and report results
        printf "Added sequence stretch from database entry %s\n", $MatchId;
        if ($debug or $ProgOpt{-verbose}) {
          printf "  high score:    %d\n", $$pMatchHsp{score};
          if ($$pProj{param}{BlastEntry}{ThresholdExpect}) {
            printf "  expectancy:    %E\n", $$pMatchHsp{expect};
          }
          printf "  identity to consensus sequence in BLAST report: %d / %d = %f\n",
            int ($$pMatchHsp{RelId} * $$pMatchHsp{MatchLen}), $$pMatchHsp{MatchLen},
            $$pMatchHsp{RelId};
          printf "  rel. identity to consensus sequence in Clustal W alignment: %d / %d = %f\n",
            int ($RelId * $$pMatchHsp{MatchLen}), $$pMatchHsp{MatchLen},
            $RelId;
          printf "  performance relation Clustal W to BLAST: %f\n", $RelId / $$pMatchHsp{RelId};
        }
        if ($RelId < ($$pMatchHsp{RelId} * $ProgParam{Blast}{ThreshQualRel})) {
          print  "encountered problems due to quality drop in updated alignment\n";
          printf "  BLAST vs. Clustal W: %.3f : %.3f\n", $$pMatchHsp{RelId}, $RelId;
          print  "sequence originals for two methods:\n";
          printf "  SeqFastA: %s\n", ${ &SeqentryPopFasta (
            "$CorePath{GSCJ}{DictyHome}/$CorePath{GSCJ}{DictyPre}",
            -MatchID=>[$MatchId], -pure=>1) }{sequence};
          printf "  SeqExper: %s\n", ${ &SeqOriginal ($pProj, $MatchId) }{sequence};
          print  "sequence entries in alignment (consensus, new entry):\n";
          printf "  %s\n  %s\n", $SeqCons, $SeqNew;
          $$pProj{param}{BlastEntry}{problem}{$MatchId} = sprintf ("quality drop %.3f%s",
            $RelId/$$pMatchHsp{RelId},
            $MainLib::File::LibGlob{LogFile} ? ", see $MainLib::File::LibGlob{LogFile}" : '',
            );
          $$pProj{changes} = 1;
          &AlnprojEntryDel ($pProj, $MatchId, -debug=>$dbg2);
          next BlastsessAddMatch;
        }

        # update counter for IDs in BLAST report
        if ($BlastCtSize > $$pProj{param}{BlastEntry}{SizeReport}) {
          $$pProj{param}{BlastEntry}{SizeReport} = $BlastCtSize;
          $$pProj{changes} = 1;
        }

        # new BLAST or continue in current report?
        $bConsChg and last BlastsessAddMatch;
        next BlastsessAddMatch;

      } # foreach $MatchHspNmb
    } # foreach $MatchId

    # repeat BLAST search in case of:
    # - consensus length has changed
    # - reached report size limit
    # => recursive call
    if ($bConsChg or $$pProj{param}{BlastEntry}{SizeReport} > (0.95 * $$pBlastParamEff{B})) {
      if ($debug or $ProgOpt{-verbose}) { print  STDERR "\n"; }
      printf "starting new BLAST search %s\n", $bConsChg ? '(consensus sequence length has changed)':'(report size limit reached)';
      undef $pBlparse;
      undef $pBlastMatch;
      undef $bConsChg;
      redo BlastsessAddBlast;
    }
  } # new BLAST

  # exit SUB successfully
  printf "BLAST report exhausted completely%s\n",
    $ProgOpt{-BlastLimit} ? ' until limit' : '';
  return;
}


################################################################################
# extract information
################################################################################


# session commad 'print' - output alignment, sequences, ids
#
# INTERFACE
# - argument 1:     reference to project data
# - argument 2+:    optional arguments (case-insensitive)
#     [+-]?\d+ 1st  offset of alignment range (consensus pos) to be reported
#     [+-]?\d+ 2nd  end of alignment range (consensus pos) to be reported
#     alignment     (default) print the (specified range of the) alignment
#                   on the screen (Clustal W format). Append the corresponding
#                   consensus sequence in fastA format.
#     consensus     print consensus only
#     ends          print sequence's divergent ends
#     ids           print sequence identifiers only
#     origs         print sequence originals
#     seqs          print aligned sequence as fastA
#
# - global options:
#   -debug      [STD]
#
# DESCRIPTION
# - print (parts of) the alignment on the screen. Output may be restricted
#   to a range of the alignment, the consensus, or the IDs of the sequences
#   contributing to the alignment.
#
# DEBUG, CHANGES, ADDITIONS
# - have a discussion about counting in computational or biological system.
#   range positions are counted in biological system.
#
sub SessionPrint {
  my ($pProj, @PlusArg) = @_;
  my ($debug, $dbg2);
  my ($directive, $RangeOff, $RangeEnd, $RangeTotal);
  my (@IdOrig, $sSeq);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # define: function directive, alignment range
  # loop over optional arguments
  while (my $PlusArgCurr = shift(@PlusArg)) {
    if ($PlusArgCurr =~ m/^[-\+]?\d+$/) {
      unless (defined($RangeOff)) {
        $RangeOff = $PlusArgCurr;
      } elsif (! defined($RangeEnd)) {
        $RangeEnd = $PlusArgCurr;
      } else {
        printf "%s. superficious numerical argument $PlusArgCurr\n", &MySub;
      }
    }
    elsif ($PlusArgCurr =~ m/^(alignment|consensus|ends|ids|origs|seqs)$/i) {
      $directive = lc $&;
      if ($directive eq 'orig') {
        @IdOrig = @PlusArg;
        last;
      }
    }
  }
  $directive ||= 'alignment';
  $debug and printf STDERR "%s. effective directive: %s\n", &MySub, $directive||"''";

  ##############################################################################
  # function directives without effect of alignment range

  # output sequence originals
  if ($directive eq 'origs') {
    foreach (@IdOrig) {
      printf "%s: %s\n", $_, $SeqLab::AlnProj::SeqOrig{$_};
    }
    return;
  }

  ##############################################################################
  # apply alignment range

  # in case of range selection
  if (defined($RangeOff)) {
    $debug and printf STDERR "%s. using range, arg off: %s\n", &MySub, $RangeOff||"''";

    # work out range parameters
    $RangeTotal = length &SeqStrPure($$pProj{consens}{sequence});
    $RangeOff ||= 1;
    $RangeEnd ||= $RangeTotal - 1;

    # translate minus syntax
    if ($RangeOff < 0) {
      $RangeOff = $RangeTotal + $RangeOff;
    }
    if ($RangeEnd < 0) {
      $RangeEnd = $RangeTotal + $RangeEnd;
    }

    # verify parameters
    if ($RangeEnd<$RangeOff or $RangeEnd>$RangeTotal) {
      printf STDERR "%s. ERROR: invalid range: %s..%s\n", &MySub,
        $RangeOff, $RangeEnd;
      return;
    }
    $debug and printf STDERR "%s. working with range: %s..%s\n", &MySub,
      $RangeOff, $RangeEnd;

    # get sub-project (safe copy), compress it
    (undef,$pProj) = &AlnprojSplit3 ($pProj, $RangeOff-1, $RangeEnd-$RangeOff+1, -consens=>1, -debug=>$dbg2);
    &AlnCompress ($$pProj{align});
  } else {
    $RangeOff = 0;
  }

  ##############################################################################
  # function directives with effect of alignment range

  # output list of IDs
  if ($directive eq 'ids') {
    foreach (@{$$pProj{align}}) {
      if ($_->{id} =~ m/$reAlnConsens/io) { next }
      print "$_->{id}\n";
    }
    return;
  }

  # output sequences in alignment (range)
  if ($directive eq 'seqs') {
    foreach (@{$$pProj{align}}) {
      print &SeqentryToFasta ($_, -pure=>1);
    }
    return;
  }

  # output alignment
  if ($directive eq 'alignment') {
    print &AlnprojClustalSprint ($$pProj{align}, -debug=>$dbg2);
  }

  # output consensus as fastA
  if ($directive eq 'consensus' or $directive eq 'alignment') {
    &AlnprojConsUpdate ($pProj);
    my $phrase = 'consensus sequence' . ($RangeOff ? ", range $RangeOff-$RangeEnd" : '');
    print  &SeqentryToFasta (
      { sequence=>$$pProj{consens}{sequence}, id=>$$pProj{name} },
      -phrase => $phrase,
      -pure   => 1,
      -debug  => $dbg2);
    if ($RangeOff > 1) {
      print  "\n";
      $phrase = 'consensus sequence pre-range masked';
      $sSeq = ('N' x ($RangeOff - 1)) . $$pProj{consens}{sequence};
      print  &SeqentryToFasta (
        { sequence=>$sSeq, id=>$$pProj{name} },
        -phrase => $phrase,
        -pure   => 1,
        -debug  => $dbg2);
    }
  }

  # exit SUB
  $debug and printf STDERR "%s. exit SUB\n", &MySub;
  return;
}


# ouput alignment in fastA format
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: optional path of output file
#               as a default the fastA is printed on the screen
#
sub ExtractFasta {
  my ($pProj, $PathOut) = @_;
  my ($debug, $dbg2, $hOut);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # open output file
  if ($PathOut) {
    $hOut = FileHandle->new($PathOut,'w');
    unless ($hOut) {
      printf STDERR "%s. unable to open output file %s\n", &MySub, $PathOut||"''";
      return;
    }
  } else {
    $hOut = \*STDOUT;
  }

  # print fastA
  print  $hOut &AlnToFasta ($$pProj{align}, -debug=>$dbg2);
}


# calculate log-likelihood matrix for the alignment project
#
# INTERFACE
# - argument 1:   reference to alignment project
#                 The alignment is left unchanged. Work and temporary (symbol)
#                 changes are done on a copy of the alignment.
#
# - options:
#   -range        do range selection, syntax is: 'off..end'
#   -PseudoHomog  use homogenous pseudo-count for non-observed symbols,
#                 default: 0,5
#   -PseudoSimil  use similarity-based pseudo-count for non-observed symbols,
#                 default: 0
#
# - global options:
#   -debug        [STD]
#
# DESCRIPTION
# - output is done to STDOUT
# - the weight matrix type is log-likelihood
# - there are three different pseudo-count approaches available:
#   - 'tabula rasa'-approach: same pseudo-count for all non-observed
#     sequence symbols.
#   - 'substitution probability'-approach: pseudo-count according
#     to rel. similarity (calculated from standard BLOSUM40 substitution
#     matrix)
#   - 'shared properties'-approach (cmp. Bork group): from the observed
#     sequence symbols the conserved AA chemical properties are extracted.
#     pseudo-counts are given then according to chemical property overlap
#     between non-observed and observed sequence symbols.
#   All three can be applied independently and simultaneously. However a
#   combination of the 'substitution probability'- and 'shared properties'-
#   approaches does not seem reasonable.
#
# DEBUG, CHANGES, ADDITIONS
# - there are problems to handle seq type 'RNA'
# - there are strange effects if there's a poly(N) stretch in the alignment
#   a count is added in a random fashion to one single base.
#
sub ExtractMatrix {
  my ($pProj, %opt) = @_;
  my ($debug, $dbg2, %MaskGapOpt);
  my (@symbol, $PseudoHomog, $pMatrixSimil);
  my ($AlnConsLen, $RangeOff, $RangeEnd);
  my ($pTabFreq, $pAlnCol, $ItSmb, %SmbFreqSignif, $MatrixMin, $MatrixMax);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $PseudoHomog = defined ($opt{-PseudoHomog}) ? $opt{-PseudoHomog} : 0.5;
  if ($opt{-PseudoSimil}) {
    $pMatrixSimil = &MatrixSimrelCalc (-debug=>$dbg2);
  }

  # parameters from project data
  if ($$pProj{param}{SeqType} eq 'RNA') {
    printf STDERR "%s. SORRY: cannot handle seq type 'RNA'\n", &MySub;
    return;
  }
  @symbol = ($$pProj{param}{SeqType} eq 'protein') ?
    qw(A C D E F G H I K L M N P Q R S T V W Y) : qw(A C G T);
  %MaskGapOpt = (
    -MaskGapEnd => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapEnd},
    -MaskGapIns => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapInside},
    );

  ##############################################################################
  # prepare copy of alignment for analysis

  # hide all extensions
  $pProj = &AlnprojClone ($pProj);
  &AlnprojExtensHide ($pProj);

  # start-up consensus length
  $AlnConsLen = length &SeqStrPure ($$pProj{consens}{sequence});

  # do range selection
  if ($opt{-range}) {
    if ($opt{-range} =~ m/^\s*(\d*)\s*\.\.\s*(\d*)\s*$/) {
      $RangeOff = $1 || 1;
      $RangeEnd = $2 || $AlnConsLen;
      $debug and printf STDERR "%s. range args: %d..%d\n", &MySub, $RangeOff, $RangeEnd;
    } else {
      printf STDERR "%s. ERROR in range specification: %s\n", &MySub, $opt{-range};
      return;
    }

    # verify parameters
    if ($RangeEnd < $RangeOff or
        $RangeEnd > $AlnConsLen) {
      printf STDERR "%s. ERROR: invalid range: %d..%d\n", &MySub, $RangeOff, $RangeEnd;
      return;
    }
    $debug and printf STDERR "%s. working on range: %d..%d\n", &MySub, $RangeOff, $RangeEnd;

    # get sub-project (safe copy), compress it
    (undef,$pProj) = &AlnprojSplit3 ($pProj, $RangeOff-1, $RangeEnd-$RangeOff+1, -consens=>1, -debug=>$dbg2);
    &AlnCompress ($$pProj{align});
  }

  # resolve unknown symbols
  $$pProj{align} = &AlnResolveUnk ($$pProj{align});

  ########################################################################
  # calculate matrix from symbol frequency table

  $pTabFreq = &AlnTabSmbFreqCPos ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2);

  # matrix header
  printf "# %s positional weight matrix\n", $SeqTypeBasic{$$pProj{param}{SeqType}};
  printf "# date/time: %s\n", &TimeStr();
  print  "#\n";
  printf "# produced by AlnK v%s\n", $SeqLab::AlnProj::LibGlob{Version};
  printf "# from alignment project %s\n", $$pProj{name}||"''";
  print  "#\n";
  print  "# matrix width, logical offset of site, threshold score\n";
  printf "%d  1  -1\n", int(@$pTabFreq);

  # matrix definition notes
  print  "#\n";
  print  "# scale = ln (frequency)\n";
  printf "# frequency counts are based on N(seq) = %d\n", int @{$$pProj{align}};
  if ($PseudoHomog or $opt{-PseudoSimil} or $opt{-PseudoPropt}) {
    print  "# frequency values contain pseudo-counts:\n";
  }
  $PseudoHomog and
    printf "# - %.2f homogenously added for all symbols\n", $PseudoHomog;
  $opt{-PseudoSimil} and
    printf "# - 0 to %.2f added for non-observed symbols depending on PAM score\n#   to observed symbols\n", $opt{-PseudoSimil};
  $opt{-PseudoPropt} and
    printf "# - 0 to %.2f added for non-observed symbols depending on property\n#   overlap to observed symbols\n", $opt{-PseudoPropt};
  printf "#\n#  %s\n", join ("     ", @symbol);

  # calculate/output matrix values
  # loop over alignment positions
  foreach $pAlnCol (@$pTabFreq) {

    # significance-corrected symbol frequency

    # homogenous pseudo-count
    %SmbFreqSignif = (map { ( $_ => $$pAlnCol{$_}+$PseudoHomog ) } @symbol);
    $debug and printf STDERR "homogenous pseudo-count: %s\n", join ('  ', map { "$_ $SmbFreqSignif{$_}" } sort keys (%SmbFreqSignif) );

    # similarity-based pseudo-count
    if ($opt{-PseudoSimil}) {
      foreach $ItSmb (@symbol) {
        $$pAlnCol{$ItSmb} and next;
        $SmbFreqSignif{$ItSmb} += $opt{-PseudoSimil} * &Mean (
          map { (&Max($$pMatrixSimil{$ItSmb}{$_},$$pMatrixSimil{$_}{$ItSmb})) x $$pAlnCol{$_}; }
          grep { $$pAlnCol{$_} } @symbol);
      }
      $debug and printf STDERR "similarity-based pseudo-count: %s\n", join ('  ', map { "$_ $SmbFreqSignif{$_}" } sort keys (%SmbFreqSignif) );
    }

    # property-based pseudo-count
    if ($opt{-PseudoPropt}) {
      # ...
      $debug and printf STDERR "property-based pseudo-count: %s\n", join ('  ', map { "$_ $SmbFreqSignif{$_}" } sort keys (%SmbFreqSignif) );
    }

    # re-calculate logarithmic, determine min./max.
    map {
      $SmbFreqSignif{$_} = log ($SmbFreqSignif{$_}||0.001); 
    } keys (%SmbFreqSignif);
    $MatrixMin += &Min (values %SmbFreqSignif);
    $MatrixMax += &Max (values %SmbFreqSignif);

    # output matrix line
    printf "%s\n", join ('',
      map { sprintf "%6.2f", $SmbFreqSignif{$_}; }
      sort keys (%SmbFreqSignif)
      );
  }

  # final statistics
  print  "#\n";
  printf "# min. score = %.2f, max. score = %.2f\n", $MatrixMin, $MatrixMax;
}


# report divergence characteristics of the alignment project as a logo
#
# INTERFACE
# - argument 1:   reference to alignment project
#                 The alignment is left unchanged. Work and temporary (symbol)
#                 changes are done on a copy of the alignment.
#
# - options:
#   -range        do range selection, syntax is: 'off..end'
#   -PseudoHomog  use homogenous pseudo-count for non-observed symbols,
#                 default: 0,5
#   -PseudoSimil  use similarity-based pseudo-count for non-observed symbols,
#                 default: 0
#
# - global options:
#   -debug        [STD]
#
# DESCRIPTION
# - there are three different pseudo-count approaches available:
#   - 'tabula rasa'-approach: same pseudo-count for all non-observed
#     sequence symbols.
#   - 'substitution probability'-approach: pseudo-count according
#     to rel. similarity (calculated from standard BLOSUM40 substitution
#     matrix)
#   - 'shared properties'-approach (cmp. Bork group): from the observed
#     sequence symbols the conserved AA chemical properties are extracted.
#     pseudo-counts are given then according to chemical property overlap
#     between non-observed and observed sequence symbols.
#   All three can be applied independently and simultaneously. However a
#   combination of the 'substitution probability'- and 'shared properties'-
#   approaches does not seem reasonable.
#
# DEBUG, CHANGES, ADDITIONS
# - there are problems to handle seq type 'RNA'
# - there are strange effects if there's a poly(N) stretch in the alignment
#   a count is added in a random fashion to one single base.
#
sub ExtractLogo {
  my ($pProj, %opt) = @_;
  my ($debug, $dbg2, %MaskGapOpt);
  my (@symbol, $PseudoHomog, $pMatrixSimil, %param);
  my ($pTabFreq, $CtPos, $pAlnCol, $ItSmb, %SmbFreqSignif, $MatrixMin, $MatrixMax);
  my (%path);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $PseudoHomog = defined ($opt{-PseudoHomog}) ? $opt{-PseudoHomog} : 0.5;
  if ($opt{-PseudoSimil}) {
    $pMatrixSimil = &MatrixSimrelCalc (-debug=>$dbg2);
  }

  # parameters from project data
  if ($$pProj{param}{SeqType} eq 'RNA') {
    printf STDERR "%s. SORRY: cannot handle seq type 'RNA'\n", &MySub;
    return;
  }
  @symbol = ($$pProj{param}{SeqType} eq 'protein') ?
    qw(A C D E F G H I K L M N P Q R S T V W Y) : qw(A C G T);
  %MaskGapOpt = (
    -MaskGapEnd => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapEnd},
    -MaskGapIns => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapInside},
    );

  # organise paths
  $path{cwd} = &PathCwd();
  $path{root}  = &PathSplit ($$pProj{Path})->{dir};
  $path{root} .= "/$$pProj{name}_";

  ##############################################################################
  # prepare copy of alignment for analysis

  # hide all extensions
  $pProj = &AlnprojClone ($pProj);
  &AlnprojExtensHide ($pProj);

  # start-up consensus length
  $param{RangeLen} = length &SeqStrPure ($$pProj{consens}{sequence});
  if (! $param{RangeLen}) {
    printf STDERR "%s. ERROR: Null consensus\n", &MySub;
    return;
  }

  # do range selection
  if ($opt{-range}) {
    if ($opt{-range} =~ m/^\s*(\d*)\s*\.\.\s*(\d*)\s*$/) {
      $param{RangeOff} = $1 || 1;
      $param{RangeEnd} = $2 || $param{RangeLen};
      $param{RangeLen} = $param{RangeEnd} - $param{RangeOff} + 1;
      $debug and printf STDERR "%s. range args: %d..%d\n", &MySub, $param{RangeOff}, $param{RangeEnd};
    } else {
      printf STDERR "%s. ERROR in range specification: %s\n", &MySub, $opt{-range};
      return
    }

    # verify parameters
    if ($param{RangeEnd}<$param{RangeOff} or $param{RangeEnd}>$param{RangeLen}) {
      printf STDERR "%s. ERROR: invalid range: %d..%d\n", &MySub, $param{RangeOff}, $param{RangeEnd};
      return;
    }
    $debug and printf STDERR "%s. working on range: %d..%d\n", &MySub, $param{RangeOff}, $param{RangeEnd};

    # get sub-project (safe copy), compress it
    (undef,$pProj) = &AlnprojSplit3 ($pProj, $param{RangeOff}-1, $param{RangeLen}, -consens=>1, -debug=>$dbg2);
    &AlnCompress ($$pProj{align});
  } else {
    $param{RangeOff} = 1;
    $param{RangeEnd} = $param{RangeLen};
  }

  # resolve unknown symbols
  $$pProj{align} = &AlnResolveUnk ($$pProj{align});

  ########################################################################
  # calculate matrix from symbol frequency table

  $pTabFreq = &AlnTabSmbFreqCPos ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2);

  # temporary directory and files
  $path{TmpDir} = $ProgParam{TmpManag}->Create();
  mkdir ($path{TmpDir});
  foreach (qw(colors marks wave)) {
    &touch ("$path{TmpDir}/$_");
  }

  # makelogo parameter file
  $param{ScaleHeight} = 10.0;
  $param{CharWidth} = sprintf ('%.3f', &Min (23.5 / $param{RangeLen}, 1.5));
  $path{makelogop} = $path{root} . 'makelogop.dat';
  $param{makelogop} = join ('', map { "$_\n" }
    join (' ', $param{RangeOff}, $param{RangeEnd}),
    $param{RangeOff},
    "16 2",
    90,
    $param{CharWidth},
    join (' ', $param{ScaleHeight}, '0.1'),
    2,
    '1.0',
    'not 2nd bar',
    'no box element',
    'no outline',
    'capitalise',
    $param{RangeLen},
    1,
    1.1,
    'numbering',
    1,
    1,
    join (' ', 0, $param{ScaleHeight}+2, 0.025 * $param{RangeLen}),
    sprintf ('AlnK project "%s"', $$pProj{name}||"''"),
    'page-oriented bounding',
    'd',
    );
  printf "writing file $path{makelogop}\n";
  &WriteFile ($path{makelogop}, $param{makelogop});
  &FileCopy ($path{makelogop}, "$path{TmpDir}/makelogop");

  # calculate/output symbol vector parameters, output to parameter file
  # loop over alignment positions
  $path{symvec} = $path{root} . 'symvec.dat';
  printf "writing file $path{symvec}\n";
  open (OUTSYMVEC, ">$path{symvec}");
  printf OUTSYMVEC "%d\n", int @symbol;
  foreach $pAlnCol (@$pTabFreq) {
    $CtPos ++;

    # output matrix line
    printf OUTSYMVEC "%d %d %.5f 1e-4\n", $param{RangeOff} + $CtPos - 1,
      &Sum (map { $$pAlnCol{$_} } @symbol),
      2 - (-1 / log (2)) * $$pAlnCol{entropy};
    foreach $ItSmb (@symbol) {
      printf OUTSYMVEC "%s %d\n", uc ($ItSmb), $$pAlnCol{$ItSmb};
    }
  }
  close (OUTSYMVEC);
  &FileCopy ($path{symvec}, "$path{TmpDir}/symvec");

  # call makelogo
  $path{logo} = $path{root} . 'logo.ps';
  chdir $path{TmpDir};
  system "$CorePath{call}{makelogo}";
  chdir $path{cwd};
  &FileCopy ("$path{TmpDir}/logo", $path{logo});
  printf "writing file $path{logo}\n";

  # tidy up or debug
  if ($debug) { 
    printf STDERR "%s. temporary output left in dir $path{TmpDir}\n", &MySub;
  } else {
    unlink (glob "$path{TmpDir}/*");
    rmdir $path{TmpDir};
  }
}


################################################################################
# report
################################################################################


# report 'topographic' characteristics of the alignment project
#
# INTERFACE
# - argument 1: reference to alignment project
#
# - global options:
#   -debug      [STD]
#
sub ReportAlign {
  my ($pProj) = @_;
  my ($debug, $dbg2);
  my ($OrigLen, $OrigPre, $OrigPost);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # report header, table header
  printf "# alignment report for alignment project %s\n", $$pProj{name}||"''";
  printf "# date/time: %s\n", &TimeStr();
  printf "# consensus length: %d\n", length &SeqStrPure ($$pProj{consens}{sequence});
  printf "#\n# column labels:\n# %s\n", join ("\t",
    'ID',
    'strandedness',
    'stretch length',
    'aln. offset',
    'aln. end',
    'pre stretch',
    'post stretch',
    'original length',
    );

  # loop over alignment entries
  foreach (@{$$pProj{align}}) {
    if ($_->{id} =~ m/consens/i) { next }
    my ($OffPrim, $EndPrim, $iSeqLen, $OffSec, $EndSec);

    # alignment offset, length, alignment end
    if ($_->{sequence} =~ m/([A-Z]-*)+[A-Z]/) {
      $OffPrim = length $`;
      $EndPrim = length ($`.$&);
      $iSeqLen = length &SeqStrPure ($&);
      $OffSec = (length &SeqStrPure ( substr ($$pProj{consens}{sequence}, 0, $OffPrim) )) + 1;
      $EndSec =  length &SeqStrPure ( substr ($$pProj{consens}{sequence}, 0, $EndPrim) );
      if ($debug) {
        printf STDERR "%s. pattern match results:\n", &MySub;
        print  STDERR "  \$OffPrim = $OffPrim\n";
        print  STDERR "  \$EndPrim = $EndPrim\n";
        print  STDERR "  \$EndPrim = $EndPrim\n";
      }
    } else {
      printf STDERR "%s. CODE ERROR: failure in stretch pattern match\n", &MySub;
      next;
    }

    # length of pre / post stretch
    my $pSeqOrig = &SeqOriginal ($pProj, $_->{id}, -debug=>$dbg2);
    if (my $sSeqOrig = $$pSeqOrig{sequence}) {
      $OrigLen = length ($sSeqOrig);
      $OrigPre = $_->{offset} - 1;
      $OrigPost = $OrigLen - ($_->{offset}+$iSeqLen-1);
      if ($_->{orient} eq 'R') {
        ($OrigPre, $OrigPost) = ($OrigPost, $OrigPre);
      }
    } else {
      $OrigLen = '-';
      $OrigPre = '-';
      $OrigPost = '-';
    }

    # table line
    printf "%s\n", join ("\t",
      $_->{id},
      $_->{orient},
      $iSeqLen,
      $OffSec,
      $EndSec,
      $OrigPre,
      $OrigPost,
      $OrigLen,
      );
  }
}


# report local coverage of the alignment project
#
# INTERFACE
# - argument 1: reference to alignment project
#
# - global options:
#   -debug        [STD]
#   -OutImgWidth  [STD]
#
sub ReportCover {
  my ($pProj) = @_;
  my ($debug, $dbg2, $PathRoot, %MaskGapOpt);
  my ($pPlotData, $ImgWidth, %graph, $ImgPath);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;

  # parameters from project data
  %MaskGapOpt = (
    -MaskGapEnd => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapEnd},
    -MaskGapIns => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapInside},
    );
  $PathRoot  = &PathSplit ($$pProj{Path})->{dir};
  $PathRoot .= "/$$pProj{name}_";

  # get coverage plot data, prepare graphical representation
  &AlnprojExtensHide ($pProj);
  $pPlotData = &AlnTabCover ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2);
  if ($debug) {
    printf STDERR "%s. plot of alignment coverage:\n", &MySub;
    printf STDERR "  %s\n", join (',', @$pPlotData);
  }
  $ImgWidth = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};
  %graph = (
    BgTranspar => $ProgOpt{-OutImgTransp},
    plot => [
      { DimPixel  => { x=>$ImgWidth },
        HeightRel => 0.2,
        DataType  => 'A1y',
        data      => { y=>$pPlotData },
        DataRange => { y=>[0] },
        ReprType  => 'column',
      },
      ],
    scale => [
      { location => 'x',
        PlotNum  => 0,
      },
      { location => 'left',
        PlotNum  => 0,
      },
      { location => 'right',
        PlotNum  => 0,
      },
      ],
    );

  # create and save graph image
#  $ImgPath = $PathRoot . 'cover.dat';
#  my $hOut;
#  if ($hOut = FileHandle->new($ImgPath,'w')) {
#    printf "saving coverage image data to file %s\n", $ImgPath||"''";
#  } else {
#    printf STDERR "ERROR: failure in saving coverage image data to file %s\n", $ImgPath||"''";
#  }
#  &DataPrint (\%graph, -handle=>$hOut);
  $ImgPath = $PathRoot . 'cover.png';
  if (&Graph (\%graph, -save=>$ImgPath, -debug=>$dbg2)) {
    printf "saving coverage plot image to file %s\n", $ImgPath||"''";
  } else {
    printf STDERR "ERROR: failure in saving coverage plot image to file %s\n", $ImgPath||"''";
  }
}


# report divergence characteristics of the alignment project
#
# INTERFACE
# - argument 1:  reference to alignment project
#                The alignment is left unchanged. Work and temporary (symbol)
#                changes are done on a copy of the alignment.
# - argument 2*: subfunction selector, see list of subfunctions
#                may be comma-separated list of several subfunctions
#
# - global options:
#   -debug        [STD]
#   -OutImgWidth  [STD]
#
# DESCRIPTION
# - subfunctions:
#   ConsDev        table of polymorphic alignment positions
#   ConservEntrop  plot of local symbol entropy
#   FrameShift     plot of deviations producing frameshifts
#   LocalMatscore  plot of local distance according to pairwise distance matrix
#   PairwDist      table of pairwise distances
#   SmbFreq        symbol frequency table
#   unique         plot of local sequence uniqueness
# - output is done to files whose paths are derived from the project path.
#   This behaviour cannot be controlled
# - counting philosophy for position values:
#   in most plots returned from SeqLab::Align.pm, array positions are according
#   to alignment positions (counting start 0).
#
# DEBUG, CHANGES, ADDITIONS
# - Kann man in den Konservierungsplots die Positionsnumerierung der
#   Einzelsequenzen einfgen? Oder als separate Tabelle mit den
#   Positionsbeziehungen.
#   Der Hintergrund ist der: Ein Alignment wird in zwei nicht-berlappende
#   Subsets von Sequenzen unterteilt, z.B. TRE-ORF2-Alignment in
#   TRE3-ORF2 und TRE5-ORF2. Von jeweils beiden Alignments wird ein
#   Konservierungsplot erstellt, und anschlieend sollen die Konservierungs-
#   plots miteinander verglichen werden. Das Problem ist nur: Die
#   Posititionen, die ja Consensuspositionen sind, stimmen nicht genau
#   miteinander berein.
#
sub ReportDist {
  my ($pProj, $SlcFunc) = @_;
  my ($debug, $dbg2, $ImgWidth, %path, %MaskGapOpt);
  my ($pAln, $AlnConsLen, $CtPos, @column, $pColumn, %ColLabel);
  my ($pTabConsDev, $pTabDist, $pTabFreq, $pSmb);
  my ($pPlotEntropy, $pPlotShift, $WinSize, $WinStep);
  my ($AlnHeight, %AlnSubstr, $pPlotUniq);
  my (%graph, $hOut);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $SlcFunc ||= 'all';
  $debug and printf STDERR "%s. subfunction selector: %s\n", &MySub, $SlcFunc||"''";
  $ImgWidth = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};

  # parameters from project data
  %MaskGapOpt = (
    -MaskGapEnd => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapEnd},
    -MaskGapIns => $$pProj{param}{Report}{SymbolAnalysis}{MaskGapInside},
    );
  $path{stamp}  = &PathSplit ($$pProj{Path})->{dir};
  $path{stamp} .= "/$$pProj{name}_";

  # prepare copy of alignment for analysis
  # - hide all extensions
  # - resolve unknown symbols
  $pProj = &AlnprojClone ($pProj);
  &AlnprojExtensHide ($pProj);
  $$pProj{align} = &AlnResolveUnk ($$pProj{align});
  $AlnConsLen = length &SeqStrPure ($$pProj{consens}{sequence});

  ########################################################################
  # table of consensus deviations
  if ($SlcFunc =~ m/ConsDev/i or $SlcFunc eq 'all') {

    # get table
    $pTabConsDev = (&AlnTabConsDev ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2))[0];

    # output path, report header, table header
    $path{report} = $path{stamp} .'ConsDev.tab';
    my $hOutDev = FileHandle->new($path{report},'w');
    if (!$hOutDev) {
      printf STDERR "ERROR: unable to open output file %s\n", $path{report}||"''";
    } else {

      # report header
      @column            = qw(pos ConsPos ConsSmb class emit     freq      id );
      @ColLabel{@column} = qw(pos PosCons cons    class emission frequency IDs);
      printf $hOutDev "# table of consensus deviations (polymorphisms) for alignment project %s\n", $$pProj{name}||"''";
      printf $hOutDev "# date/time: %s\n", &TimeStr();
      printf $hOutDev "# consensus length: %d\n", $AlnConsLen;
      printf $hOutDev "# symbol diversity (pi): %.6f\n", &AlnDiversity ($$pProj{align}, -ConsLen=>$AlnConsLen, -ConsDev=>$pTabConsDev);
      printf $hOutDev "#\n# column labels:\n# %s\n", join ("\t", @ColLabel{@column});

      # loop over alignment positions
      foreach my $pDev (sort {
          $$a{pos} <=> $$b{pos} or
        $$a{class} cmp $$b{class} or
         $$a{emit} cmp $$b{emit} } @$pTabConsDev
      ) {
        printf $hOutDev "%s\n", join ("\t", @{$pDev}{@column});
      }
      printf "table of consensus deviations saved as %s\n", $path{report}||"''";
    }
  }

  ########################################################################
  # table of pairwise distances
  if ($SlcFunc =~ m/PairwDist/i or $SlcFunc eq 'all') {

    # get distance table
    ($pTabDist, $pColumn) = &AlnTabDist ($$pProj{align}, %MaskGapOpt, -MaskGapIns=>1, -debug=>$dbg2);
    ($pTabDist, $pColumn) = &TableConvert ('HIH', 'AH', $pTabDist,
      -LineLabel=>$pColumn, -ColLabel=>['id',@$pColumn], -debug=>$dbg2);

    # output path, report header, table header
    $path{report} = $path{stamp} .'Dist.tab';
    unless (open (OUTTAB, ">$path{report}")) {
      printf STDERR "ERROR: unable to open output file %s\n", $path{report}||"''";
    } else {
      printf OUTTAB "# pairwise distance table for alignment project %s\n", $$pProj{name}||"''";
      printf OUTTAB "# date/time: %s\n", &TimeStr();
      printf OUTTAB "# consensus length: %d\n", $AlnConsLen;
      printf OUTTAB "#\n# column labels:\n# %s\n", join ("\t", @$pColumn);
      foreach (@$pTabDist) {
        printf OUTTAB "%s\n", join ("\t", @{$_}{@$pColumn});
      }
      close OUTTAB;
    }
    printf "table of pairwise distances saved as %s\n", $path{report}||"''";
  }

  ########################################################################
  # symbol frequency table, entropy plot data

  # get plots
  # - symbol frequency and entropy
  # - counting philosophy is computational here
  if ($SlcFunc =~ m/\b(ConservEntrop|SmbFreq)\b/i or $SlcFunc eq 'all') {
    ($pTabFreq, $pSmb) = &AlnTabSmbFreqCPos ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2);

    # extract entropy plot
    if ($SlcFunc =~ m/ConservEntrop/i or $SlcFunc eq 'all') {
      @$pPlotEntropy = map { $_->{entropy} } @$pTabFreq;
      if ($debug > 1) {
        printf STDERR "%s. plot of local entropy in alignment:\n%s", &MySub,
          join ('', map { "  $_\n" } @$pPlotEntropy);
      }
    }
  }

  # report symbol frequency
  if ($SlcFunc =~ m/SmbFreq/i or $SlcFunc eq 'all') {

    # output path, report header, table header
    $path{report} = $path{stamp} .'SmbFreq.tab';
    unless (open (OUTTAB, ">$path{report}")) {
      printf STDERR "ERROR: unable to open output file %s\n", $path{report}||"''";
    } else {

      # report header
      printf OUTTAB "# symbol frequency table for alignment project %s\n", $$pProj{name}||"''";
      printf OUTTAB "# date/time: %s\n", &TimeStr();
      printf OUTTAB "# consensus length: %d\n", $AlnConsLen;
      printf OUTTAB "#\n# column labels:\n# %s\n", join ("\t", 'pos', @$pSmb);

      # loop over alignment positions
      for ($CtPos=0; $CtPos<int(@$pTabFreq); $CtPos++) {
        printf OUTTAB "%s\n", join ("\t", $CtPos+1, @{$$pTabFreq[$CtPos]}{@$pSmb});
      }
      close OUTTAB;
      printf "symbol frequency table saved as %s\n", $path{report}||"''";
    }
    undef $pTabFreq;
    undef $pSmb;
  }

  ########################################################################
  # plot of rel. conservation and entropy
  # we need the entropy plot data calculated in the 'SmbFreq' paragraph
  if ($SlcFunc =~ m/ConservEntrop/i or $SlcFunc eq 'all') {

    # smoothening parameters
    $WinSize = 10;
    $WinStep = $AlnConsLen / $ImgWidth;  # one value per pixel

    # prepare plot data
    my $pPlotRelconserv = &AlnTabRelConserv ($$pProj{align}, %MaskGapOpt, -debug=>$dbg2);
    # calculation of entropy plot is connected to symbol frequency table
    #   see above
    my $poPlotRelconserv = Math::Plot2D->new($pPlotRelconserv, -TabType=>'A1y', -debug=>$dbg2)
      if (int (@$pPlotRelconserv));
    my $poPlotEntropy = Math::Plot2D->new($pPlotEntropy, -TabType=>'A1y', -debug=>$dbg2);

    # prepare plot graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
        { DimPixel  => { x=>$ImgWidth },
          HeightRel => 0.2,
          DataType  => 'AA',
          data      => $poPlotEntropy->SmoothPlot ($WinStep, -window=>$WinSize),
          DataRange => { x=>[0] },
          ReprType  => 'line',
          ReprColor => 'black',
        },
        ],
      scale => [
        { location => 'x',
          PlotNum  => 0,
        },
        { location => 'left',
          PlotNum  => 0,
        },
        { location => 'right',
          PlotNum  => 0,
        },
        ],
      );
    if (@$pPlotRelconserv) {
      push @{$graph{plot}}, {
        DataType  => 'AA',
        data      => $poPlotRelconserv->SmoothPlot ($WinStep*1.5, -window=>$WinSize),
        DataRange => { x=>[0] },
        ReprType  => 'line',
        ReprColor => 'red',
        };
      $graph{scale}[2]{PlotNum} = 1;
    }

    # save plot data
    $path{ImgData} = $path{stamp} .'conserv_img.dat';
    if ($hOut = FileHandle->new($path{ImgData},'w')) {
      printf "saving conservation image data to file %s\n", $path{ImgData};
    } else {
      printf STDERR "%s. ERROR: failure in saving conservation image datato file %s\n", &MySub, $path{ImgData};
    }
    &DataPrint (\%graph, -handle=>$hOut);
    # save image
    $path{img} = $path{stamp} .'conserv.png';
    if (&Graph (\%graph, -save=>$path{img}, -debug=>$dbg2)) {
      printf "saving conservation plot image (smooth) to file %s\n", $path{img};
    } else {
      printf STDERR "%s. ERROR: failure in saving conservation plot image (smooth) to file %s\n", &MySub, $path{img};
    }

    # release memory
    undef $pPlotEntropy;
  }

  ########################################################################
  # plot of mean pairwise matrix distances
  if ($SlcFunc =~ m/(Local|Mean)Matscore/i or $SlcFunc eq 'all') {

    # smoothening parameters
    $WinSize = 6;
    $WinStep = $AlnConsLen / $ImgWidth;

    # prepare plot data
    my $pPlotMatScore = &AlnTabMatScore ($$pProj{align}, -table=>'PAM100', %MaskGapOpt, -debug=>$dbg2);
    $debug and printf STDERR "%s. plot of mean matrix score in alignment:\n  %s\n", &MySub,
      join ("\n  ", @$pPlotMatScore);
    my $poPlotMatScore = Math::Plot2D->new($pPlotMatScore, -TabType=>'A1y', -debug=>$dbg2);

    # prepare plot graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
        { DimPixel  => { x=>$ImgWidth },
          HeightRel => 0.2,
          DataType  => 'AA',
          data      => $poPlotMatScore->SmoothPlot ($WinStep, -window=>$WinSize),
          DataRange => { x=>[0] },
          ReprType  => 'line',
          ReprColor => 'black',
        },
        ],
      scale => [
        { location => 'x',
          PlotNum  => 0,
        },
        { location => 'left',
          PlotNum  => 0,
        },
        { location => 'right',
          PlotNum  => 0,
        },
        ],
      );

#    # add plot(s) using another matrix
#    $pPlotMatScore = &AlnTabMatScore ($$pProj{align}, %MaskGapOpt, -table=>'PAM30', -debug=>$dbg2);
#    $poPlotMatScore = Math::Plot2D->new($pPlotMatScore, -TabType=>'A1y', -debug=>$dbg2);
#    push @{$graph{plot}}, {
#      DataType     => 'AA',
#      data         => $poPlotMatScore->SmoothPlot ($WinStep*1.5, -window=>$WinSize),
#      DataRangeRef => 0,
#      ReprType     => 'line',
#      ReprColor    => 'red',
#      };
#    $pPlotMatScore = &AlnTabMatScore ($$pProj{align}, %MaskGapOpt, -table=>'PAM250', -debug=>$dbg2);
#    $poPlotMatScore = Math::Plot2D->new($pPlotMatScore, -TabType=>'A1y', -debug=>$dbg2);
#    push @{$graph{plot}}, {
#      DataType     => 'AA',
#      data         => $poPlotMatScore->SmoothPlot ($WinStep*1.5, -window=>$WinSize),
#      DataRangeRef => 0,
#      ReprType     => 'line',
#      ReprColor    => 'blue',
#      };

    # save plot data
    $path{ImgData} = $path{stamp} .'MeanMatscore_img.dat';
    if ($hOut = FileHandle->new($path{ImgData},'w')) {
      printf "mean matrix score image data saved as %s\n", $path{ImgData}||"''";
    } else {
      printf STDERR "%s. ERROR: failure in saving mean matrix score image data\n", &MySub;
    }
    &DataPrint (\%graph, -handle=>$hOut);
    # save image
    $path{img} = $path{stamp} .'MeanMatscore.png';
    if (&Graph (\%graph, -save=>$path{img}, -debug=>$dbg2)) {
      printf "mean matrix score plot (smooth) saved as %s\n", $path{img}||"''";
    } else {
      printf STDERR "%s. ERROR: failure in plotting mean matrix score (smooth)\n", &MySub;
    }
  }

  ########################################################################
  # frameshift plot
  if ($SlcFunc =~ m/FrameShift/i or $SlcFunc eq 'all'
    and $$pProj{param}{SeqType} =~ m/^[DR]NA$/i) {

    # get plot data
    $pPlotShift = &AlnTabFrameshift ($$pProj{align}, -debug=>$dbg2);
    $debug and printf STDERR "%s. plot of frameshifts:\n%s", &MySub,
      join ('', map { "  $_\n" } @$pPlotShift);

    # prepare plot graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
        { DimPixel  => { x=>$ImgWidth },
          HeightRel => 0.2,
          DataType  => 'A1y',
          data      => { y=>$pPlotShift, },
          ReprType  => 'line',
        },
        ],
      scale => [
        { location => 'x',
          PlotNum  => 0,
        },
        { location => 'left',
          PlotNum  => 0,
        },
        { location => 'right',
          PlotNum  => 0,
        },
        ],
      );

    # save image
    $path{img} = $path{stamp} .'frameshift.png';
    if (&Graph (\%graph, -save=>$path{img}, -debug=>$dbg2)) {
      printf "framshift plot saved as %s\n", $path{img}||"''";
    } else {
      printf STDERR "%s. ERROR: failure in plotting framshifts\n", &MySub;
    }
  }

  ########################################################################
  # plot of local sequence uniqueness
  if ($SlcFunc =~ m/unique/i or $SlcFunc eq 'all') {
    $pAln = &AlnClone ($$pProj{align}, -DelConsens=>1);

    # step / smoothening parameters
    $WinSize = 6;
    $WinStep = $AlnConsLen / $ImgWidth / 5;
#    $WinStep = $AlnConsLen / $ImgWidth * 5;
    $CtPos = 0;

    # prepare plot data
    $AlnHeight = int @$pAln;
    $pPlotUniq = [ [ 0, 0, 0 ] ];
    while (($CtPos += $WinStep) < 500) {
#    while (($CtPos += $WinStep) < length ($$pProj{align}[0]{sequence})) {
      %AlnSubstr = ();
      map { $AlnSubstr{substr($_->{sequence},0,$CtPos)} ++; } @$pAln;
      push @$pPlotUniq, [ $CtPos,
        int (keys %AlnSubstr) / $AlnHeight, 
        int (grep { $_ == 1 } values %AlnSubstr) / $AlnHeight 
        ];
      $debug and printf STDERR "%s. local seq uniqueness, pos. %d of %d: groups %.3f, unique %.3f\n", &MySub,
        $CtPos, length ($$pProj{align}[0]{sequence}), $$pPlotUniq[-1][1], $$pPlotUniq[-1][2];
    }

    # save plot data
    $path{data} = $path{stamp} .'RelUnique.tab';
    unless (open (OUTTAB, ">$path{data}")) {
      printf STDERR "ERROR: unable to open output file %s\n", $path{data}||"''";
    } else {
      printf OUTTAB "# local sequence uniqueness table for alignment project %s\n", $$pProj{name}||"''";
      printf OUTTAB "# date/time: %s\n", &TimeStr();
      printf OUTTAB "# consensus length: %d\n", $AlnConsLen;
      printf OUTTAB "#\n# column labels:\n# %s\n",
        join ("\t", qw(AlnPos RelGroups RelUnique));
      foreach (@$pPlotUniq) {
        printf OUTTAB "%s\n", join ("\t", @$_);
      }
      close OUTTAB;
    }
    printf "table of local sequence uniqueness saved as %s\n", $path{data}||"''";

    # prepare plot graph
    %graph = (
      BgTranspar => $ProgOpt{-OutImgTransp},
      plot => [
        { DimPixel  => { x=>$ImgWidth },
          HeightRel => 0.2,
          DataType  => 'HCA',
          data      => scalar &TableConvert ('AA', 'HCA', $pPlotUniq, -ColLabel=>['x','y','A']),
          ReprType  => 'line',
          ReprColor => 'black',
        },
        {
          DataType  => 'HCA',
          data      => scalar &TableConvert ('AA', 'HCA', $pPlotUniq, -ColLabel=>['x','A','y']),
          ReprType  => 'line',
          ReprColor => 'red',
        },
        ],
      scale => [
        { location => 'x',
          PlotNum  => 0,
        },
        { location => 'left',
          PlotNum  => 0,
        },
        { location => 'right',
          PlotNum  => 0,
        },
        ],
      );
    delete $graph{plot}[0]{data}{A};
    delete $graph{plot}[1]{data}{A};

    # save image
    $path{img} = $path{stamp} .'RelUnique.png';
    if (&Graph (\%graph, -save=>$path{img}, -debug=>$dbg2)) {
      printf "plot of local sequence uniqueness saved as %s\n", $path{img}||"''";
    } else {
      printf STDERR "%s. ERROR: failure in plotting local sequence uniqueness\n", &MySub;
    }
  }
}


# report sequence ends diverging off the alignment
#
# INTERFACE
# - argument 1:  reference to alignment project
# - argument 2*: select for minimal length of the ends that applies to
#                sequences which are extracted in fastA file format.
#
# - global options:
#   -debug        [STD]
#   -OutImgWidth  [STD]
#
# DESCRIPTION
# - what's done here:
#   - find groups of identical divergent stretches, log grouping
#     difficulties to STDOUT
#   - save divergent ends as fastA
#   - prepare table of divergent ends
# - output is done to files whoose paths are derived from the project path.
#
sub ReportEnds {
  my ($pProj, $SlcLen) = @_;
  my ($debug, $dbg2, $ImgWidth);
  my ($sCons, $iConsLen, $pSeq, $sSeq, $end, $pos);
  my (@ReportPrim, $pSeqHave, $pSeqNew, $LenMin);
  my (@ReportSec, %path, %plot, %graph);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $SlcLen ||= 1;
  $ImgWidth = $ProgOpt{-OutImgWidth} || $ProgParam{default}{OutImgWidth};

  # pathname root
  $path{stamp}  = &PathSplit ($$pProj{Path})->{dir};
  $path{stamp} .= "/$$pProj{name}_";

  ##############################################################################
  # find divergent ends

  # loop over alignment entries
  foreach $pSeq (@{$$pProj{align}}) {

    # for consensus: store sequence and skip entry
    if ($$pSeq{id} =~ m/$reAlnConsens/io) {
      $sCons ||= $$pSeq{sequence};
      $iConsLen = length (&SeqStrPure ($sCons));
      next;
    }
    $debug and printf STDERR "%s. examining seq %s\n", &MySub, $$pSeq{id};

    # loop over alignment entry ends
    foreach $end (-1, 1) {

      # get sequence stretch
      $sSeq = &SeqStrPure (&SeqExtend ($pProj, $pSeq, $end, -debug=>$dbg2));
      unless (length ($sSeq)) { next }

      # derive divergence position (position of first letter out of alignment)
      $$pSeq{sequence} =~ m/^(-*)(-*[a-z])+/i;
      $pos = length (($end<0) ? $1 : $&);
      $pos = length (&SeqStrPure (substr ($sCons,0,$pos)));
      if ($end > 0) { $pos += 1; }

      # enter results
      push @ReportPrim, {
        id       => $$pSeq{id},
        idlist   => $$pSeq{id},
        sequence => $sSeq,
        pos      => $pos,
        dir      => ($end < 0) ? 5 : 3,
        len      => length ($sSeq),
        };
      $debug and printf STDERR "%s. found diverging end - seq %s, end %s, length %d\n", &MySub,
        $ReportPrim[-1]{id}, $ReportPrim[-1]{dir}, $ReportPrim[-1]{len};
    }
  }

  # grouping: filter out redundant entries
  ReportEndsNew:
  foreach $pSeqNew (sort { $a->{pos} <=> $b->{pos} or
                           $a->{dir} cmp $b->{dir} or
                           $b->{len} <=> $a->{len}; } @ReportPrim) {
    foreach $pSeqHave (grep { $$pSeqNew{pos} == $_->{pos} and
                              $$pSeqNew{dir} eq $_->{dir}; } @ReportSec) {
      $LenMin = &Min ($$pSeqNew{len}, $$pSeqHave{len});
      foreach ($pSeqNew, $pSeqHave) {
        $_->{SeqPart} = substr ($_->{sequence}, ($_->{dir} eq '3') ? 0 : -$LenMin, $LenMin);
      }
      my $identity;
      if (($identity = &AlnIdentity ([$pSeqNew,$pSeqHave], -KeySeq=>'SeqPart', -MatchUnk=>'N')) == 1) {
        $$pSeqHave{idlist} .= " $$pSeqNew{id}";
        $sSeq = &AlnConsens ([$pSeqNew,$pSeqHave], -KeySeq=>'SeqPart', -debug=>$dbg2);
        substr ($$pSeqHave{sequence}, ($$pSeqHave{dir} eq '3') ? 0 : -$LenMin, $LenMin) = $sSeq;
        next ReportEndsNew;
      } elsif ($identity > 0.9) {
        printf "nearly identical: %s in %s, overlap %d chars, pos. %d, %s' direction, rel. identity %.3f\n",
          $$pSeqNew{id}, $$pSeqHave{id}, $LenMin, $$pSeqHave{pos}, $$pSeqHave{dir}, $identity;
      }
    }
    push @ReportSec, $pSeqNew;
  }

  ##############################################################################
  # tabular report and sequence output of divergent ends

  # start table output
  $path{report} = $path{stamp} .'ends.tab';
  unless (open (OUTREPORT, ">$path{report}")) {
    printf STDERR "ERROR: unable to write to file %s\n", $path{report}||"''";
    return;
  }
  printf "saving tabular report to file %s\n", $path{report}||"''";
  printf OUTREPORT "# report on divergent sequence stretches for alignment project %s\n", $$pProj{name}||"''";
  printf OUTREPORT "# date/time: %s\n", &TimeStr();
  printf OUTREPORT "#\n# column labels:\n# %s\n",
    join ("\t", 'DivPos','dir','length','read(s)');

  # start sequence output
  $path{seq} = $path{stamp} .'ends.fa';
  unless (open (OUTSEQ, ">$path{seq}")) {
    printf STDERR "ERROR: unable to save file %s\n", $path{seq}||"''";
    return;
  }
  printf "saving sequences to file %s\n", $path{seq}||"''";

  # loop over end sequences
  # print to each sequence file and table
  foreach $pSeq (@ReportSec) {

    # print report table
    # no length filtering
    printf OUTREPORT "%s\n", join ("\t", @$pSeq{'pos','dir','len','idlist'});

    # print sequences to fastA file, select by length
    if ($$pSeq{len} < $SlcLen) { next }
    print  OUTSEQ &SeqentryToFasta (
      { sequence => $$pSeq{sequence},
        id       => $$pSeq{id} .'_'. $$pSeq{dir} .'end' },
      -phrase => "consensus pos. $$pSeq{pos}",
      -pure   => 1);
  }
  close OUTREPORT;
  close OUTSEQ;

  ##############################################################################
  # prepare plot of positions of divergent ends

  # count divergent stretches
  # prepare plot data
  foreach $pSeq (@ReportSec) {
    if ($$pSeq{pos} == 0 or $$pSeq{pos} == $iConsLen+1) { next }
    if (@{$plot{x}} and $plot{x}[$#{$plot{x}}] == $$pSeq{pos}) {
      $plot{y}[$#{$plot{x}}] ++;
    } else {
      push @{$plot{x}}, $$pSeq{pos};
      push @{$plot{y}}, 1;
    }
  }

  # prepare plot graph
  %graph = (
    BgTranspar => $ProgOpt{-OutImgTransp},
    plot => [
      { DimPixel  => { x=>$ImgWidth },
        HeightRel => 0.2,
        DataType  => 'HCA',
        data      => { x=>$plot{x}, y=>$plot{y} },
        DataRange => {
          x => [ undef, $iConsLen ],
          y => [ 0 ],
          },
        ReprType  => 'column',
      },
      ],
    scale => [
      { location => 'x',
        PlotNum  => 0,
      },
      { location => 'left',
        PlotNum  => 0,
      },
      { location => 'right',
        PlotNum  => 0,
      },
      ],
    );
  $path{img} = $path{stamp} .'endplot.png';
  if (&Graph (\%graph, -save=>$path{img}, -debug=>$dbg2)) {
    printf "plot of divergent ends saved as %s\n", $path{img}||"''";
  } else {
    printf STDERR "%s. ERROR: failure in plotting divergent ends\n", &MySub;
  }
}


# report project parameters
#
# INTERFACE
# - argument 1: reference to project data
#
sub ReportParam {
  my ($pProj) = @_;

  # print parameter data
  printf "name      %s\n", $$pProj{name}||"''";
  printf "changes:  %s\n", $$pProj{changes} ? 'yes':'no';
  print  "-------------------------------\n";
  print &AlnprojParamSprint ($$pProj{param}, -debug=>($ProgOpt{-debug}?$ProgOpt{-debug}-1:undef));
}


# report statistics for the alignment project
#
# INTERFACE
# - argument 1: reference to alignment project
#
# - global options:
#   -debug      [STD]
#   -noDB       don't refer to database sequences
#
# DESCRIPTION
# - compare SeqHandle.pl -Stat...
#
sub ReportStatist {
  my ($pProj, %opt) = @_;
  my ($debug, $dbg2, $SlcID);
  my ($pSeqarr, $CtEntry, $CtNt);

  # function parameters
  $debug = $ProgOpt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $SlcID = $$pProj{param}{Report}{Statistics}{SelectID};

  # report header
  print  "Statistics Report for alignment project '$$pProj{name}'\n";
  printf "consensus length: %d\n", length &SeqStrPure ($$pProj{consens}{sequence});
  if ($SlcID) {
    printf "ID selector: %s\n", $SlcID;
  }

  # statistics for database
  unless ($ProgOpt{-noDB}) {
    $pSeqarr = &SeqarrFromFFmt ($ProgParam{default}{DbStatist},
      -SlcID => $SlcID,
      -pure  => 1,
      -debug => $dbg2);
    foreach (@$pSeqarr) {
      $CtNt += length $_->{sequence};
    }
    printf "database%s: %s entries (%s %s)\n",
      $SlcID ? ' selection' : '',
      &NumFormat (int @$pSeqarr, -CommaPre=>','),
      &NumFormat (int $CtNt, -CommaPre=>','),
      ($$pProj{param}{SeqType} eq 'protein') ? 'aa residues' : 'nucleotides',
      ;
    undef $pSeqarr;
    undef $CtNt;
  }

  # statistics for alignment
  foreach (@{$$pProj{align}}) {
    if ($_->{id} =~ m/consens/i) { next }
    if (! $SlcID or $_->{id} =~ m/$SlcID/) {
      $CtEntry ++;
      $CtNt += length &SeqStrPure ($_->{sequence});
    }
  }
  printf "alignment%s: %s entries (%s nucleotides)\n",
    $SlcID ? ' selection' : '',
    &NumFormat (int($CtEntry), -CommaPre=>','),
    &NumFormat (int($CtNt), -CommaPre=>','),
    ;
  undef $CtNt;
}


################################################################################
# alignment construction
################################################################################


# add new sequence stretch (from database) to alignment
#
# INTERFACE
# - argument 1: reference to project data
# - argument 2: identifier of sequence to be added
# - argument 3: hash reference with hash containing pair relation info
#   strand      strandedness of sequence to be added ('F' or 'R').
#   StretchOff  position at which the sequence stretch starts in the original
#               database sequence. For a value lower 1 or no value supplied
#               it's assumed to be 1. Notice that the position refers to the
#               original strandedness!
#   StretchEnd  position at which the sequence stretch ends in the original
#               database sequence. For a value lower 1 or no value supplied
#               the stretch is assumed to extend to the end of the database
#               sequence.
#   StretchLen  length of sequence stretch starts in overlap region
#   AlnOff      Like $AlnEnd this value describes the positional relation
#               between the new sequence stretch and the alignment project
#               in it's current form. $AlnOff is the consensus sequence
#               position that correlates with position 1 of the new sequence
#               stretch. A Negative values are allowed here and means that
#               the new sequence starts -$AlnOff bp left beyond the left
#               margin (consensus pos 1) of the current alignment project.
#   AlnEnd      Like $AlnOff this value describes the positional relation
#               between the new sequence stretch and the alignment project
#               in it's current form. Usually, field 'AlnEnd', i.e. the
#               consensus sequence position that correlates with the end of
#               the new sequence stretch, can be derived from $AlnOff and the
#               length of the new sequence stretch. But the calculated value
#               only holds true if there're no insertions / deletions in the
#               new sequence stretch compared to the consensus sequence. So,
#               the optional info field 'AlnEnd' tells the SUB where to expect
#               the end position of the new sequence stretch in relation to the
#               consensus sequence. This helps to work around the problems
#               of calculating the position.
#               If 0 or undef is specified for 'AlnEnd', a default working
#               frame is calculated (which may be wrong). The code manages the
#               sequence stretch being smaller than the alignment working
#               window, but not vice versa!
#
# - options:
#   -debug      [STD]
#   -dialog     enable user dialogue
#   -ThreshStrange  Identity wich should be reached in the consensus/new
#               sequence pair. If it's not fulfilled, the Clustal W working
#               files are left in the temporary directory.
#
# return val:   - result array with:
#                 - relative match of added sequence to consensus sequence
#                 - flag that consensus has changed
#                 and for debug:
#                 - consensus sequence in working block
#                 - new sequence in working block
#               - undef  if an error occurred.
#
# DESCRIPTION
# - the major problem to come around is that Clustal W doesn't support
#   extended end clipping. So, there's a problem to add a sequence to an
#   alignment project that extend far beyond to the left or the right of
#   the current project or to add a small sequence stretch somewhere
#   into a very broad alignment project.
# - At the moment it's unclear what happens if there's a RNA added to
#   an intron containing DNA sequence.
# - arguments 3-6 refer to the alignment working window which means the
#   consensus sequence stretch which takes place in the alignment operation
#   which is done to add the new sequence.
# - Variable set for calculating the working alignment segment:
#   Variables of the form $Aln... describe the positional relation between
#   the new sequence stretch and the alignment project in it's current form
#   where the consensus sequence is used as a master reference. The values
#   are submitted as primary function arguments. They serve to serve to
#   calculate the variables of the sets $WkAln... and $SqAln...
#   Variables of the form $WkAln... describe the positional values of the
#   alignment segment that's sent to Clustal W. The segment may be smaller
#   than the whole project.
#   Variables of the form $SqAln... describe the positional values of the
#   sequence segment that's sent to Clustal W in fastA format. The segment
#   may be smaller than the whole sequence stretch that's to be added to
#   the alignment project.
#
# DEBUG, CHANGES, ADDITIONS
# - have a discussion about counting in computational or biological system.
#
sub AlnprojEntryAdd {
  my ($pProj,$SeqID,$pPairInfo,%opt) = @_;
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  my $bTimer = $ProgOpt{-timer}; my $time;

  my ($sCons, %SeqNew);
  my ($WkAln5len, $WkAlnLen, $WkAln3len, %SeqAln);
  my ($TPathSeq, $TPathClustal, $TPathClustNew, $TPathClustDnd);
  my (%ParamClustal, $ParamClustalPlain, $CallClustal, $ProtClustal);
  my ($LenGap, $SubstrGap, $pProjNewcore, $pAlnTemp);
  my ($NmbNew, $LenNew, $bConsChg, $Identity, $CtI);

  # preliminary check and pre-processing of arguments
  unless ($SeqID) {
    printf STDERR "%s. ERROR: no sequence ID given\n", &MySub;
    return;
  }
  if ($$pPairInfo{StretchOff}<1) { $$pPairInfo{StretchOff}=1 }
  $$pPairInfo{strand} =~ tr/a-z/A-Z/;
  $$pPairInfo{strand} =~ tr/FR//cd;
  unless ($$pPairInfo{strand}) {
    printf STDERR "%s. ERROR: no strandedness given\n", &MySub;
    return;
  }
  $sCons = &SeqStrPure ($$pProj{consens}{sequence});
  if ($$pPairInfo{AlnOff} > length $sCons) {
    printf STDERR "%s. ERROR: alignment working window right off alignment\n", &MySub;
    return;
  }
  if ($$pPairInfo{AlnEnd} and $$pPairInfo{AlnEnd}<$$pPairInfo{AlnOff}) {
    printf STDERR "%s. ERROR: nonsense alignment working window\n", &MySub;
    return;
  }
  if ($debug) {
    printf STDERR "%s. arguments pre-processed and OK\n", &MySub.
      "  \$\$pPairInfo{StretchOff} = %d\n".
      "  \$\$pPairInfo{StretchEnd} = %d\n".
      "  \$\$pPairInfo{AlnOff}     = %d\n".
      "  \$\$pPairInfo{AlnEnd}     = %d\n".
      '',
      $$pPairInfo{StretchOff}, $$pPairInfo{StretchEnd},
      $$pPairInfo{AlnOff}, $$pPairInfo{AlnEnd},
      ;
  }

  # Clustal W parameters
  %ParamClustal = (
    -gapopen => '0.385',
    -gapext  => '2.20',
    -type    => ($$pProj{param}{SeqType} eq 'RNA') ? 'DNA' : $$pProj{param}{SeqType},
    );
  foreach (keys %ParamClustal) {
    $ParamClustalPlain .= " $_=$ParamClustal{$_}";
  }

  # get new sequence from DB
  $bTimer and $time = (times)[0];
  %SeqAln = %{ &SeqOriginal ($pProj, $SeqID, -debug=>$dbg2) };
  $bTimer and printf STDERR "%s. CPU time for retrieving original sequence: %.3f s\n", &MySub, (times)[0]-$time;
  unless ($SeqAln{sequence}) {
    if ($opt{-dialog} or $ProgOpt{-verbose} or $debug) {
      printf STDERR "ERROR: missing database entry %s\n", $SeqID||"''";
    }
    return;
  } elsif ($debug) {
    printf STDERR "%s. retrieved database entry %s\n", &MySub, $SeqID||"''";
  }

  # sequence-dependent parameters, enter sequence stretch
  if ($$pPairInfo{StretchEnd}<1) { $$pPairInfo{StretchEnd} = length($SeqAln{sequence}) }
  $$pPairInfo{StretchLen} = $$pPairInfo{StretchEnd} - $$pPairInfo{StretchOff} + 1;
  if ($$pPairInfo{StretchLen} < 1) {
    printf STDERR "%s. ERROR: no resulting sequence stretch\n", &MySub;
    return;
  }

  # cut out sequence stretch from original sequence
  # prepare complete sequence data
  # $SeqNew is the newly generated sequence entry
  # $SeqAln is a working container
  $SeqNew{id}        = $SeqAln{id};
  $SeqNew{offset}    = $$pPairInfo{StretchOff};
  $SeqNew{orient}    = $$pPairInfo{strand};
  $SeqNew{idcomplex} = $SeqAln{id} .'_'. $$pPairInfo{StretchOff} . $$pPairInfo{strand};
  $SeqNew{sequence}  = '';
  $SeqAln{sequence} = substr ($SeqAln{sequence}, $$pPairInfo{StretchOff}-1, $$pPairInfo{StretchLen});
  $SeqAln{sequence} = ($$pPairInfo{strand} eq 'F') ? $SeqAln{sequence} : &SeqStrRevcompl ($SeqAln{sequence});
  if ($$pPairInfo{AlnOff}<1 and -$$pPairInfo{AlnOff}>length($SeqAln{sequence})) {
    printf STDERR "%s. ERROR: alignment working window left off alignment\n", &MySub;
    return
  }
  if ($debug) {
    printf STDERR "%s. got sequence stretch: $SeqAln{sequence}\n", &MySub;
    printf STDERR "%s. consensus sequence: $sCons\n", &MySub;
  }

  # work out alignment parameters (position values in biological syntax)
  $WkAln5len = ($$pPairInfo{AlnOff} > 0) ? $$pPairInfo{AlnOff}-1 : 0;
  $SeqAln{'5len'} = ($$pPairInfo{AlnOff} < 0) ? -$$pPairInfo{AlnOff} : 0;
  if ($$pPairInfo{AlnEnd}) {
    $WkAlnLen = &Min ($$pPairInfo{AlnEnd}, length $sCons) - &Max($$pPairInfo{AlnOff}, 1) + 1;
  } else {
    $WkAlnLen = &Min (length substr ($sCons, $WkAln5len, length $sCons),
                      length substr ($SeqAln{sequence},  $SeqAln{'5len'}, $$pPairInfo{StretchLen}));
  }
  $WkAln3len = (length $sCons) - $WkAln5len - $WkAlnLen;
  $SeqAln{'3len'} = 0; # this should be ok for Clustal W
                       # otherwise one has to estimate what hangs over beyond left end of alingment
  $SeqAln{len}  = $$pPairInfo{StretchLen} - $SeqAln{'5len'} - $SeqAln{'3len'};
  if ($debug) {
    printf STDERR "%s. secondary parameters:\n", &MySub;
    printf STDERR "  \$\$pPairInfo{StretchOff} = %d\n", $$pPairInfo{StretchOff};
    printf STDERR "  \$\$pPairInfo{StretchEnd} = %d\n", $$pPairInfo{StretchEnd};
    printf STDERR "  \$\$pPairInfo{StretchLen} = %d\n", $$pPairInfo{StretchLen};
    printf STDERR "  \$\$pPairInfo{AlnOff}     = %d\n", $$pPairInfo{AlnOff};
    printf STDERR "  \$\$pPairInfo{AlnEnd}     = %d\n", $$pPairInfo{AlnEnd};
    printf STDERR "  \$WkAln5len               = %d\n", $WkAln5len;
    printf STDERR "  \$WkAlnLen                = %d\n", $WkAlnLen;
    printf STDERR "  \$WkAln3len               = %d\n", $WkAln3len;
    printf STDERR "  \$SeqAln{'5len'}          = %d\n", $SeqAln{'5len'};
    printf STDERR "  \$SeqAln{len}           = %d\n", $SeqAln{len};
    printf STDERR "  \$SeqAln{'3len'}          = %d\n", $SeqAln{'3len'};
  }

  # save new sequence stretch as temporary fastA
  $TPathSeq = $ProgParam{TmpManag}->Create(-touch=>1);
  &WriteFile ($TPathSeq, &SeqentryToFasta (
    { sequence => substr ($SeqAln{sequence}, $SeqAln{'5len'}, $SeqAln{len}),
      id       => $SeqNew{idcomplex} },
    -pure => 1));

  # remove stretch extensions in project alignment
  # - this has to be done here cause the function may be called from AlnK
  #   command line
  # split out working alignment block
  # # fill abundant occurrences of leading gaps with consensus sequence
  # write working alignment block
  $bTimer and $time = (times)[0];
  &AlnprojExtensHide ($pProj);
  ($pProj5end,$pProjCore,$pProj3end) =
    &AlnprojSplit3 ($pProj, $WkAln5len, $WkAlnLen, -consens=>1, -debug=>$dbg2);
  foreach ($pProj5end,$pProjCore,$pProj3end) {
    unless ($_) {
      printf STDERR "%s. ERROR in \&AlnprojSplit3, missing project part(s)\n", &MySub;
      return;
    }
  }
  for ($CtI=0; $CtI<@{$$pProjCore{align}}; $CtI++) {
    if ($$pProjCore{align}[$CtI]{sequence} =~ m/^-*$/) {
      splice @{$$pProjCore{align}}, $CtI, 1;
      $CtI --;
    }
  }
  $TPathClustal = $ProgParam{TmpManag}->Create(-touch=>1);
  &WriteFile ($TPathClustal, &AlnprojClustalSprint ($$pProjCore{align}));
  $bTimer and printf STDERR "%s. CPU time for preparing/saving working alignment block: %.3f s\n", &MySub, (times)[0]-$time;

  # perform alignment using Clustal W
  $bTimer and $time = &Sum ((times)[0,2]);
  $TPathClustNew = $ProgParam{TmpManag}->Create(-touch=>1);
  $CallClustal = "$CorePath{call}{clustalw} -align -quicktree -profile1=$TPathClustal -profile2=$TPathSeq -outfile=$TPathClustNew $ParamClustalPlain";
  $debug and printf STDERR "%s. calling Clustal W with command line:\n  $CallClustal\n", &MySub;
  $ProtClustal = &ReadFile ("$CallClustal |");
  $bTimer and printf STDERR "%s. CPU time for Clustal W subprocess: %.3f s\n", &MySub, &Sum((times)[0,2])-$time;

  # open updated alignment working block
  # pre-analyse alignment (error recognition)
  $bTimer and $time = (times)[0];
  if ($ProtClustal =~ m/^guide tree.*?\[(.*?)\]$/im) {
    $TPathClustDnd = $1;
  } else {
    printf STDERR "%s. WARNING: no Clustal W guide tree found\n", &MySub;
  }
  $pProjNewcore = &AlnprojClustalOpen ($TPathClustNew, undef,
    -dialog => 0,
    -noDB   => 1,
    -debug  => $dbg2);
  unless ($pProjNewcore) {
    printf STDERR "%s. ERROR: unable to open new alignment after Clustal W procedure\n", &MySub;
    printf STDERR "%s. clustal W protocol:\n$ProtClustal", &MySub;
    return;
  }
  if ($debug) {
    printf STDERR "%s. clustal W protocol:\n$ProtClustal", &MySub;
    printf STDERR "%s. alignment check, core alignment and new core:\n", &MySub;
    print  STDERR &AlnprojClustalSprint ($$pProjCore{align});
    print  STDERR &AlnprojClustalSprint ($$pProjNewcore{align});
  }
  $NmbNew = $#{$$pProjNewcore{align}};
  if ($NmbNew != @{$$pProjCore{align}} or
      $$pProjNewcore{align}[$NmbNew]{id} ne $SeqNew{id}) {
    printf STDERR "%s. ERROR: new sequence missing in Clustal W alignment\n", &MySub;
    printf STDERR "%s. number of entries:\n", &MySub;
    printf STDERR "  3' part:       %d\n", int @{$$pProj3end{align}};
    printf STDERR "  core part:     %d\n", int @{$$pProjCore{align}};
    printf STDERR "  new core part: %d\n", int @{$$pProjNewcore{align}};
    printf STDERR "  5' part:       %d\n", int @{$$pProj5end{align}};
    printf STDERR "%s. comparison of project identifiers:\n", &MySub;
    print  STDERR "  ID of new stretch: $SeqNew{id}\n";
    print  STDERR "  ID of last Clustal W entry: $$pProjNewcore{align}[$NmbNew]{id}\n";
    printf STDERR "%s. path of Clustal W alignment file: '$TPathClustNew'\n", &MySub;
    printf STDERR "%s. original Clustal W protocol:\n$ProtClustal", &MySub;
    return;
  }
  &AlnprojExtensHide ($pProjNewcore);
  if (length &SeqStrPure ($$pProjNewcore{consens}{sequence}) != $WkAlnLen) {
    $bConsChg = 1;
  }

  # measure quality of alignment
  @$pAlnTemp = ($$pProjNewcore{consens}, $$pProjNewcore{align}[$NmbNew]);
  $Identity = &AlnIdentity ($pAlnTemp, -LenRef=>1, -uplow=>1, -MatchUnk=>$SeqSmbUnk{$$pProj{param}{SeqType}}, -debug=>$dbg2);

  # delete temporary files
  unless ($debug) {
    unlink ($TPathClustDnd);
    if ($Identity > $opt{-ThreshStrange}) {
      unlink ($TPathSeq, $TPathClustal, $TPathClustNew);
    } else {
      printf STDERR "%s. ERROR: violation of identity threshold\n", &MySub;
      printf STDERR "%s. sequence and alignment files left in temporary directory\n  %s, %s, %s\n", &MySub,
        $TPathSeq, $TPathClustal, $TPathClustNew;
    }
  }

  ################################################################################
  # recompose project

  # re-join project
  push @{$$pProj5end{align}}, { %SeqNew };
  push @{$$pProj3end{align}}, { %SeqNew };
  $pProjCore = &AlnprojJoin ($pProj5end, $pProjNewcore, $pProj3end);
  unless ($pProjCore) {
    printf STDERR "%s. ERROR in re-joint of projects - function \&AlnprojJoin\n", &MySub;
    return;
  }
  if ($debug) {
    printf STDERR "%s. joint check, new alignment:\n", &MySub;
    print  STDERR &AlnprojClustalSprint ($$pProjCore{align});
  }

  # insert hang-over sequence stretch 5'
  $$pProjCore{align}[$NmbNew]{sequence} =~ m/^-*/;
  if ($SeqAln{'5gap'} = length $&) {
    $SeqAln{'5len'} -= $SeqAln{'5gap'};
    if ($SeqAln{'5len'} < 0) {
      $SeqAln{'5space'}  = -$SeqAln{'5len'};
      $SeqAln{'5len'}  =  0;
      $SeqAln{'5gap'} -=  $SeqAln{'5space'};
    }
    substr ($$pProjCore{align}[$NmbNew]{sequence}, $SeqAln{'5space'}, $SeqAln{'5gap'}) =
      substr ($SeqAln{sequence}, $SeqAln{'5len'}, $SeqAln{'5gap'});
    $debug and printf STDERR "%s. gap parameters for 5' hang-over insertion:\n", &MySub.
      "  \$SeqAln{'5len'}   = %d\n".
      "  \$SeqAln{'5space'} = %d\n".
      "  \$SeqAln{'5gap'}   = %d\n".
      '', $SeqAln{'5len'}, $SeqAln{'5space'}, $SeqAln{'5gap'};
    $SeqAln{len} += $SeqAln{'5gap'};
  }
  if ($SeqAln{'5len'}) {
    foreach (@{$$pProjCore{align}}) {
      substr ($_->{sequence}, 0, 0) = '-' x $SeqAln{'5len'};
    }
    substr ($$pProjCore{align}[$NmbNew]{sequence}, 0, $SeqAln{'5len'}) = substr ($SeqAln{sequence}, 0, $SeqAln{'5len'});
    $SeqAln{len} += $SeqAln{'5len'};
    $debug and printf STDERR "%s. edge insertion of %d bp 5' of working frame\n", &MySub, $SeqAln{'5len'};
  }

  # insert hang-over sequence stretch 3'
  $$pProjCore{align}[$NmbNew]{sequence} =~ m/-*$/;
  if ($SeqAln{'3gap'} = length $&) {
    $SeqAln{'3len'} -= $SeqAln{'3gap'};
    if ($SeqAln{'3len'} < 0) {
      $SeqAln{'3space'}  = -$SeqAln{'3len'};
      $SeqAln{'3len'}    =  0;
      $SeqAln{'3gap'}   -=  $SeqAln{'3space'};
    }
    substr ($$pProjCore{align}[$NmbNew]{sequence}, -$SeqAln{'3gap'}-$SeqAln{'3space'}, $SeqAln{'3gap'}+$SeqAln{'3space'}) =
      substr ($SeqAln{sequence}, $SeqAln{'5space'}+$SeqAln{len}, $SeqAln{'3gap'}) . ('-' x $SeqAln{'3space'});
    $debug and printf STDERR "%s. gap parameters for 3' hang-over insertion:\n", &MySub.
      "  \$SeqAln{'3gap'} = %d\n".
      "  \$SeqAln{'3space'} = %d\n".
      "  \$SeqAln{'3len'} = %d\n".
      '', $SeqAln{'3gap'}, $SeqAln{'3space'}, $SeqAln{'3len'};
    $SeqAln{len} += $SeqAln{'3gap'};
  }
  if ($SeqAln{'3len'}) {
    $$pProjCore{align}[$NmbNew]{sequence} .= substr ($SeqAln{sequence}, $SeqAln{'5space'}+$SeqAln{len}, $SeqAln{'3len'});
    $debug and printf STDERR "%s. edge insertion of %d bp 3' of working frame\n", &MySub, $SeqAln{'3len'};
  }

  $bTimer and printf "%s. CPU time for reconstructing alignment: %.3f s\n", &MySub, (times)[0]-$time;

  ################################################################################
  # replace master project
  %{$pProj} = %{$pProjCore};	
  $$pProj{changes} = 1;

  # exit SUB
  return ($Identity,$bConsChg,$$pProjNewcore{consens}{sequence},$$pProjNewcore{align}[$NmbNew]{sequence});
}
# $Id: Align.pl,v 1.28 2008/06/11 08:44:57 szafrans Exp $
