################################################################################
#
#  kPerl Sequence Laboratory
#  Library for Sequence Analysis
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 2001,2003-2004,
#    szafrans@imb-jena.de
#  Karol Szafranski on behalf of FLI Jena, Genome Analysis Group, 2006,
#    szafrans@fli-leibniz.de
#
################################################################################
#
#  DESCRIPTION
#
# - individual description of functions can be found at the beginning of the
#   code blocks
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#
# - feature report and statistics
#   &SeqCodeCount
#   &CdsStruct
#   &CdsFromGff  (not exported)
#
# - molecule masses
#   %TabNtMass  (not exported)
#   %TabProtMass  (not exported)
#   &SeqMass
#
# - chemical properties of proteins
#   %TabProtAcid  (not exported)
#   %TabProtHydrophob  (not exported)
#   &ProtPlotAcid
#   &ProtPlotHydrophob
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - look also for notes in the header of each function block
#
################################################################################

package SeqLab::SeqAnalysis;

# includes
use strict; #use warnings;  # OK 20040506
use MainLib::Data qw(&DataTreeSlc &DataPrint);
use MainLib::Misc qw(&MySub);
use Math::Calc;
use SeqLab::SeqBench;
use SeqLab::SeqFormat qw(&SeqentryToFasta &AnnotExpandTscr);

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &SeqCodeCount &CdsStruct
  &SeqMass
  &ProtPlotAcid &ProtPlotHydrophob
  );


################################################################################
# feature report and statistics
################################################################################


# tabulate how often each character occurs in a sequence string
#
# INTERFACE
# - argument 1: sequence string
#
# - options:
#   -debug      [STD]
#   -pure       pass the sequence through &SeqStrPure before counting. The
#               option value is handed on as -SeqType.
#   -upper      fold lower-case letters a-z to upper case before counting
#
# - return val: reference to hash, character => number of occurrences
#
# DESCRIPTION
# - Strings longer than 1_000_000 characters are scanned via substr, shorter
#   ones via split. Both ways yield the same counts; split is slightly faster
#   but memory-hungry on long sequences.
#
sub SeqCodeCount {
  my ($sSeq, %opt) = @_;
  my $debug = $opt{-debug};
  my %CtFreq;

  # prepare sequence string
  if ($opt{-pure}) {
    $sSeq = &SeqStrPure ($sSeq, -SeqType=>$opt{-pure});
  }
  if ($opt{-upper}) {
    $sSeq =~ tr/a-z/A-Z/;
  }

  # count single characters
  my $iSeqLen = length ($sSeq);
  if ($iSeqLen > 1_000_000) {
    # long sequence: walk the string position by position
    my $iPos = 0;
    while ($iPos < $iSeqLen) {
      $CtFreq{substr($sSeq,$iPos,1)} ++;
      $iPos ++;
    }
  } else {
    # short sequence: explode into a character list
    $CtFreq{$_} ++ foreach split (//, $sSeq);
  }

  # debug protocol
  if ($debug) {
    my $sReport = join (' ', map { "$_=$CtFreq{$_}" } sort keys %CtFreq);
    printf STDERR "%s. letter frequency: ", &MySub;
    printf STDERR "  %s\n", $sReport;
  }

  # return result
  return \%CtFreq;
}


# develop CDS model data structure and statistics for annotated nt sequences
#
# INTERFACE
# - argument 1: - reference to array of sequence data structures
#                 array will be shrunk to ()!
#               - reference to SeqStreamIn object
#
# - options:
#   -debug      [STD]
#   -IncError   include CDS data that have shown errors
#
# - return val: - reference to data structure (hash). Fields:
#                 cds, exon, intron       arrays of feature hashes, sorted by
#                                         position
#                 donor, accept           arrays of splice site hashes
#                 IntronLength            array of intron sequence lengths
#                 ExonPerCds              array of exon counts per CDS
#                 SeqIntron, SeqCds, SeqProt
#                                         concatenated &SeqentryToFasta output
#                 ExonGff                 tab-delimited exon lines
#               - undef if an error occurs
#
# DESCRIPTION
# - Annotation is needed with label 'CDS' marking the sequence range in
#   GenBank fashion. Conversions will be done from:
#   - Experiment format, Dicty-fashion annotation
#   - GFF format, geneID-fashion annotation and (hopefully) most others
# - The input sequences may be gapped and this won't affect the CDS analysis
#   nor will the sequence be changed in this respect. But note: all listed
#   sequence elements will be reported in the original gapped status.
#   Regarding the problems in assigning feature positions in gapped sequences,
#   it's safest to use purified sequences as an input to this function.
# - CDS counters restart at 0 for every sequence, exon counters restart at 0
#   for every CDS. Feature IDs read g<cds>, g<cds>e<exon>, g<cds>i<intron>.
# - An outer complement(...) is stripped from the range field of the
#   processed annotation entry; the orientation is reported in field 'orient'.
#
# DEBUG, CHANGES, ADDITIONS
# - The exon-side flank of the splice donor is added for plus-strand CDSs
#   only. No flanks are added to the splice acceptor so far.
# - A range that is complemented inside (e.g. inside a join) terminates the
#   program via exit.
#
sub CdsStruct {
  my ($SeqSrcArg, %opt) = @_;
  my (%struct);

  # function constants
  my $SigSrrd = 20;  # number of letters reported next to a splice signal

  # function parameters
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;

  ##############################################################################
  # sequence loop control
  # do format conversions first

  while (1) {  # loop over sequences
    my $pSeq;
    if      (ref($SeqSrcArg) eq 'SeqLab::SeqStreamIn') {
      $pSeq = $SeqSrcArg->GetNext();
    } elsif (ref($SeqSrcArg) eq 'ARRAY') {
      $pSeq = shift @$SeqSrcArg;
    } else {
      printf STDERR "%s. ERROR: unknown argument type %s\n", &MySub, ref($SeqSrcArg)||"''";
      return undef;
    }
    unless ($pSeq and %$pSeq) { last }
    $debug and printf STDERR "%s. sequence %s\n", &MySub, $$pSeq{id}||"''";

    # convert from non-CDS-coded annotation formats
    # a missing annotation list is treated like an empty one
    my @AnnotCds = grep { $_->{type} eq 'CDS' and defined ($_->{range}) }
      @{ $$pSeq{annot} || [] };
    unless (@AnnotCds) {
      if ($$pSeq{SrcFmt} eq 'Experiment') {
        &AnnotExpandTscr ($pSeq, -debug=>$debug);
        @AnnotCds = sort { ($a->{range} =~ m/(\d+)/)[0] <=> ($b->{range} =~ m/(\d+)/)[0] }
          map { (exists $_->{CDS}) ? $_->{CDS} : () } values %{$$pSeq{AnnotGrp}};
      }
      elsif ($$pSeq{SrcFmt} eq 'GenBank') {
        # for source format 'GenBank' we should never get here
        printf STDERR "%s. WARNING: sequence %s\n", &MySub, $$pSeq{id}||"''";
      }
      elsif ($$pSeq{SrcFmt} eq 'GFF') {
        @AnnotCds = &CdsFromGff ($pSeq, -debug=>$debug);
      }
    }

    # minimise sequence description entry
    $$pSeq{descr} =~ s/(, )?\d+ letters$//;

    # loop over CDS annotations
    my $CtCds = 0;
    foreach my $pAnnot (@AnnotCds) {
      $debug and printf STDERR "%s. CDS in %s, range %s\n", &MySub, $$pSeq{id}||"''", $$pAnnot{range};

  ##############################################################################
  # check consistency of CDS

      # get CDS sequence
      my $pSeqCds = &SeqCplxRange ($pSeq, $$pAnnot{range}, -TrackPos=>1, -debug=>$dbg2);
      unless ($pSeqCds) {
        printf STDERR "%s. seq range ERROR, seq %s, range %s\n", &MySub,
          $$pSeq{id}||"''", $$pAnnot{range};
        next;
      }

      # check resulting translated sequence
      # &TranslNt purifies the sequence string prior to processing
      $$pSeqCds{SeqProt} = &TranslNt ($$pSeqCds{sequence});
      if ($$pSeqCds{SeqProt} =~ m/^(\w+)(\*)?$/) {
        unless ($2) {
          printf STDERR "%s. WARNING: CDS doesn't end with stop codon, seq %s, range %s\n", &MySub,
            $$pSeq{id}||"''", $$pAnnot{range};
        }
        # keep the protein without the trailing stop symbol
        $$pSeqCds{SeqProt} = $1;
        unless (substr ($$pSeqCds{SeqProt}, 0, 1) eq 'M') {
          printf STDERR "%s. WARNING: CDS doesn't start with aa M, seq %s, range %s\n", &MySub,
            $$pSeq{id}||"''", $$pAnnot{range};
        }
      }

      # resulting in bad translation
      else {
        printf STDERR "%s. ERROR: CDS shows stop codon at pos. %d of %d, seq %s, range %s\n", &MySub,
          do { $$pSeqCds{SeqProt} =~ m/\*/ and (length($`) * 3); }, length $$pSeqCds{sequence},
          $$pSeq{id}||"''", $$pAnnot{range};
        printf STDERR "  sequence %s\n", $opt{-IncError} ? 'included':'skipped';
        # the sequences are reported regardless of the debug level
        printf STDERR "  nt sequence: %s\n", $$pSeqCds{sequence};
        printf STDERR "  protein sequence: %s\n", $$pSeqCds{SeqProt};
        if ($opt{-IncError}) {
          # a separator is needed only if there is description text already
          $$pSeqCds{descr} .= ($$pSeqCds{descr} ? ', ':"") . 'ERROR in translation';
        } else {
          next;
        }
      }

  ##############################################################################
  # data sampling
  # - exon
  # - intron
  # - summarise CDS

      # prepare batch of exons from positional information
      my $bRevcompl = 0;
      if ($$pAnnot{range} =~ m/^complement\((.+)\)$/) {
        $bRevcompl = 1;
        $$pAnnot{range} = $1;
      }
      if ($$pAnnot{range} =~ m/^complement/) {
        printf STDERR "%s. ERROR: no implementation for innerly complemented annotation positions!\n", &MySub;
        exit 1;
      }
      my @ExonRange;
      if ($$pAnnot{range} =~ m/^join\((.+)\)$/) {
        @ExonRange = split (/,/, $1);
        # minus strand: walk the exons in order of transcription
        $bRevcompl and @ExonRange = reverse @ExonRange;
      } else {
        @ExonRange = ($$pAnnot{range});
      }

      # loop: extract gene features
      # $LastTail is the intron-side border position of the preceding exon
      my $CtExon = 0;
      my $LastTail;
      foreach my $StartEnd (@ExonRange) {
        my ($from, $to) = map { s/^[<>]//; $_; } split (/\.{2}/, $StartEnd);
        # single-position range
        defined ($to) or $to = $from;

        # exon
        my $sExon = substr ($$pSeq{sequence}, $from-1, $to-$from+1);
        $bRevcompl and $sExon = &SeqStrRevcompl ($sExon);
        push @{$struct{exon}}, {
          id        => $$pSeq{id},
          idfeat    => sprintf ('g%de%d', $CtCds, $CtExon),
          cdsidnum  => $CtCds,
          exonidnum => $CtExon,
          orient    => $bRevcompl ? '-1' : '+1',
          from      => $from,
          to        => $to,
          sequence  => $sExon,
          };
        $struct{exon}[-1]{idfeatfull} = $$pSeq{id} .'_'. $struct{exon}[-1]{idfeat};

        # we are at the >= 2nd exon
        # => we start to extract intron features
        if ($LastTail) {

          # intron
          # The intron spans the positions between the two exons. On the
          # minus strand the preceding exon lies right of the current one.
          my ($IntronFrom, $IntronTo) = $bRevcompl ?
            ($to+1, $LastTail-1) : ($LastTail+1, $from-1);
          my $sIntron = substr ($$pSeq{sequence}, $IntronFrom-1, $IntronTo-$IntronFrom+1);
          $bRevcompl and $sIntron = &SeqStrRevcompl ($sIntron);
          my $pIntron = {
            id        => $$pSeq{id},
            idfeat    => sprintf ('g%di%d', $CtCds, $CtExon-1),
            cdsidnum  => $CtCds,
            intronnum => $CtExon - 1,
            orient    => $bRevcompl ? '-1' : '+1',
            from      => $IntronFrom,
            to        => $IntronTo,
            sequence  => $sIntron,
            };
          $$pIntron{idfeatfull} = $$pSeq{id} .'_'. $$pIntron{idfeat};
          # splice signals: first and last two letters of the intron
          $$pIntron{SD} = substr ($sIntron,  0, 2);
          $$pIntron{SA} = substr ($sIntron, -2, 2);
          # sequence output of the intron is done in the summary section
          push @{$struct{intron}}, $pIntron;

          # splice donor
          push @{$struct{donor}}, {
            id         => $$pSeq{id},
            idfeat     => $$pIntron{idfeat},
            cdsidnum   => $CtCds,
            intronnum  => $$pIntron{intronnum},
            orient     => $$pIntron{orient},
            from       => $$pIntron{from},
            to         => $$pIntron{to},
            sequence   => $$pIntron{SD} . substr ($sIntron, 2, $SigSrrd),
            };
          if (! $bRevcompl) {
            # prepend the exon letters that precede the intron. The flank is
            # cut short if it would exceed the left sequence end.
            my $FlankOff = $LastTail - $SigSrrd;
            $FlankOff < 0 and $FlankOff = 0;
            substr ($struct{donor}[-1]{sequence}, 0, 0)
              = substr ($$pSeq{sequence}, $FlankOff, $LastTail-$FlankOff);
          } else {
            # *** implement me ***
            # exon-side flank for minus-strand CDS
          }

          # splice acceptor
          # flanks beyond the intron: *** implement me ***
          push @{$struct{accept}}, {
            id         => $$pIntron{id},
            idfeat     => $$pIntron{idfeat},
            cdsidnum   => $$pIntron{cdsidnum},
            intronnum  => $$pIntron{intronnum},
            descr      => $$pIntron{descr},
            orient     => $$pIntron{orient},
            from       => $$pIntron{from},
            to         => $$pIntron{to},
            sequence   => substr ($sIntron, -$SigSrrd-2, $SigSrrd) . $$pIntron{SA},
            };
        }

        # continue loop
        $CtExon ++;
        $LastTail = $bRevcompl ? $from : $to;
      }

      # summarise CDS
      push @{$struct{cds}}, {
        id       => $$pSeq{id},
        idfeat   => sprintf ('g%d', $CtCds),
        cdsidnum => $CtCds,
        exonnum  => $CtExon,
        descr    => $$pSeqCds{descr},
        orient   => $bRevcompl ? '-1' : '+1',
        range    => $$pAnnot{range},
        sequence => $$pSeqCds{sequence},
        SeqProt  => $$pSeqCds{SeqProt},
        };
      $struct{cds}[-1]{idfeatfull} = $$pSeq{id} .'_'. $struct{cds}[-1]{idfeat};
      $CtCds ++;
    }

  }  # seq loop

  ##############################################################################
  # data summary
  # re-loop sometimes

  # the feature lists are needed as arrays even if nothing was found
  $struct{$_} ||= [] foreach qw(cds exon intron);

  # sort data
  @{$struct{cds}} = sort { ($a->{range}=~m/(\d+)/)[0] <=> ($b->{range}=~m/(\d+)/)[0] } @{$struct{cds}};
  @{$struct{exon}} = sort { $a->{from} <=> $b->{from} } @{$struct{exon}};
  @{$struct{intron}} = sort { $a->{from} <=> $b->{from} } @{$struct{intron}};

  # get special samples: intron lengths, # exons per CDS
  $struct{IntronLength} = [ map { length $_ }
    @{ &DataTreeSlc ($struct{intron}, [['','all'], ['sequence']]) || [] }
    ];
  $struct{ExonPerCds} = [
    @{ &DataTreeSlc ($struct{cds}, [['','all'], ['exonnum']]) || [] }
    ];

  # translate into sequence & annotation format:
  # - sequences: introns, CDS stopped, protein
  # - sequences: CDS non-stopped (*** implement me ***)
  # - GFF: simple
  # - GFF: Barcelona (*** implement me ***)
  foreach my $pFeat (@{$struct{intron}}) {
    $struct{SeqIntron} .= &SeqentryToFasta ($pFeat, -KeyId=>'idfeatfull');
  }
  foreach my $pFeat (@{$struct{cds}}) {
    $struct{SeqCds} .= &SeqentryToFasta ($pFeat, -KeyId=>'idfeatfull');
    $struct{SeqProt} .= &SeqentryToFasta ($pFeat, -KeyId=>'idfeatfull', -KeySeq=>'SeqProt');
  }
  foreach my $pFeat (@{$struct{exon}}) {
    $struct{ExonGff} .= sprintf ("%s\n", join ("\t",
      $$pFeat{id},
      'ANNOTATED', 'exon',
      $$pFeat{from}, $$pFeat{to}, '.',
      &SignChar ($$pFeat{orient}, -allow0=>1), '.',
      # group field: the CDS part of the feature ID
      do { $$pFeat{idfeat} =~ m/^g\d+/ and $&; },
      ));
  }

  # return result
  return \%struct;
}


# derive GenBank-style CDS annotations from GFF-type exon annotations
#
# INTERFACE
# - argument 1: reference to sequence data structure
#               Field 'annot' is read. For the exon entries, the fields type,
#               group, offset, end, orient, and frame are evaluated.
#
# - options:
#   -debug      [STD]
#
# - return val: array of CDS annotations, one per exon group, sorted by
#               position. Each is a hash having fields type ('CDS'), range,
#               and text ("gene=<group>").
#               The array is empty if there are no exon entries, or if the
#               only group found is '.'.
#
# DESCRIPTION
# - the target annotation syntax is that of GenBank file format
# - The input sequence may be gapped and this won't affect the CDS analysis
#   nor will the sequence be changed in this respect.
# - Entries are regarded as exons if their type is one of cds, cds_exon, exon,
#   exon_cds, first, internal, single, terminal (case-insensitive).
# - geneid syntax: if the first exon of a gene is typed 'internal' or
#   'terminal', its gene start is moved by the value of field 'frame'. This
#   changes the annotation entry of the sequence data structure in place.
#   There's no corresponding treatment of the last exon of a gene.
#
# DEBUG, CHANGES, ADDITIONS
# - we may drop this and call &AnnotExpandTscr instead
#
sub CdsFromGff {
  my ($pSeq, %opt) = @_;
  my $debug = $opt{-debug};

  # group exon features
  my %GrpIdx;
  foreach my $pAnnot (@{$$pSeq{annot}}) {
    next unless $$pAnnot{type} =~ m/^(cds(_exon)?|exon(_cds)?|first|internal|single|terminal)$/i;
    if ($debug) {
      printf STDERR "%s. got exon annotation, seq %s, group %s, range %d..%d\n", &MySub,
        $$pSeq{id}||"''", $$pAnnot{group}||"''", $$pAnnot{offset}, $$pAnnot{end};
    }
    push @{$GrpIdx{$$pAnnot{group}}}, $pAnnot;
  }

  # anything to do?
  my @GrpId = keys %GrpIdx;
  if (! @GrpId) {
    return ();
  }
  if (@GrpId == 1 and $GrpId[0] eq '.') {
    return ();
  }

  # sort exons inside each group
  foreach my $pGrp (values %GrpIdx) {
    @$pGrp = sort { $a->{offset} <=> $b->{offset} } @$pGrp;
  }

  # loop over groups (CDSs) - sorted by position of the leftmost exon
  my @cds;
  foreach my $pGrp (sort { $a->[0]{offset} <=> $b->[0]{offset} } values %GrpIdx) {
    my $bMinus = ($$pGrp[0]{orient} < 0);

    # preliminary start?
    # - on the minus strand the first exon of the gene is the rightmost one
    my $pFirst = $bMinus ? $$pGrp[-1] : $$pGrp[0];
    if ($$pFirst{type} =~ m/^(internal|terminal)$/i) {  # this is geneid syntax
      if ($$pFirst{orient} < 0) {
        $debug and printf STDERR "%s. moving gene offset %d\n", &MySub, -$$pFirst{frame};
        $$pFirst{end}    -= $$pFirst{frame};
      } else {
        $debug and printf STDERR "%s. moving gene offset %d\n", &MySub, $$pFirst{frame};
        $$pFirst{offset} += $$pFirst{frame};
      }
    }

    # join features to CDS
    my $range = join (',',
      map { $_->{offset}.'..'.$_->{end} }
      sort { $a->{offset} <=> $b->{offset} } @$pGrp);
    if (@$pGrp > 1) {
      $range = "join($range)";
    }
    if ($bMinus) {
      $range = "complement($range)";
    }
    push @cds, {
      type   => 'CDS',
      range  => $range,
      text   => "gene=$$pGrp[0]{group}",
      };
  }

  # return CDS annotations
  return @cds;
}


################################################################################
# molecule masses
################################################################################


# nucleotide Dalton masses
#
# DESCRIPTION
# - keys are upper-case nucleotide letters, values are masses that &SeqMass
#   adds up, one value per sequence letter
# - NOTE(review): the values for A, C, G look like ribonucleotide residue
#   masses, whereas those for T and U look like deoxyribonucleotide residue
#   masses (difference of one oxygen, ~16 Da). Confirm against a reference
#   table which of the two is intended.
#
my %TabNtMass = (
  A => 329.2,
  C => 305.2,
  G => 345.2,
  T => 304.2,
  U => 290.2,
  );

# strand-specific mass tables, derived from %TabNtMass
# - empty at load time, filled by &SeqMass when it first handles a nucleotide
#   sequence
# - first-level key is the strand ('1', '+1', '-1'), second-level key is the
#   nucleotide letter
my %TabNtMassStrands;


# amino acid Dalton masses
#
# DESCRIPTION
# - keys are upper-case one-letter amino acid codes, values are masses that
#   &SeqMass adds up, one value per sequence letter. &SeqMass does not add
#   anything for the chain termini.
# - the ambiguity codes B and Z hold the mean of the two possible residues
# - there's no entry for X; &SeqMass reports letters missing here in a warning
#
my %TabProtMass = (
  A =>  71.08,
  B => 114.6,  # mean of asparagine/aspartate
  C => 103.1,
  D => 115.1,
  E => 129.1,
  F => 147.2,
  G =>  57.05,
  H => 137.1,
  I => 113.2,
  K => 128.2,
  L => 113.2,
  M => 131.2,
  N => 114.1,
  P =>  97.12,
  Q => 128.1,
  R => 156.2,
  S =>  87.08,
  T => 101.1,
  U => 150.00, # selenocysteine
  V =>  99.07,
  W => 186.2,
  Y => 163.2,
  Z => 128.6,  # mean of glutamine/glutamate
  );


# convert sequence to mass value (nt or protein)
#
# INTERFACE
# - argument 1: reference to sequence data structure
#
# - options:
#   -debug      [STD], currently without effect
#   -strands    strandedness of nucleotide sequence
#                0  default: treat sequence as double-stranded
#               -1  treat sequence as single-stranded (anti-sense)
#                1  treat sequence as single-stranded (sense)
#
# - return val: mass value (Dalton)
#
# DESCRIPTION
# - The sequence type may be retrieved from field $$pSeq{SeqType} if
#   it's defined. Be sure that the field contains valid information.
#   Otherwise, the type is determined via &SeqType.
# - The function dies if the sequence type is neither nucleotide nor protein.
# - The sequence string may contain gaps, it does not matter. Non-letter
#   symbols are skipped silently.
# - Letters are looked up irrespective of upper/lower case. Letters without a
#   table entry (e.g. N or X) do not add to the mass; each of them is reported
#   in a warning on STDERR.
# - The anti-sense strand is assumed to be DNA: A pairs with T, and T as well
#   as U pair with A.
#
sub SeqMass {
  my ($pSeq,%opt) = @_;
  my $SeqType = $SeqTypeBasic{$$pSeq{SeqType}||''}
    || &SeqType ($$pSeq{sequence}, -basic=>1);
  my $SeqMass = 0;
  my $iSeqLen = length ($$pSeq{sequence});

  # do mass calculation - nt sequence
  if ($SeqType eq 'nucleotide') {

    # which strands do we have to add up?
    # a missing, undefined, or zero option means both of them
    my @strands = (-1, 1);
    if ($opt{-strands}) {
      @strands = ($opt{-strands} < 0) ? (-1) : (1);
    }

    # first time we analyse a nucleotide sequence?
    # construct secondary dictionary
    if (! %TabNtMassStrands) {
      $TabNtMassStrands{'1'} = $TabNtMassStrands{'+1'} = { %TabNtMass };
      # The anti-sense table holds the mass of the pairing nucleotide. It's
      # spelled out here so that the entry for A doesn't depend on the order
      # in which T and U are worked off, and so that U has an entry as well.
      $TabNtMassStrands{'-1'} = {
        A => $TabNtMass{T},
        C => $TabNtMass{G},
        G => $TabNtMass{C},
        T => $TabNtMass{A},
        U => $TabNtMass{A},
        };
    }

    # calculate mass of molecule
    for (my $CtPos=0; $CtPos<$iSeqLen; ++$CtPos) {
      my $smb = substr ($$pSeq{sequence}, $CtPos, 1);
      my $SmbUp = uc ($smb);
      if (exists $TabNtMass{$SmbUp}) {
        foreach my $itStrand (@strands) {
          $SeqMass += $TabNtMassStrands{$itStrand}{$SmbUp};
        }
      } else {
        # gaps and other non-letters are ignored
        if ($smb !~ m/[a-z]/i) { next }
        printf STDERR "%s. WARNING: cannot handle symbol %s in nucleotide sequence\n", &MySub,
          $smb;
      }
    }
  }

  # do mass calculation - protein sequence
  elsif ($SeqType eq 'protein') {

    # calculate mass of molecule
    for (my $CtPos=0; $CtPos<$iSeqLen; ++$CtPos) {
      my $smb = substr ($$pSeq{sequence}, $CtPos, 1);
      my $SmbUp = uc ($smb);
      if (exists $TabProtMass{$SmbUp}) {
        $SeqMass += $TabProtMass{$SmbUp};
      } else {
        # gaps and other non-letters are ignored
        if ($smb !~ m/[a-z]/i) { next }
        printf STDERR "%s. WARNING: cannot handle symbol %s in protein sequence\n", &MySub,
          $smb;
      }
    }
  }

  # unknown sequence type
  else {
    # the type is handed over as an argument to keep it out of the format
    die sprintf "%s. ERROR: no implementation for sequence type %s\n", &MySub,
      defined ($SeqType) ? $SeqType : '';
  }

  return $SeqMass;
}


################################################################################
# chemical properties of proteins
################################################################################


# amino acid residue acidity
#
# DESCRIPTION
# - simplified view: approximation of the acidity of the side chain for
#   given aa
# - sign convention: acidic residues have positive values (D, E: 1; C, Y:
#   0.5), basic residues have negative values (K, R: -1; H: -0.5). Note that
#   this is opposite to the sign of the ionic charge.
# - keys are upper-case one-letter codes of the 20 standard amino acids.
#   There are no entries for B, U, X, Z.
#
my %TabProtAcid = (
  A =>  0,
  C =>  0.5,
  D =>  1,
  E =>  1,
  F =>  0,
  G =>  0,
  H => -0.5,
  I =>  0,
  K => -1,
  L =>  0,
  M =>  0,
  N =>  0,
  P =>  0,
  Q =>  0,
  R => -1,
  S =>  0,
  T =>  0,
  V =>  0,
  W =>  0,
  Y =>  0.5,
  );


# amino acid residue hydrophobicity
#
# DESCRIPTION
# - positive values for hydrophobic residues, negative values for hydrophilic
#   residues
# - NOTE(review): the source of the values is not stated here. They agree
#   with the hydropathy index of Kyte & Doolittle (J Mol Biol 157, 1982) -
#   confirm and cite.
# - keys are upper-case one-letter codes of the 20 standard amino acids.
#   There are no entries for B, U, X, Z.
#
my %TabProtHydrophob = (
  A =>  1.8,
  C =>  2.5,
  D => -3.5,
  E => -3.5,
  F =>  2.8,
  G => -0.4,
  H => -3.2,
  I =>  4.5,
  K => -3.9,
  L =>  3.8,
  M =>  1.9,
  N => -3.5,
  P => -1.6,
  Q => -3.5,
  R => -4.5,
  S => -0.8,
  T => -0.7,
  V =>  4.2,
  W => -0.9,
  Y => -1.3,
  );


# translate protein sequence to series of residue acidity values
#
# INTERFACE
# - argument 1: protein sequence string
# - return val: reference to array (table type A1y), having one value per
#               sequence letter, in sequence order
#
# DESCRIPTION
# - Values are looked up in %TabProtAcid. The array element is undef for
#   letters that are missing there, including lower-case letters.
# - Options are accepted, but none is evaluated.
#
sub ProtPlotAcid {
  my ($SeqProt, %opt) = @_;

  # one table lookup per sequence letter
  my @plot = map { $TabProtAcid{$_} } split (//, $SeqProt);

  return \@plot;
}


# translate protein sequence to series of residue hydrophobicity values
#
# INTERFACE
# - argument 1: protein sequence string
# - return val: reference to array (table type A1y), having one value per
#               sequence letter, in sequence order
#
# DESCRIPTION
# - Values are looked up in %TabProtHydrophob. The array element is undef for
#   letters that are missing there, including lower-case letters.
# - Options are accepted, but none is evaluated.
#
sub ProtPlotHydrophob {
  my ($SeqProt, %opt) = @_;

  # one table lookup per sequence letter
  my @plot = map { $TabProtHydrophob{$_} } split (//, $SeqProt);

  return \@plot;
}


1;
# $Id: SeqAnalysis.pm,v 1.8 2007/09/29 10:12:22 szafrans Exp $
