################################################################################
#
#  kPerl Sequence Laboratory
#  Object Library for IUPAC-Coded Sequence Motif
#
#  copyright (c)
#    Karol Szafranski, 2006
#    Fritz Lipmann Institute Jena, Genome Analysis Group, 2006
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 2002,2004
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 1998-2002 (former module MotifRE.pm)
#    Karol Szafranski and Inst. Physiol. Chem., Univ. Dsseldorf, 1997-1998
#      (former module MotifRE.pm)
#    Tobias Schaefer and Inst. Physiol. Chem., Univ. Dsseldorf, 1998
#      (function &MtfProbab, now framed by method &ScoreExpect)
#  author
#    Tobias Schaefer
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - purpose:
#   perform searches with IUPAC-coded sequence motifs.
#
# - individual description of functions can be found at the beginning of the
#   code blocks
#
################################################################################
#
#  OBJECT OPERATORS
#
#   *** NONE ***
#
#
#  OBJECT METHODS  for external access
#
# - housekeeping
#   new           create object, initialize via ini(@args)
#   ini           initialize object with motif definition data structure
#   AddSwitch     modify object options
#   Clone         return copy of object
#
# - data management
#   Def           read/write definition, several definition categories
#                 possible (option -DefType):
#                   expanded   expanded syntax using standard symbols and
#                              brackets/slashes for wobble positions
#                   IUPAC      (default)
#                   regexp     regular expression
#   def_parse     parse motif definition
#   DefType       return type of motif definition (last name of package)
#   DefSeqType    read sequence type of motif definition, fixed to "nucleotide"
#   ID            read/write motif identifier
#   ScoreExpect   return hit expectancy
#   Width         sequence width of the motif
#   Valid         validity status of the motif definition
#                 *** fake implementation ***
#   revcompl      return object for reverse-complement motif
#
# - functionality
#   Search        return hits for search on sequence using motif
#
#
#  OBJECT DATA STRUCTURE
#  (hash)
#
#   def           reference on motif definition data
#     id            motif identifier
#     motif         IUPAC definition string
#     orient        motif orientation: +1 / -1
#                   always implicitly +1 in file format
#   re            regular expression equivalent of motif
#   revcompl      memory for reverse-complement motif object
#   ScoreExpect   hit expectancy
#   SeqHits       count of hits in scanned sequences
#   SeqScanned    count of scanned sequences
#   switch        hash reference for object switches, cf. method AddSwitch()
#     -debug        print debug protocol to STDERR
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @ISA
#   @EXPORT_OK
#   %_LibParam
#
# - housekeeping
#   &new   see MainLib::DefaultObjHash.pm
#   &ini
#   &AddSwitch
#   &_LocalSwitch  see MainLib::DefaultObjHash.pm
#   &Clone
#
# - motif definition syntax
#   description: motifs and regular expressions
#   &MtfToSeqarr (not a method)
#
# - data management
#   &Def
#   &def_parse
#   &DefType
#   &DefSeqType
#   &ID
#   &ScoreExpect
#    &MtfSymmetry (not a method)
#    &MtfProbab (not a method)
#   &Thresh
#   &Valid
#   &Width
#   &revcompl
#
# - search
#   description: motif search result data struture  see MotifLib.pm
#   &Search
#   &SearchStrand
#
#
#  STD OPTIONS
#
#  -debug      print debug protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - look also for notes in the header of each function block
#
################################################################################

package SeqLab::MotifIUPAC;

# includes
use strict; #use warnings;  # OK 20070206
use MainLib::Data qw(&DataClone &DataPrint);
use MainLib::DefaultObjHash;
use MainLib::Misc qw(&MySub);
use Math::kCalc;
use SeqLab::SeqBench;

# inheritance
our @ISA;
push @ISA, qw(MainLib::DefaultObjHash);

# symbol export
# - only for non-method functions
# - no default export
use Exporter qw(import);
our @EXPORT_OK = qw (&MtfToSeqarr &MtfProbab);

# package-wide constants and variables
my %_LibParam;


################################################################################
# housekeeping
################################################################################


# parametric initialization
#
# INTERFACE
# - argument 1: source argument:
#               - reference to data structure
#                 see explanation on top passage about $this->{def}
#               - reference to an object of __PACKAGE__
#                 This will work like $this->Clone()
#
# - options:    handled by $this->AddSwitch()
#
# - return val: - object reference
#               - undef if an error occured
#
sub ini {
  my ($this,$arg,%opt) = @_;
  %$this = ();
  $this->AddSwitch(%opt);
  my $debug = $this->{switch}{-debug} || $opt{-debug};

  if (0) { }

  # initialize object with definition data structure
  elsif (ref($arg) eq 'HASH') {
    foreach my $key (grep{ exists($arg->{$_}) } qw(id motif orient)) {
      $this->{def}{$key} = $arg->{$key};
    }
    my $mtf = $this->{def}{motif} || $arg->{def};
    my $ret = ($mtf=~m/\W/)? $this->def_parse($mtf) : $this->Def($mtf,-DefType=>'IUPAC');
    unless ($ret) {
      die sprintf "%s. ERROR in initialization (method %s()), definition \"%s\"\n", &MySub,
        $mtf, ($mtf=~m/\W/)? 'def_parse':'Def';
    }
    $this->{def}{orient} ||= +1;
  }

  # initialize object from existing object
  elsif (ref($arg) eq __PACKAGE__) {
    $debug and printf STDERR "%s. initialising with existing object (ref: %s, ID: %s)\n", &MySub,
      ref($arg)||"''", $arg->ID()||"''";
    return $arg->Clone();
  }

  # initialization error
  else {
    die sprintf "%s. ERROR: unknown initialization argument, type %s\n", &MySub,
      ref($arg)||'SCALAR';
  }

  # return
  return $this;
}


# enter object switches
#
# INTERFACE
# - argument 1*: hash of switches
# - return val:  success status (boolean)
#
# DESCRIPTION
# - for description of object switches see above
#
sub AddSwitch {
  my ($this,%oopt) = @_;

  # loop over switches
  my $bErr=0;
  while (my($key,$val) = each(%oopt)) {
    if (0) {}

    #options that we just have to enter
    else {
      if (defined $val) { $this->{switch}{$key} = $val; }
      else       { delete $this->{switch}{$key}; }
    }
  }

  # return success status
  return !$bErr;
}


# return copy of object
#
# INTERFACE
# - return val: object reference (copy)
#
sub Clone {
  my ($this) = @_;
  # save against referenced objects
  my $r; if (exists($this->{def}{revcompl})){ $r=$this->{def}{revcompl} }
  # deep copy
  my $pCopy = &DataClone({ %$this }) or return undef;
  bless $pCopy;
  # tidy up
  if (defined($r)){ $pCopy->{def}{revcompl}=$r }
  delete $pCopy->{TmpPath};
  return $pCopy;
}


################################################################################
# motif definition syntax
################################################################################


# extended plain style motifs and regular expressions
#
# motif in extended plain style
# - a search motif is defined in the style of bracketing alternatives
#   additionally delimited by a slash.
# - example: A(C/G)TT(A/C/T)GT
#
# motif in IUPAC style
# - This syntax is defined by the IUPAC for degenerate nucleotide encoding
#   (wobble bases), cmp. $SyntaxTranslWobble{Iupac2Bases}.
# - example: ASTTHGT
# - find IUPAC definitions in %SeqLab::SeqBench::SyntaxTranslWobble
#
# motif in RegExp style
# - ...
# - example: A[CG]TT[ACT]GT
#


# work out array of sequences from IUPAC-style motif
#
# INTERFACE
# - argument 1: nucleotide motif string in IUPAC code
#
# - options:
#   -strands    1  single-strand forward instances only
#               0  calculate instances for both strands (default)
#              -1  single-strand reverse instances only
#
# - return val: array of sequence strings
#
# DESCRIPTION
# - for a given IUPAC-style motif string all explicit instances
#   are calculated. By default, reverse-complement instances of
#   motif are NOT calculated.
#
# DEBUG, CHANGES, ADDITIONS
# - implementation of the strandedness concept is incomplete, -strands=2
#   is not supported
#
sub MtfToSeqarr {
  my ($sMtf,%opt) = @_;
  my (@append, $CtBase, $CtAppend);

  # loop over motif letters and expand to possible instances
  my @SeqFwd = ('');
  foreach my $letter (split(//,$sMtf)) {
    $CtBase = int @SeqFwd;
    @append = @{$SyntaxTranslWobble{Iupac2Bases}{$letter}};
    $CtAppend = int(@append) || 1;
    @SeqFwd = (@SeqFwd) x $CtAppend;
    for (my $i=0; $i<$CtAppend; $i++) {
      for (my $j=0; $j<$CtBase; $j++) {
        $SeqFwd[$i*$CtBase+$j] .= $append[$i];
      }
    }
  }
  if (int(@SeqFwd)==1 and $SeqFwd[0] eq '') { @SeqFwd=() }

  my @SeqFin;
  # sample forward instances
  if ($opt{-strands} >= 0) {
    push @SeqFin, @SeqFwd;
  }
  # sample reverse-complement instances
  if ($opt{-strands} <= 0) {
    foreach (@SeqFwd) {
      push @SeqFin, &SeqStrRevcompl($_);
    }
  }

  # reduce redundancy arising from symmetrical motifs
  if ($opt{-strands} == 0) {
    my %SeqIdx = map { ($_=>1) } @SeqFin;
    @SeqFin = keys %SeqIdx;
  }

  return @SeqFin;
}


################################################################################
# data management
################################################################################


# read/write motif definition
#
# INTERFACE
# - argument 1*: motif definition string
# - options:
#   -DefType     category of requested/supplied motif definition
#                expanded   expanded syntax using standard symbols and
#                           brackets/slashes for wobble positions
#                IUPAC      (default)
#                regexp     regular expression
# - return val:  motif definition string
#                always IUPAC format in write mode
#
sub Def {
  my ($this,$ArgMtf,%opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};

  # set motif defintion
  if ($ArgMtf) {
    if (!$opt{-DefType}) {
      return $this->def_parse($ArgMtf);
    }
    # motif definition category "IUPAC"
    elsif ($opt{-DefType} eq 'IUPAC') {
      $this->{def}{motif} = uc($ArgMtf);
    }
    # motif definition category "expanded"
    elsif ($opt{-DefType} eq 'expanded') {
      my $MtfIupac = uc($ArgMtf);
      my $pTr = $SyntaxTranslWobble{Bases2Iupac};
      while ($MtfIupac=~m/\((.+)\)/) {
        $MtfIupac = $` . $$pTr{join('',sort(split("/",$1)))} . $';
      }
      if ($MtfIupac =~ m/[\(\)\[\]]/) {
        printf STDERR "%s. ERROR: unable to (fully) translate expanded->IUPAC: %s -> %s\n", &MySub,
          $ArgMtf, $MtfIupac;
        return undef;
      }
      return $this->{def}{motif} = $MtfIupac;
    }
    # motif definition category "regexp"
    elsif ($opt{-DefType} eq 'regexp') {
      my $MtfIupac = uc($ArgMtf);
      my $pTr = $SyntaxTranslWobble{Bases2Iupac};
      while ($MtfIupac=~m/\[(.+)\]/) {
        $MtfIupac = $` . $$pTr{join('',sort(split("",$1)))} . $';
      }
      if ($MtfIupac =~ m/[\(\)\[\]]/) {
        printf STDERR "%s. ERROR: unable to (fully) translate expanded->IUPAC: %s -> %s\n", &MySub,
          $ArgMtf, $MtfIupac;
        return undef;
      }
      return $this->{def}{motif} = $MtfIupac;
    }
    # unknown definition category
    else {
      die sprintf "%s. ERROR: unknown definition category \"%s\"\n", &MySub, $opt{-DefType};
    }
  }

  # return motif defintion
  $debug and printf STDERR "%s. internal value (IUPAC syntax): \"%s\"\n", &MySub,
    $this->{def}{motif};
  if ($ArgMtf) { $opt{-DefType}=undef }
  if (0) { }
  # motif definition category "IUPAC"
  elsif (!$opt{-DefType} or $opt{-DefType} eq 'IUPAC') {
    return $this->{def}{motif};
  }
  # motif definition category "expanded"
  elsif ($opt{-DefType} eq 'expanded') {
    my $MtfIupac = uc($this->{def}{motif});
    my @MtfExpand;
    while ($MtfIupac =~ m/\w/g) {
      my $paBase = $SyntaxTranslWobble{Iupac2Bases}{$&};
      my $bBMulti = int(int(@$paBase)>1);
      push @MtfExpand, $bBMulti? '(':(), join('/',@$paBase), $bBMulti? ')':();
    }
    return join('',@MtfExpand);
  }
  # motif definition category "regexp"
  elsif ($opt{-DefType} eq 'regexp') {
    my $MtfIupac = uc($this->{def}{motif});
    my @MtfReSense;
    while ($MtfIupac =~ m/\w/g) {
      my $paBase = $SyntaxTranslWobble{Iupac2Bases}{$&};
      my $bBMulti = int(int(@$paBase)>1);
      push @MtfReSense, $bBMulti? '[':(), @$paBase, $bBMulti? ']':();
    }
    return join('',@MtfReSense);
  }
  # unknown definition category
  else {
    die sprintf "%s. ERROR: unknown definition category \"%s\"\n", &MySub, $opt{-DefType};
  }
  return $this->{def}{motif};
}


# parse motif definition
#
# INTERFACE
# - argument 1: motif definition string
# - return val: motif definition in IUPAC format
#
# DESCRIPTION
# - This method represents an abstraction of Def(), not requiring prior knowledge
#   of the motif argument.
#
sub def_parse {
  my ($this,$ArgMtf) = @_;
  my %lopt = $this->_LocalSwitch();
  my $debug = $lopt{-debug};
  if (!$ArgMtf) { return undef }
  $ArgMtf =~ s/[\s\n\r\d_',\?\|]//g;  # delete white spaces, digits, other stuff
  if (!$ArgMtf) { return undef }
  $ArgMtf =~ tr/a-z/A-Z/;             # upper case
  $debug and printf STDERR "%s. raw argument: \"%s\"\n", &MySub, $ArgMtf;

  if (0) { }
  # motif definition category "expanded"
  elsif ($ArgMtf =~ m/[(\/]/) {
    if ($ArgMtf =~ m/[\[\]]/) {
      die sprintf "%s. ERROR: mixture of hyphenation syntaxes in motif definition \"%s\"\n", &MySub, $ArgMtf;
    }
    return $this->Def($ArgMtf,-DefType=>'expanded');
  }
  # motif definition category "regexp"
  elsif ($ArgMtf =~ m/[\[\]]/) {
    return $this->Def($ArgMtf,-DefType=>'regexp');
  }
  # motif definition category "IUPAC"
  else {
    return $this->Def($ArgMtf,-DefType=>'IUPAC');
  }
}


# read defintion type
#
sub DefType { return (split('::',__PACKAGE__))[-1] }


# sequence type of motif definition
#
# INTERFACE
# - return val: sequence type
#
sub DefSeqType { return 'nucleotide' }


# defintion identifier
#
# INTERFACE
# - argument 1*: motif identifier (write mode)
# - return val:  motif identifier
#
sub ID {
  my ($this, $arg) = @_;
  if (defined $arg) { $this->{def}{id}=$arg }
  # return value
  return $this->{def}{id};
}


# expectancy score
#
# INTERFACE
# - return val: expectancy score
# - options:
#   ...         all of &MtfProbab
#
sub ScoreExpect {
  my ($this) = @_;
  $this->{ScoreExpect} ||= MtfProbab ($this->{def}{motif}, %{$this->{switch}});
  return $this->{ScoreExpect};
}


# degree of symmetry of a IUPAC-style motif
#
# INTERFACE
# - argument 1: nucleotide motif string in IUPAC code
# - return val: symmetry value
#
# DESCRIPTION
# - for a given IUPAC-style motif string all explicit instances are calculated.
#   By comparison of forward with reverse-complement instances the redundancy
#   caused by motif symmetry is calculated.
# - The symmetry measure varies within [0..1], 0 meaning no symmetry, 1 meaning
#   perfect symmetry. The calculation in detail:
#
#     f  := number of forward instances
#     a  := number of combined, unique forward and reverse instances
#     sn := number of symmetrical instances
#     sr := fraction of symmetrical instances in relation to forward instances
#
#        sn = 2 f - a         a within [f .. 2f]
#                          => sn within [0 .. f]
#        sr = sn / f
#        sr = (2 f - a) / f
#     => sr = 2 - a / f
#
sub MtfSymmetry {
  my ($sMtf,%opt) = @_;

  # get possible instances
  my @seq = &MtfToSeqarr ($sMtf, -strands=>1);
  unless (@seq) { die sprintf "%s. ERROR: null-length motif\n", &MySub }
  my @seqall;
  foreach (@seq) {
    push @seqall, $_, &SeqStrRevcompl($_);
  }

  # redundancy, symmetry
  my %idxSeq = map { ($_=>1) } @seqall;
  my @seqauniq = keys %idxSeq;
  my $fSymmetry = 2 - int(@seqauniq) / int(@seq);
  return $fSymmetry;
}


# calculate probability for given IUPAC-style motif
#
# INTERFACE
# - argument 1: nucleotide motif string in IUPAC code
#
# - options:
#   -debug      [STD]
#   -freq       supply relative frequency values for bases A,C,G,T
#               (hash reference)
#   -strands    -1,1  single-strand instances only
#               0     calculate instances for both strands (default)
#
# - return val: - motif probability
#               - undef if an error occurs
#
# DESCRIPTION
# - The calculated probability refers to the motif occurrence in a
#   circular or non-ending, single-stranded sequence (cmp. option -strands).
#
# DEBUG, CHANGES, ADDITIONS
# - implementation of the strandedness concept is incomplete, -strands=2
#   is not supported
# - replace call of &MtfToSeqarr by minimally required calculation, because
#   that function call may be very expensive in time.
#
sub MtfProbab {

  # function constants
  my %BaseProbab = ( A=>0.25, C=>0.25, G=>0.25, T=>0.25 );

  # function parameters
  my ($sMtf,%opt) = @_;
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  $sMtf = &SeqStrPure ($sMtf, -SeqType=>'DNA', -upper=>1) or return undef;
  $sMtf =~ s/N//g;  # especially &MotifToSeqarr works much faster this way
  my $MtfProbab;

  # finish probability look-up table (including wobbles)
  if ($opt{-freq}) {
    %BaseProbab = %{$opt{-freq}};
    $debug and printf STDERR "%s. got rel. frequencies: %s\n", &MySub,
      join (', ', map { "$_=$BaseProbab{$_}" } sort keys %BaseProbab);
  }
  foreach my $CodeWobble (map { $_->[0] } grep { length($_->[1])>1 } @{$SyntaxTranslWobble{'Iupac-Bases'}}) {
    $BaseProbab{$CodeWobble} = &Sum (
      map { $BaseProbab{$_} }
      @{$SyntaxTranslWobble{Iupac2Bases}{$CodeWobble}}
      );
  }
  $debug and printf STDERR "%s. table of rel. base frequencies: %s\n", &MySub,
    join (', ', map { "$_=$BaseProbab{$_}" } sort keys %BaseProbab);

  # calculate combinatorical product
  $MtfProbab = 1;
  foreach (split (//, $sMtf)) {
    $MtfProbab *= $BaseProbab{$_};
  }

  # regard symmetry for double-stranded sequence
  # - for partial symmetry of the motif this correction is just an approximation
  #   in case of a non-homogenous base frequency
  # - CAUTION: effective null-length motifs will cause an error in &MtfSymmetry
  if (($opt{-strands}||0)==0 and length($sMtf)) {
    $MtfProbab *= (2 - &MtfSymmetry($sMtf));
  }

  return $MtfProbab;
}


# score threshold
#
# INTERFACE
# - argument 1*: score threshold (write mode)
# - return val:  score threshold
#
# DESCRIPTION
# - The only threshold scaling that makes sense is an expectancy value.
#
# DEBUG, CHANGES, ADDITIONS
# - It's still unclear how to realise an abstract threshold concept via
#   SeqLab::MotifLib.
#
sub Thresh {
  my ($this, $arg) = @_;

  # set value
  if (defined $arg) {
    $this->{def}{thresh} = $arg;
  }

  # return value
  return $this->{def}{thresh};
}


# check validity of motif definition
#
# INTERFACE
# - return val:  validity (boolean)
#
# DEBUG, CHANGES, ADDITIONS
# - currently, this is a fake implementation
#
sub Valid {
  my ($this) = @_;
  return 1;
}


# width of motif definition
#
# INTERFACE
# - argument 1*: motif width (write mode)
# - return val:  motif width
#
sub Width {
  my ($this,$arg) = @_;

  # set value, change definition
  if (defined $arg) {
    $this->{def}{motif} = substr ($this->{def}{motif}, 0, &Min($arg,length($this->{def}{motif})) );
    delete $this->{def}{re};
    delete $this->{def}{revcompl};
  }

  # return value
  return length($this->{def}{motif});
}


# reverse-complement motif object
#
# INTERFACE
# - return val: reverse-complement motif object
#
sub revcompl {
  my ($this) = @_;
  if (defined($this->{def}{orient}) and $this->{def}{orient}==0){ return $this }
  my $r = $this->{def}{revcompl} || do{
    my $rc = &SeqStrRevcompl($this->{def}{motif});
    if ($rc eq $this->{def}{motif}){
      $this;
    } else {
      my $pMtf = $this->Clone();
      delete $pMtf->{def}{re};
      $pMtf->{def}{orient} *= -1;
      $pMtf->{def}{motif} = &SeqStrRevcompl($this->{def}{motif});
      $pMtf;
    }
  };
  return $this->{def}{revcompl} ||= $r;
}



################################################################################
# search
################################################################################


# identify and locate instances of search pattern
#
# INTERFACE
# - argument 1: query sequence string
#
# - options:
#   -best       return n best scoring hit sites only
#   -debug      [STD]
#   -HitSurrd   lengths of stored surrounding sequences, default: 0
#               This takes effect in &SearchStrand
#   -isPure     no need to purify sequence string
#   -strands    which strand to search
#               0   both strands (default)
#               -1  minus strand only
#               1   plus strand only
#
# - return val: - reference to result data structure
#               - undef if an error occurred
#
# DESCRIPTION
# - the query sequence string will be purified prior to search
#
sub Search {
  my ($this,$sSeqQuery,%opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  my $strands = $opt{-strands} || 0;

  # work out search sequence
  unless ($lopt{isPure}) {
    $sSeqQuery = &SeqStrPure ($sSeqQuery, -upper=>1);
  }
  unless ($sSeqQuery) { return undef }

  my @MtfHit;
  # locate positively oriented instances
  if ($strands>=0) {
    push @MtfHit, @{ $this->SearchStrand ($sSeqQuery, -strand=>+1, %opt) || [] };
    $debug and printf STDERR "%s. motif %s, seq #%d, strand +1: %d hit%s\n", &MySub,
      $this->ID(), $this->{SeqScanned}||0, int(@MtfHit), (int(@MtfHit)==1)?'':'s';
  }
  # locate negatively oriented instances
  if ($strands<=0) {
    push @MtfHit, @{ $this->SearchStrand ($sSeqQuery, -strand=>-1, %opt) || [] };
    $debug and printf STDERR "%s. motif %s, seq #%d, strand -1 and previous: %d hit%s\n", &MySub,
      $this->ID(), $this->{SeqScanned}||0, int(@MtfHit), (int(@MtfHit)==1)?'':'s';
  }
  # ommitting search for strand-symmetric motifs spares time <2% with restric.mtf

  # combine matches occurring in both orientations, mode $strands==0
  if ($strands==0) {
    @MtfHit = sort { $a->{offset}<=>$b->{offset} } @MtfHit;
    for (my $i=0; $i<int(@MtfHit); ++$i) {
      for (my $j=$i+1; $j<int(@MtfHit) and $MtfHit[$i]{offset}==$MtfHit[$j]{offset}; ++$j) {
        $MtfHit[$i]{orient} = 0;
        splice (@MtfHit, $j, 1);
        --$j;
      }
    }
  }

  # select best scoring hits
  if ($lopt{-best}) {
    # *** implement me ***
  }

  # sort complete list of matches, return result
  $this->{SeqScanned} ++;
  $this->{SeqHits} += int(@MtfHit);
  return \@MtfHit;
}


# identify and locate instances of search pattern in one strand
#
# INTERFACE
# - argument 1: sequence
#
# - options:
#   -debug      [STD]
#   -HitSurrd   lengths of stored surrounding sequences, default: 0
#   -strand     which sequence strand to search, default: +1
#
# - return val: - reference to instance data structure
#               - undef if an error occurred
#
# DESCRIPTION
# - match offset value is stated in biological counting logics
# ? match offset value refers to the beginning of the match in the
#   sense-directed template sequence. The offset of the motif may be at
#   the end of the match if the motif matches as an inverted instance.
#
sub SearchStrand {
  my ($this,$sSeqQuery,%opt) = @_;
  my %lopt = $this->_LocalSwitch(%opt);
  my $debug = $lopt{-debug};
  my $strand = $lopt{-strand} || 1;
  my $HitSurrd = $lopt{-HitSurrd};
  my $reMtf = ($strand==1) ?
    $this->Def(undef,-DefType=>'regexp') : $this->revcompl->Def(undef,-DefType=>'regexp');
  $debug and printf STDERR "%s. motif %s, strand %+d, regexp %s\n", &MySub,
    $this->ID(), $strand, $reMtf;

  # exhaustive search for motif matches
  my @hit;
  while ($sSeqQuery =~ m/$reMtf/gi) {
    my $PosCurr = length($`) + 1;
    pos($sSeqQuery) = $PosCurr;
    my %HitInst = (
      MotifID  => $this->ID(),
      orient   => abs($this->{def}{orient})*$strand,
      offset   => $PosCurr,
      length   => $this->Width(),
      instance => $&,
      ScoreExp => $this->ScoreExpect(),
      );
    if ($HitSurrd) {
      $HitInst{ante} = substr ($`, -&Min($HitSurrd,length($`)), &Min($HitSurrd,length($`)));
      $HitInst{post} = substr ($', 0, $HitSurrd);
    }
    push @hit, { %HitInst };
  }

  # tidy up, return hits
  return \@hit;
}


1;
# $Id: MotifIUPAC.pm,v 1.20 2018/06/05 18:02:56 szafrans Exp $
