################################################################################
#
#  kPerl Dictyostelium ReadWatch Concept
#  Library for Management of a Set of Clone Libraries
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Genome Analysis, 2000,2004
#  szafrans@imb-jena.de
#
################################################################################
#
#  DESCRIPTION
#
# - functions beginning with a letter are exported, those with a leading
#   underscore are not. Those are meant for internal use only. Variables with
#   a leading underscore are declared with "my", therefore inaccessible from
#   outside the package.
#
# - individual description of functions at the beginning of the code blocks
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @ISA
#   @EXPORT
#   %LibGlob  (not exported, but globally accessible)
#   %_LibParam
#
# - clone length
#   $_LibParam{CloneLen}
#   $LibGlob{CloneLen}
#   &CloneLenRc
#   &CloneLenRcFile
#   &CloneLenCutoff
#   &CloneLenEstim
#
# - target specificity
#   $_LibParam{TgtSpecif}
#   $LibGlob{TgtSpecif}
#   &TgtspecifRcPath
#   &TgtspecifRc
#   &TgtspecifLibgrpRegard
#   &TgtspecifLibgrpSelector
#   &TgtspecifLibgrpNum
#   &_TgtspecifLibgrpNumCalc
#   &TgtspecifLibgrpNumEval
#   &TgtspecifLibgrpSpecif
#   &TgtspecifTgtRegard
#   &TgtspecifTgtNum
#   &TgtspecifTgtExpectLib
#   &TgtspecifTgtProbab
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#   -rc         path of rc file, default: $_LibParam{*}{default}{PathRc}
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - It would be a good idea to turn this code into OO libraries
#   "LibraryCloneLen" and "LibraryTgtSpecif".
#
# - look also for notes in the header of each function block
#
################################################################################

package ReadWatch::Library;

# includes
#use strict; use warnings;  # worked 20040816
use MainLib::Data;
use MainLib::Path qw (%CorePath &PathExpand);
use MainLib::File;
use MainLib::Misc;
use Math::Calc;

# symbol export
our @ISA;
use Exporter;
push @ISA, qw(Exporter);
our @EXPORT = qw (
  &CloneLenRc &CloneLenRcFile &CloneLenCutoff &CloneLenEstim
  &TgtspecifRcPath &TgtspecifRc &TgtspecifLibgrpRegard &TgtspecifLibgrpSelector
    &TgtspecifLibgrpNum &TgtspecifLibgrpNumEval &TgtspecifLibgrpSpecif
    &TgtspecifTgtRegard &TgtspecifTgtNum 
    &TgtspecifTgtExpectLib &TgtspecifTgtProbab
  );

# package-wide constants and variables
our (%LibGlob);
my %_LibParam;


################################################################################
# clone length
################################################################################

# parameters
$_LibParam{CloneLen} = {
  default => {
    # master defaults
    # this will be inserted into rc data if not present
    data   => {
       CutoffSelect => 5300,
       CutoffValid => 5300,
       GaussParam  => { mean=>1500, s =>700, var =>700**2,
                                    sn=>700, varn=>700**2, },
       },
    PathRc => ($CorePath{ReadWatch}{CloneLenRc}=~m/^(~\w*)?\//) ?
      &PathExpand($CorePath{ReadWatch}{CloneLenRc}) :
      $CorePath{ReadWatch}{home} .'/'. $CorePath{ReadWatch}{CloneLenRc},
    },
  DftDispMax => 7000,
  PathRc => undef,
  data   => undef,  # rc data loaded from *.rc file
  };

$LibGlob{CloneLen}{default} = $_LibParam{CloneLen}{default};

$LibGlob{CloneLen}{WarnFileChg} = 0;
  # library's switch to enable verbose warning in &CloneLenRc


# load and return clone length resource data
#
# INTERFACE
# - options:
#   -copy       return copy of data structure
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - reference(!) to clone length resource data
#               - undef if an error occurred
#
sub CloneLenRc {
  my (%opt) = @_;
  my ($debug);
  my ($PathRc, $pRc);

  # function parameters
  $debug = $opt{-debug};
  $PathRc = $opt{-rc} || $_LibParam{CloneLen}{default}{PathRc};
  $PathRc or return undef;

  # requested rc data present
  # - possibly, warn on change of *.rc file
  if ($LibGlob{CloneLen}{WarnFileChg} and $_LibParam{CloneLen}{PathRc}
    and $PathRc ne $_LibParam{CloneLen}{PathRc}) {
    printf STDERR "%s. WARNING: changing *.rc file:\n  %s -> %s\n", &MySub,
      $_LibParam{CloneLen}{PathRc}, $PathRc;
  }
  if ($_LibParam{CloneLen}{data}
    and $PathRc eq $_LibParam{CloneLen}{PathRc}) {
    $debug and printf STDERR "%s. returning internal data, file %s\n", &MySub, $PathRc||"''";
    $pRc = $_LibParam{CloneLen}{data};
  }
  else {

    # read clone length rc file into $_LibParam{CloneLen}{data}
    unless ($pRc = $_LibParam{CloneLen}{data} = &DataRead($PathRc)) {
      printf STDERR "%s. ERROR: unable to load clone length rc file %s\n", &MySub, $PathRc||"''";
      return undef;
    }
    $_LibParam{CloneLen}{PathRc} = $PathRc;
    # ensure, there's a default library entry
    $$pRc{library}{default} ||= &DataClone($_LibParam{CloneLen}{default}{data});
    # delete some annotations
    delete $$pRc{'#'};
    delete $$pRc{library}{'#'};
    delete $$pRc{DataSource}{'#'};

    # update syntax of definition for displayed range of observed lengths
    if (! map {@{$_||[]}} &DataTreeSlc($$pRc{LibraryGroup},[[0,'all'],['DispRangeLen']])) {
      foreach my $pLibGrp (@{$$pRc{LibraryGroup}}) {
        if (exists $$pLibGrp{DataRangeLen}) {
          $$pLibGrp{DispRangeLen} = $$pLibGrp{DataRangeLen};
          delete $$pLibGrp{DataRangeLen};
        } else {
          $$pLibGrp{DispRangeLen} = [0, $_LibParam{CloneLen}{DftDispMax}];
        }
      }
    }
  }

  # exit SUB successfully
  $debug and printf STDERR "%s. %d library entries in clone length rc file %s\n", &MySub,
    int(keys %{$$pRc{library}}), $PathRc;
  if ($opt{-copy}) { $pRc = &DataClone($pRc); }
  return $pRc;
}


# current path of clone length resource data
#
# INTERFACE
# - return val: path of clone length resource data
#
sub CloneLenRcFile {
  return $_LibParam{CloneLen}{PathRc}
    || $_LibParam{CloneLen}{default}{PathRc};
}


# return clone length cutoff parameter for library
#
# INTERFACE
# - argument 1: clone library identifier
#
# - options:
#   -debug      [STD]
#   -default    use library entry "default" if requested entry ($LibArg)
#               does not exist
#   -rc         [STD]
#
# - return val: - length cutoff parameter value
#               - undef if an error occurred
#
sub CloneLenCutoff {
  my ($LibArg, %opt) = @_;
  my ($debug);
  my ($pRc, $LibEff);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &CloneLenRc(%opt) or return undef;

  # look up library entry
  if (exists ($$pRc{library}{$LibArg})) {
    $LibEff = $LibArg;
  } elsif ($opt{-default}) {
    $LibEff = 'default';
  } else {
    return undef;
  }

  # return data
  return $$pRc{library}{$LibEff}{CutoffValid};
}


# return clone length parameters for library
#
# INTERFACE
# - argument 1: clone library identifier
#
# - options:
#   -copy       copy returned data structure
#   -debug      [STD]
#   -default    use library entry "default" if requested entry ($LibArg)
#               does not exist
#   -rc         [STD]
#
# - return val: - reference to hash of langth parameter fields
#               - undef if an error occurred
#
sub CloneLenEstim {
  my ($LibArg, %opt) = @_;
  my ($debug);
  my ($pRc, $LibEff, $pData);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &CloneLenRc(%opt,-copy=>$opt{-copy}) or return undef;

  # look up library entry
  if (exists ($$pRc{library}{$LibArg})) {
    $LibEff = $LibArg;
  } elsif ($opt{-default}) {
    $LibEff = 'default';
  } else {
    return undef;
  }

  # return data
  $pData = $$pRc{library}{$LibEff}{GaussParam};
  $debug and printf STDERR "%s. library %s, mean clone length %d\n", &MySub,
    "$LibArg/$LibEff", $$pData{mean};
  return $pData;
}


################################################################################
# target specificity
################################################################################

# parameters
$_LibParam{TgtSpecif} = {
  default => {
    PathRc => $CorePath{ReadWatch}{home} .'/'. $CorePath{ReadWatch}{TgtSpecifRc},
    },
  PathRc => undef,
  data   => undef,
  ValidParam => {
    LibgrpNum      => { map{ ($_=>1) }qw(Clone Nt Read) },
    LibgrpNumEval  => { map{ ($_=>1) }qw(Clone    Read) },
    LibgrpSelector => { map{ ($_=>1) }qw(Clone    Read) },
    LibgrpSpecif   => { map{ ($_=>1) }qw(Clone    Read) },
    TgtNum         => { map{ ($_=>1) }qw(      Nt     ) },
    TgtExpectLib   => { map{ ($_=>1) }qw(Clone Nt Read) },
    TgtProbab      => { map{ ($_=>1) }qw(      Nt Read) },
    },
  };
# processed data under $_LibParam{TgtSpecif}{data}:
# TgtExpect     data tree of pre-calculated values from &TgtspecifTgtExpectLib.
#               Data is organized in the fashion
#               {TgtExpect}{$target}{$lib}{$CountCase}
$LibGlob{TgtSpecif}{default} = $_LibParam{TgtSpecif}{default};


# return path of current target specificity resource file
#
# INTERFACE
# - return val: path of target specificity rc file
#
sub TgtspecifRcPath {
  my (%opt) = @_;
  return $_LibParam{TgtSpecif}{PathRc};
}


# load target specificity resource
#
# INTERFACE
# - options:
#   -debug      [STD]
#   -force      force reload of rc file even if data is already present.
#               This usually applies if data is (re)calculated and the
#               data set is meant to be saved back to file without contained
#               extrapolated data fields.
#   -rc         [STD]
#
# - return val: - reference to rc data
#               - undef if an error occurred
#
sub TgtspecifRc {
  my (%opt) = @_;
  my ($debug);
  my ($PathRc);

  # function parameters
  $debug = $opt{-debug};
  $PathRc = $opt{-rc} || $_LibParam{TgtSpecif}{default}{PathRc};

  # rc file data present
  if ($_LibParam{TgtSpecif}{data} and
      $PathRc eq $_LibParam{TgtSpecif}{PathRc} and
      ! $opt{-force}
  ) {
    return $_LibParam{TgtSpecif}{data};
  }

  # read target specificity rc file
  unless (%{ $_LibParam{TgtSpecif}{data} = &DataRead ($PathRc) || {} }) {
    $debug and printf STDERR "%s. ERROR: unable to load target specificity rc file %s\n", &MySub, $PathRc||"''";
    return undef;
  }
  $debug and printf STDERR "%s. loaded target specificity rc file %s\n", &MySub, $PathRc||"''";
  $_LibParam{TgtSpecif}{PathRc} = $PathRc;

  # exit SUB successfully
  return $_LibParam{TgtSpecif}{data};
}


# array of libraries to regard in process context
#
# INTERFACE
# - argument 1: context identifier: see rc file
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: array of libraries
#
sub TgtspecifLibgrpRegard {
  my ($ContextID, %opt) = @_;
  my ($debug, $pRc);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return ();
  unless (exists $$pRc{LibGroupRegard} and
          exists $$pRc{LibGroupRegard}{$ContextID}) {
    $debug and printf STDERR "%s. ERROR: there's no context ID '%s'\n", &MySub, $ContextID||"''";
    return ();
  }

  # return array reference
  return @{ $$pRc{LibGroupRegard}{$ContextID} };
}


# library's read or clone ID selector
#
# INTERFACE
# - argument 1: library group identifier
# - argument 2: ID selector type: Read, Clone
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - selector
#               - undef if an error occurred
#
sub TgtspecifLibgrpSelector {
  my ($lib, $CountCase, %opt) = @_;
  my ($debug, $pRc);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  unless (exists $$pRc{LibGroup}{$lib}) {
    $debug and printf STDERR "%s. ERROR: there's no library group %s\n", &MySub, $lib||"''";
    return undef;
  }
  $_LibParam{TgtSpecif}{ValidParam}{LibgrpSelector}{$CountCase} or return undef;

  # calculate expected read number
  return $$pRc{LibGroup}{$lib}{$CountCase.'SelectID'};
}


# return library's read, nt, or clone count
#
# INTERFACE
# - argument 1: library group identifier
# - argument 2: count identifier:
#               Read, Nt, Clone
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#   -update     use value instead of $RcParam{$lib}{StatTimeOutdate}
#               to decide whether the statistics have to be updated
#
# - return val: - count value
#               - undef if an error occurred
#
sub TgtspecifLibgrpNum {
  my ($lib, $CountCase, %opt) = @_;
  my ($debug, $pRc);
  my ($pLibGrp, $TimeOutdate, $TimeData);

  # function parameters
  $debug = $opt{-debug} = 1;
  $pRc = &TgtspecifRc (%opt) or return undef;
  unless (exists $$pRc{LibGroup}{$lib}) {
    $debug and printf STDERR "%s. ERROR: there's no library group %s\n", &MySub, $lib||"''";
    return undef;
  }
  $_LibParam{TgtSpecif}{ValidParam}{LibgrpNum}{$CountCase} or return undef;

  # resolve library group data sub-structure
  $pLibGrp = $_LibParam{TgtSpecif}{data}{LibGroup}{$lib};

  # update counts
  $TimeOutdate = $opt{-update} || $$pLibGrp{NumTimeOutdate};
  $TimeData = $$pLibGrp{NumTime};
  if ($TimeData + $TimeOutdate < time) {
    $debug and printf STDERR "%s. (re)calculating counts for library group %s\n", &MySub, $lib||"''";
    &_TgtspecifLibgrpNumCalc ($lib, %opt);
  }

  # calculate expected read number
  return $$pLibGrp{'Num'.$CountCase};
}


# calculate library's read, nt, and clone count
#
# INTERFACE
# - argument 1: library group identifier
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: success status (boolean)
#
sub _TgtspecifLibgrpNumCalc {
  my ($lib, %opt) = @_;
  my ($debug, $pRc);
  my ($pLibGrp, $SlcID, $call, %CloneID);
  my ($buffer, $CountCase, $key);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt, -force=>1) or return undef;
  unless (exists $$pRc{LibGroup}{$lib}) {
    $debug and printf STDERR "%s. ERROR: there's no library group %s\n", &MySub, $lib||"''";
    return undef;
  }

  # resolve library group data sub-structure
  $pLibGrp = $$pRc{LibGroup}{$lib};
  unless (exists $$pLibGrp{NumSource}) {
    $debug and printf STDERR "%s. ERROR: there's no sequence reference for library group %s\n", &MySub, $lib||"''";
    return undef;
  }

  # independent nts / reads
  # - from sequence source file
  $SlcID = &TgtspecifLibgrpSelector ($lib, 'Read', %opt);
  $call = "$CorePath{call}{SeqStat} '-SlcID=$SlcID' $$pLibGrp{NumSource}";
  $buffer = &ReadFile ("$call |");
  $buffer =~ m/sums: ([0-9,]+) entries \(([0-9,]+) nucleotides\)/;
  $$pLibGrp{NumRead} = $1;
  $$pLibGrp{NumNt}   = $2;
  foreach $key ('NumRead', 'NumNt') { $$pLibGrp{$key} =~ s/,//g }

  # independent clones
  # - IDs from sequence source file
  $SlcID = &TgtspecifLibgrpSelector ($lib, 'Clone', %opt);
  $call = "$CorePath{call}{SeqID} '-SlcID=$SlcID' $$pLibGrp{NumSource}";
  %CloneID = map { ($_=>1) } map { m/^(\w+)/ } &ReadFile("$call |");
  $$pLibGrp{NumClone} = int (keys %CloneID);

  # debug
  if ($debug) {
    foreach $CountCase (keys %{$_LibParam{TgtSpecif}{ValidParam}{LibgrpNum}}) {
      printf STDERR "%s. count updated - library group %s, count ID %s: %d\n", &MySub,
        $lib||"''", $CountCase||"''", $$pLibGrp{'Num'.$CountCase};
    }
  }

  # save back rc file, exit SUB
  if ($$pLibGrp{NumRead} and $$pLibGrp{NumNt} and $$pLibGrp{NumClone}) {
    $$pLibGrp{NumTime} = time();
    require FileHandle;
    my ($hOutRc);
    my $FileRc = $_LibParam{TgtSpecif}{PathRc};
    if ($hOutRc = FileHandle->new($FileRc,'w')) {
      $debug and printf STDERR "%s. saving rc data back to file %s\n", &MySub, $FileRc||"''";
    } else {
      printf STDERR "%s. ERROR: unable to write file %s\n", &MySub, $FileRc;
    }
    &DataPrint ($pRc, -handle=>$hOutRc, -NoAddr=>1);
  }
  return 1;
}


# evaluate library-specific read-counts from list of IDs
#
# INTERFACE
# - argument 1: reference to list of read identifiers
# - argument 2: count case identifier:
#               Clone Read
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - reference to result hash
#               - undef if an error occurred
#
# DESCRIPTION
# - the list of reads provided in argument 1 usually refers to the
#   members of a contig or
#   sequence cluster. The functions aims to provide probabilities for
#   the target location of this contig/cluster.
#
sub TgtspecifLibgrpNumEval {
  my ($pRead, $CountCase, %opt) = @_;
  my ($debug, $pRc);
  my ($lib, %count);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  $_LibParam{TgtSpecif}{ValidParam}{LibgrpNumEval}{$CountCase} or return undef;

  # loop over possible sources ('targets' in rc syntax)
  foreach $lib (&TgtspecifLibgrpRegard ('probab', %opt)) {
    my $regexp = &TgtspecifLibgrpSelector ($lib, $CountCase, %opt);
    $debug and printf STDERR "%s. regexp for library $lib is $regexp\n", &MySub;
    $count{$lib} = int (grep { m/$regexp/ } @$pRead);
  }

  # hash containing target probabilities
  $debug and &DataPrint (\%count, -handle=>\*STDERR);
  return \%count;
}


# library's rel. specificity for a target
#
# INTERFACE
# - argument 1: library group identifier
# - argument 2: - target or target group identifier        
#               - list of target or target group identifiers
# - argument 3: count case identifier:
#               Clone Nt Read
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - rel. specificity value
#               - undef if an error occurred
#
sub TgtspecifLibgrpSpecif {
  my ($lib, $target, $CountCase, %opt) = @_;
  my ($debug, $pRc);
  my (@target, $pTgtDat, $pLibGrp);
  my ($RelNonspecif, $NumNtNonspecif);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  unless (exists $$pRc{LibGroup}{$lib}) {
    $debug and printf STDERR "%s. ERROR: there's no library group %s\n", &MySub, $lib||"''";
    return undef;
  }
  $_LibParam{TgtSpecif}{ValidParam}{LibgrpSpecif}{$CountCase} or return undef;

  # resolve target group to targets
  unless (@target = &TgtspecifTgtRegard ($target, %opt)) {
    $debug and printf STDERR "%s. ERROR: unknown data format or value in function argument 'target'\n", &MySub;
    return undef;
  }
  if (@target > 1) {
    $debug and printf STDERR "%s. resolving target group '%s'\n", &MySub, $target;
    return &Sum ( map { &TgtspecifLibgrpSpecif ($lib, $_, $CountCase, %opt) }
           @{$$pRc{TgtGroup}{$target}{target}} );
  } else { $target = $target[0]; }
  $pTgtDat = $$pRc{target}{$target};
  # resolve library group data sub-structure
  $pLibGrp = $$pRc{LibGroup}{$lib};

  # calculate rel. specificity
  unless ($$pLibGrp{'Specific'.$CountCase}{$target}) {
    $NumNtNonspecif = &Sum (
      map { $_->{NumNt} }
      grep { ! exists $$pLibGrp{'Specific'.$CountCase}{$_->{id}} }
      values %{$$pRc{target}}
      );
    $debug and printf STDERR "%s. library group %s, non-specific nt amount: %d\n", &MySub, $lib||"''", $NumNtNonspecif;
    $RelNonspecif = 1 - &Sum (values %{$$pLibGrp{'Specific'.$CountCase}});
    $debug and printf STDERR "%s. library group %s, rel. non-specific: %.3f\n", &MySub, $lib||"''", $RelNonspecif;
    $$pLibGrp{'Specific'.$CountCase}{$target} =
      $RelNonspecif * $$pTgtDat{NumNt} / $NumNtNonspecif;
    $debug and printf STDERR "%s. library group %s, target %s, rel. specificity: %.3f\n", &MySub,
      $lib||"''", $target||"''", $$pLibGrp{'Specific'.$CountCase}{$target};
  }

  # calculate expected read number
  return $$pLibGrp{'Specific'.$CountCase}{$target};
}


# get list of valid sources/targets
#
# INTERFACE
# - argument 1: optional (may be blank):
#               - target or target group identifier
#               - list of target or target group identifiers
#               default is all known targets: keys %{$rc{target}}
#
# - options:
#   -rc         [STD]
#
# - return val: list of target specifiers
#
sub TgtspecifTgtRegard {
  my ($ArgTgt, %opt) = @_;
  my ($pRc);
  my (@target, @TgtNew, $ItTgt);

  # function parameters
  $pRc = &TgtspecifRc (%opt) or return undef;

  # turn target argument into list
  if ($ArgTgt) {
    if (! ref($ArgTgt)) {
      $ArgTgt = [ $ArgTgt ];
    }

    # build list of target specifiers
    foreach $ItTgt (@$ArgTgt) {
      if (exists $$pRc{TgtGroup}{$ItTgt}) {
        @TgtNew = @{$$pRc{TgtGroup}{$ItTgt}{target}};
      } elsif (exists $$pRc{target}{$ItTgt}) {
        @TgtNew = ($ItTgt);
      }
      push @target, @TgtNew;
    }

  # default target list (all targets)
  } else {
    @target = keys %{$$pRc{target}};
  }

  # list of targets
  return @target;
}


# target's nt count
#
# INTERFACE
# - argument 1: identifier of target or target group
# - argument 2: count case identifier:
#               Nt
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - count value
#               - undef if an error occurred
#
# DEBUG, CHANGES, ADDITIONS
# - include maximum likelihood estimates for read / clone counts.
#   Chain to &TgtspecifTgtExpectLib.
#
sub TgtspecifTgtNum {
  my ($target, $CountCase, %opt) = @_;
  my ($debug, $pRc);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  unless (exists $$pRc{target}{$target}) {
    if (exists $$pRc{TgtGroup}{$target}) {
      $debug and printf STDERR "%s. resolving target group '%s'\n", &MySub, $target;
      return &Sum ( map { &TgtspecifTgtNum ($_, $CountCase, %opt) }
             @{$$pRc{TgtGroup}{$target}{target}} );
    }
    $debug and printf STDERR "%s. ERROR: there's no target or target group '%s'\n", &MySub, $target;
    return undef;
  }
  $_LibParam{TgtSpecif}{ValidParam}{TgtNum}{$CountCase} or return undef;

  # update counts in xxx manually!
  # these are estimates coming out of independent methods

  # calculate expected read number
  $debug and printf STDERR "%s. %s's %s count: %d\n", &MySub,
    $target, lc $CountCase, $$pRc{target}{$target}{'Num'.$CountCase};
  return $$pRc{target}{$target}{'Num'.$CountCase};
}


# expected target hits for a library group
#
# INTERFACE
# - argument 1: target identifier
# - argument 2: library group identifier
# - argument 3: count case identifier:
#               Clone Nt Read
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#
# - return val: - maximum likelihood value
#               - undef if an error occurred
#
sub TgtspecifTgtExpectLib {
  my ($target, $lib, $CountCase, %opt) = @_;
  my ($debug, $pRc);
  my ($pTgtExpect, $LibgrpNum, $RelSpecif);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  unless (exists $$pRc{target}{$target}) {
    $debug and printf STDERR "%s. ERROR: there's no target %s\n", &MySub, $target||"''";
    return undef;
  }
  unless (exists $$pRc{LibGroup}{$lib}) {
    $debug and printf STDERR "%s. ERROR: there's no library group %s\n", &MySub, $lib||"''";
    return undef;
  }
  $_LibParam{TgtSpecif}{ValidParam}{TgtExpectLib}{$CountCase} or return undef;
  $pTgtExpect = $$pRc{TgtExpect} ||= {};

  # get resource parameters
  unless (exists $$pTgtExpect{$target}{$lib}{$CountCase}) {
    unless (defined ($LibgrpNum = &TgtspecifLibgrpNum ($lib, $CountCase, %opt))) {
      $debug and printf STDERR "%s. ERROR in \&TgtspecifLibgrpNum\n", &MySub;
      return undef;
    }
    unless (defined ($RelSpecif = &TgtspecifLibgrpSpecif ($lib, $target, $CountCase, %opt))) {
      $debug and printf STDERR "%s. ERROR in \&TgtspecifLibgrpSpecif\n", &MySub;
      return undef;
    }
    $$pTgtExpect{$target}{$lib}{$CountCase} = $LibgrpNum * $RelSpecif;
  }

  # return expectancy for target hits
  return $$pTgtExpect{$target}{$lib}{$CountCase};
}


# source probabilities for sample of reads
#
# INTERFACE
# - argument 1: reference to list of read identifiers
# - argument 2: count case identifier:
#               Clone Nt Read
#
# - options:
#   -debug      [STD]
#   -rc         [STD]
#   -target     - target or target group identifier        
#               - list of target or target group identifiers
#
# - return val: - reference to result hash
#               - undef if an error occurred
#
# DESCRIPTION
# - the list of reads provided in argument 1 usually refers to the
#   members of a contig or sequence cluster.
#   The functions aims to provide probabilities for the target location
#   of this contig/cluster. The provided values reflect log10(P)
#
sub TgtspecifTgtProbab {
  my $Log10 = log (10);
  my ($pRead, $CountCase, %opt) = @_;
  my ($debug, $pRc);
  my (@target, $ItTgt, $lib, $SlcLib, %calc, $ProbabCorr);

  # function parameters
  $debug = $opt{-debug};
  $pRc = &TgtspecifRc (%opt) or return undef;
  $_LibParam{TgtSpecif}{ValidParam}{TgtProbab}{$CountCase} or return undef;

  # loop over possible sources ('targets' in rc syntax)
  unless (@target = &TgtspecifTgtRegard ($opt{-target}, %opt)) {
    $debug and printf STDERR "%s. ERROR: unknown data format or value in option -target\n", &MySub;
    return undef;
  }
  foreach $ItTgt (@target) {

    # calculate expected target hits
    foreach $lib (&TgtspecifLibgrpRegard ('probab', %opt)) {
      $calc{$ItTgt}{ExpectNum}{$lib} = &TgtspecifTgtExpectLib ($ItTgt, $lib, $CountCase, %opt);
    }
    $calc{$ItTgt}{ExpectNumWgt} = 1 / &Sum (values %{$calc{$ItTgt}{ExpectNum}});

    # calculate combinatorial probability
    $calc{$ItTgt}{ProbabRelLog} = 0;
    foreach $lib (&TgtspecifLibgrpRegard ('probab', %opt)) {
      unless ($calc{count}{$lib}) {
        $SlcLib = &TgtspecifLibgrpSelector ($lib, $CountCase, %opt);
        $calc{count}{$lib} = int grep { m/$SlcLib/ } @$pRead
      }
      $calc{$ItTgt}{ProbabRelLog} +=
        log ($calc{$ItTgt}{ExpectNum}{$lib} * $calc{$ItTgt}{ExpectNumWgt})
        * $calc{count}{$lib} / $Log10;
    }
  }

  # correct value for combinatorial probability
  $ProbabCorr = - &Max (map { $_->{ProbabRelLog} } @calc{@target});
  map { $_->{ProbabRelLogCorr} = $_->{ProbabRelLog} + $ProbabCorr } @calc{@target};
  $ProbabCorr = - (log (&Sum (
    map { 10 ** $_ } grep { $_ > -5 } map { $_->{ProbabRelLogCorr} } @calc{@target}))
    / $Log10);
  map { $_->{ProbabRelLogCorr} += $ProbabCorr } @calc{@target};

  # hash containing target probabilities
  $debug and &DataPrint (\%calc, -handle=>\*STDERR);
  return { map { ($_ => sprintf ('%.3f', $calc{$_}{ProbabRelLogCorr})) } @target };
}


1;
# $Id: Library.pm,v 1.14 2004/11/09 23:34:13 karol Exp $
