################################################################################
#
#  kPerl Mathematics Library Group
#  Library for Statistical Calculations
#
#  copyright (c)
#  Karol Szafranski on behalf of IMB Jena, Dept. Genome Analysis, 1999-2004,
#    szafrans@imb-jena.de
#  Karol Szafranski at UPenn Philadelphia, Center for Bioinformatics, 2004,
#    karol@pcbi.upenn.edu
#  Karol Szafranski on behalf of FLI Jena, Genome Analysis Group, 2005,
#    szafrans@fli-leibniz.de
#  Karol Szafranski, 2006-2007, szak@gmx.de
#
################################################################################
#
#  DESCRIPTION
#
# - individual description of functions at the beginning of the code blocks
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#   %_LibParam
#
# - random data and randomisation
#   see Math::Random
#
# - sample metrics
#   &SampleMean
#   &SampleMedian
#   &SampleMedianWgt
#   &SampleMetrics
#   &SampleMetricsWgt
#   &DistribEmpir
#
# - (x,y) samples, regression
#   &_SampleXyCalc
#   &RegressLin0
#   &RegressLin
#
# - binomial distribution
#   &BinomRow
#   &BinomQuant
#   &BinomConfidP
#   &BinomConfidX   *** only a stump ***
#
# - Gaussian distribution
#   $_LibParam{gauss}
#   &GaussVal
#   description: nomenclatural notes on 'quantile'
#   &_GaussQuantPlot
#   &GaussQuant
#   &_GaussLimitPlot
#   &GaussLimit
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################

package Math::Statist;

# includes
use strict; #use warnings;  # OK 20061002
use MainLib::Data;
use MainLib::Path qw(%CorePath);
use MainLib::File qw(&WriteFile);
use MainLib::Misc qw(&MySub);
use Math::Calc;
use Math::Round qw(&nearest);
use Math::Range;

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &SampleMean &SampleMedian &SampleMedianWgt &SampleMetrics
    &SampleMetricsWgt &DistribEmpir
  &RegressLin0 &RegressLin
  &BinomRow &BinomQuant &BinomConfidP
  &GaussVal &GaussQuant &GaussLimit
  );

# package-wide constants and variables
my %_LibParam;


################################################################################
# sample metrics
################################################################################


# calculate sample mean value
#
# INTERFACE
# - argument 1: reference to array of sample values
# - return val: mean value
#
# DESCRIPTION
# - this function is implemented in a similar way in &Math::Calc::Mean
#
sub SampleMean {
  my ($pData) = @_;
  # sample size
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }

  # calculate mean value
  return &Sum(@$pData) / $ValN;
}


# calculate median of a sample
#
# INTERFACE
# - argument 1: reference to array of sample values
# - return val: median value
#
sub SampleMedian {
  my ($pData) = @_;
  # sample size
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }
  # sorted data
  my @DataSorted = sort { $a<=>$b } @$pData;

  # middle, median value
  my $middle = int ($ValN / 2);
  my $median = ($ValN % 2) ?
    $DataSorted[$middle] : &Sum(@DataSorted[$middle-1,$middle]) / 2;

  # exit SUB
  return $median;
}


# calculate median of a sample (weighted measurements)
#
# INTERFACE
# - argument 1: reference to array of values each value being a data pair of:
#               0   sample value
#               1   weight (N) of sample element
# - return val: median value
#
sub SampleMedianWgt {
  my ($pData,%opt) = @_;
  # sample size, sum of weights
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }
  my $WgtSum = &Sum(map{ $_->[1] } @$pData);
  unless ($WgtSum > 0) {
    die sprintf "%s. ERROR: got negative or zero weight sum\n", &MySub;
  }
  # sorted data
  my @DataSorted = sort { $a->[0]<=>$b->[0] } @$pData;

  # sample size, median value
  my $middle_aim = $WgtSum / 2;
  my ($i,$middle_cumul);
  for ($i=$middle_cumul=0; $middle_cumul<$middle_aim; ++$i) {
    $middle_cumul += $DataSorted[$i][1];
  }
  -- $i;
  # find the correct median for weighted boundaries
  # - If $DataSorted[$i][1] exactly finishes the half of the weight sum, then
  #   the median should be in between $DataSorted[$i][0] $DataSorted[$i+1][0] -
  #   assuming same weights $DataSorted[$i][1] and $DataSorted[$i+1][1].
  my $median = $DataSorted[$i][0];
  if ($i>0 and ($middle_cumul-$middle_aim)>$DataSorted[$i][1]*0.5) {
    my @border = ($DataSorted[$i-1],$DataSorted[$i]);
    if ($border[1][0] > $border[0][0]) {
      my $center_i = $middle_cumul - 0.5*$DataSorted[$i][1];
      $median -= ($border[1][0]-$border[0][0]) * $border[1][1]
        / ($border[0][1]+$border[1][1])
        * (abs($middle_cumul-$center_i) / (0.5*$DataSorted[$i][1]));
    }
  }
  if ($i<$#DataSorted and ($middle_cumul-$middle_aim)<$DataSorted[$i][1]*0.5) {
    my @border = ($DataSorted[$i],$DataSorted[$i+1]);
    if ($border[1][0] > $border[0][0]) {
      my $center_i = $middle_cumul - 0.5*$DataSorted[$i][1];
      $median += ($border[1][0]-$border[0][0]) * $border[0][1]
        / ($border[0][1]+$border[1][1])
        * (abs($middle_cumul-$center_i) / (0.5*$DataSorted[$i][1]));
    }
  }
}


# calculate sample metrics
#
# INTERFACE
# - argument 1: reference to array of values
#
# - options:
#   -debug      [STD]
#   -median     include median calculus
#   -probndistrib  include probndistrib calculus
#               This includes calculation of the median
#
# - return val: reference to result hash, keys:
#                 n
#                 mean
#               ( median  median
#                         only with option -median !
#                 DevSq   sum of deviation squares
#                 var     variance (n-1)
#                 varn    variance (n)
#                 s       standard deviation (n-1)
#                 sn      standard deviation (n)
#               ( probndistrib  probability that sample arises from normal
#                         distribution based on mean/median distance
#                         *** not implemented ***
#
sub SampleMetrics {
  my ($pData,%opt) = @_;
  my $debug = $opt{-debug};
  $opt{-median} ||= $opt{-probndistrib};
  # sample size
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }

  # arithmetic mean, median
  my $mean = &SampleMean ($pData);
  my $median;
  if ($opt{-median}) { $median = &SampleMedian($pData) }
  my %result = (
    n      => $ValN,
    mean   => $mean,
      $opt{-median} ?
   (median => $median) : (),
    );

  # deviation squares, variance, standard deviation
  $result{DevSq} = &Sum (map{($mean-$_)**2} @$pData);
  $result{sn} = sqrt ($result{varn} = $result{DevSq} / $ValN);
  if ($ValN > 1) {
    $result{s} = sqrt ($result{var} = $result{DevSq} / ($ValN-1));
  } elsif ($debug) {
    printf STDERR "%s. ERROR: only one value in data array\n", &MySub;
  }

  # probability for normal distribution
  if ($opt{-probndistrib} and $ValN>2) {
    # ...
  }

  # return result
  return \%result;
}


# calculate sample metrics (weighted groups of measurements)
#
# INTERFACE
# - argument 1: reference to array of values each value being a data
#               pair of:
#               0   mean of sample point
#               1   weight (relative N) of sample point
#
# - options:
#   -debug      [STD]
#   -median     include median calculus
#
# - return val: - reference result to hash, keys:
#                 n
#                 mean
#               ( median  median
#                         only with option -median !
#                 DevSq   sum of deviation squares
#                 var     variance (n-1)
#                 varn    variance (n)
#                 s       standard deviation (n-1)
#                 sn      standard deviation (n)
#               ( probndistrib  probability that sample arises from normal
#                         distribution based on mean/median distance
#                         *** not implemented ***
#               - undef if an error occurred
#
sub SampleMetricsWgt {
  my ($pData, %opt) = @_;
  my $debug = $opt{-debug};
  $opt{-median} ||= $opt{-probndistrib};
  # sample size, sum of weights
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }
  my $WgtSum = &Sum(map{ $_->[1] } @$pData);
  unless ($WgtSum > 0) {
    die sprintf "%s. ERROR: got negative or zero weight sum\n", &MySub;
  }

  # arithmetic mean, median
  my $mean = &Sum (map { $_->[0] * $_->[1]; } @$pData) / $WgtSum;
  my $median;
  if ($opt{-median} and $ValN>2) {
    $median = &SampleMedianWgt($pData);
  }
  my %result = (
    n      => $ValN,
    weight => $WgtSum,
    mean   => $mean,
      $opt{-median} ?
   (median => $median) : (),
    );
  $debug and printf STDERR "%s. metrics so far:\n", &MySub;
  $debug and &DataPrint (\%result, -handle=>\*STDERR, -space=>2);

  # deviation squares, variance, standard deviation
  $result{DevSq} = &Sum (map { (($mean-$_->[0])**2) * $_->[1] } @$pData);
  if ($result{DevSq} <= 0) {
    printf STDERR "%s. WARNING: dev square sum <= 0 (explicit %s)\n", &MySub, $result{DevSq};
    if ($ValN == 1) {
      printf STDERR "  due to sample size == 1\n", &MySub;
    }
  }
  $result{sn} = sqrt ($result{varn} = $result{DevSq} / $WgtSum);
  if ($ValN > 1) {
    $result{s} = sqrt ($result{var} = $result{DevSq} / $WgtSum * ($ValN / ($ValN-1)));
  } elsif ($debug) {
    printf STDERR "%s. WARNING: sample size == 1\n", &MySub;
  }

  # return result
  return \%result;
}


# derive density distribution plot from a sample
#
# INTERFACE
# - argument 1: either:
#               - reference to array of samples (table type A1, array of values)
#               - reference to array of sample/weight pairs (table type AA)
#
# - options:
#   -debug      [STD]
#   -RangeMin   set data range minimum. Left border extrapolation will be
#               limited to this value.
#   -smooth     smoothen distribution plot. Argument is reference to hash of
#               smoothening parameters:
#               profile   profile shape
#               step      re-plot step size
#               window    profile window size
#               default settings exist for all these parameters
#
# - return val: distribution values as [[x1,y1],[x2,y2],...]
#
# DESCRIPTION
# - This function derives a plot of the case density distribution (2D data,
#   type AA) from the given sample data, normalised to an integral of 1.0.
#   I call it an "atomic resolution histogram" since the data is classified
#   to the minimally possible class sizes. That means: every unique data value
#   will result in a separate class. Each class is represented by a bar-shaped
#   peak, the width of the peak such that it neatly adjoins the neighbouring
#   peaks, the height of the peak such that the integral corresponds to the
#   normalised class size.
# - the input sample data does not need to be sorted.
#
sub DistribEmpir {
  my ($pData, %opt) = @_;
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  # sample size, sum of weights
  my $ValN = int(@$pData);
  unless ($ValN) { die sprintf "%s. ERROR: no data!\n", &MySub }

  ##############################################################################
  # data pre-processing
  my (@DataPrim, @DataCdens, $FakeSpace, %IntervMax);

  # sum of weights, sort data
  # - safe-copy data to @DataPrim
  # - eventually calculate density weight, detect table type AA versus A1
  #   by testing ref($$pData[0])
  my $ValSW;
  if (ref($$pData[0])) {
    foreach (@$pData) { $ValSW+=$_->[1]; }
    @DataPrim = map { [$_->[0],$_->[1]/$ValSW] } sort { $a->[0]<=>$b->[0] } @$pData;
  } else {
    @DataPrim = map { [$_,1/$ValN] } sort { $a<=>$b } @$pData;
  }

  # compile class-condensed data => combine entries having identical x values
  while (defined ($_ = shift @DataPrim)) {
    push @DataCdens, [ @$_ ];
    while (@DataPrim and $DataCdens[-1][0] == $DataPrim[0][0]) {
      $DataCdens[-1][1] += $DataPrim[0][1];
      shift @DataPrim;
    }
  }
  undef @DataPrim;
  if (int(@DataCdens) < 2) {
    die sprintf "%s. ERROR: only one single value emitted in data array\n", &MySub;
  }

  # fake space to conserve order of data pixels at x interval boundaries
  $FakeSpace = &Max (($DataCdens[-1][0] - $DataCdens[0][0]) / $ValN * 1e-5, $DataCdens[-1][0] * 1e-15);
  if (defined($opt{-RangeMin}) and $DataCdens[0][0]<=$opt{-RangeMin}) {
    if (defined($opt{-RangeMin}) and $DataCdens[1][0]<=$opt{-RangeMin}) {
      die sprintf "%s. ERROR: multiple values below -RangeMin\n", &MySub;
    }
    $DataCdens[0][0] = $opt{-RangeMin} + $FakeSpace;
  }

  # maximum interval size, for the two distal data intervals
  # 1. how many distal windows are to be scanned for maximum interval size
  $IntervMax{window} = (int(@DataCdens) - 2);
  if ($IntervMax{window} >= 8) {
    $IntervMax{window} = int ($IntervMax{window} * 0.25);
  }
  # 2. values for maximum interval sizes
  $IntervMax{'-1'} = &Max (map { $DataCdens[$_+1][0]-$DataCdens[$_][0] } 0..$IntervMax{window});
  $IntervMax{'1'}  = &Max (map { $DataCdens[$_+1][0]-$DataCdens[$_][0] } ($#DataCdens-$IntervMax{window}-1)..($#DataCdens-1));
  unshift @DataCdens, [$DataCdens[0][0] - $IntervMax{'-1'}, 0];
  # 3. limit left interval to $opt{-RangeMin}
  if (defined($opt{-RangeMin})) {
    $DataCdens[0][0] = &Max ($DataCdens[0][0],
      $DataCdens[1][0] - 2 * abs($DataCdens[1][0]-$opt{-RangeMin}));
  }
  push @DataCdens, [$DataCdens[-1][0] + $IntervMax{'1'}, 0];

  # debug data pre-processing
  if ($debug) {
    printf STDERR "%s. condensed sample, size %d\n", &MySub, int(@DataCdens);
    foreach (@DataCdens) {
      printf STDERR "%s\t%s\taha\n", $_->[0], $_->[1];
    }
  }

  ##############################################################################
  # calculate distribution

  # loop over central intervals
  my (@PlotDistrib, %border);
  push @PlotDistrib, [&Mean($DataCdens[0][0],$DataCdens[1][0])-$FakeSpace, 0];
  for (my $CtI=1; $CtI<$#DataCdens; $CtI++) {
    $border{'-1'} = &Mean($DataCdens[$CtI-1][0],$DataCdens[$CtI][0]) + $FakeSpace;
    $border{'1'}  = &Mean($DataCdens[$CtI][0],$DataCdens[$CtI+1][0]) - $FakeSpace;
    my $ValY = $DataCdens[$CtI][1] / ($border{'1'}-$border{'-1'});
    push @PlotDistrib, [ $border{'-1'}, $ValY ];
    push @PlotDistrib, [ $border{'1'},  $ValY ];
  }
  push @PlotDistrib, [&Mean($DataCdens[$#DataCdens-1][0],$DataCdens[-1][0])+$FakeSpace, 0];

  # debug distribution
  if ($debug) {
    my $integral;
    for (my $CtI=1; $CtI<$#PlotDistrib; $CtI+=2) {
      $integral += ($PlotDistrib[$CtI+1][0]-$PlotDistrib[$CtI][0]) * $PlotDistrib[$CtI][1];
    }
    printf STDERR "%s. final report:\n", &MySub;
    printf STDERR "  n data:         %d\n", $ValN;
    printf STDERR "  n intervals:    %d\n", (@PlotDistrib-2) / 2;
    printf STDERR "  total integral: %f\n", $integral;
  }

  ##############################################################################
  # smoothening
  if ($opt{-smooth}) {
    require Math::Plot2D;

    # default smoothening parameters
    $IntervMax{MidRange} = $DataCdens[$#DataCdens-$IntervMax{window}][0] - $DataCdens[$IntervMax{window}][0];
    my %smooth = ref($opt{-smooth}) ? %{$opt{-smooth}} : ();
    if ($smooth{window} <= 0) {
      $smooth{window} = $IntervMax{MidRange} * 0.05;
    }
    if (($smooth{step}||=$smooth{StepSize}) <= 0) {
      $smooth{step} = $IntervMax{MidRange} * 3 / $ValN;
    }
    if ($debug) {
      print  STDERR "deriving smoothening parameters\n";
      printf STDERR "  sample size, condensed sample size: %d, %d\n", $ValN, int(@DataCdens);
      printf STDERR "  sample window to calculate IntervMax: %d\n", $IntervMax{window};
      printf STDERR "  x mid sample range (50 %): %f\n", $IntervMax{MidRange};
      printf STDERR "  smoothening step size: %f\n", $smooth{step};
      printf STDERR "  smoothening window: %f\n", $smooth{window};
    }
    my $poDistrib = Math::Plot2D->new(\@PlotDistrib, -debug=>$dbg2);
    @PlotDistrib = map{@{$_||[]}} $poDistrib->SmoothPlot($smooth{step},
      -profile => $smooth{profile},
      -window  => $smooth{window}
      );
  }

  # return result
  return \@PlotDistrib;
}


################################################################################
# (x,y) samples, regression
################################################################################


# (x,y) sample calculi for linear regression etc.
#
# INTERFACE
# - argument 1: reference to array of arrays, each representing an (x,y) pair
# - argument 2: set of required calculations, comma-separated list of result
#               specifiers, possible:
#               N, mean(x), mean(y), sum(x), sum(x2), sum(y), sum(y2), sum(xy),
#               weight
#               default: N, mean(x), mean(y), weight
#               always returned: N, weight
#
# - options:
#   -debug      [STD]
#
# - return val: reference to result data structure (hash) containing calculated
#               values
#
sub _SampleXyCalc {
  my ($pData, $want, %opt) = @_;
  my ($debug, %WantFin, $bWgt);
  my (%WantIt, $pPix, $ItWant);
  my (%ResultIt, %ResultFin);

  # function constants
  my %CalcGlob = (
    N => { order=>1, func=>
      sub { int(@$pData) } },
    weight => { order=>2, func=>
      sub { $bWgt ? &Sum (map{ $_->[2] } @$pData) : $ResultFin{N} } },
    'sum(x)' => { order=>3, func=>
      sub { &Sum (map{ $_->[0] * ($bWgt?$_->[2]:1) } @$pData) } },
    'sum(y)' => { order=>3, func=>
      sub { &Sum (map{ $_->[1] * ($bWgt?$_->[2]:1) } @$pData) } },
    'mean(x)' => { order=>4, func=>
      sub { $ResultFin{'sum(x)'} / $ResultFin{weight} } },
    'mean(y)' => { order=>4, func=>
      sub { $ResultFin{'sum(y)'} / $ResultFin{weight} } },
    'sum(x2)' => { order=>4, func=>
      sub { &Sum (@{$ResultIt{x2}}) } },
    'sum(y2)' => { order=>4, func=>
      sub { &Sum (@{$ResultIt{y2}}) } },
    'sum(xy)' => { order=>4, func=>
      sub { &Sum (@{$ResultIt{xy}}) } },
    'sum(x_dev2)' => { order=>5, func=>
      sub { &Sum (map{ ($_->[0]-$ResultFin{'mean(x)'})**2 } @$pData) } },
    'sum(y_dev2)' => { order=>5, func=>
      sub { &Sum (map{ ($_->[1]-$ResultFin{'mean(y)'})**2 } @$pData) } },
    'sum(x_dev*y_dev)' => { order=>5, func=>
      sub { &Sum (map{ ($_->[1]-$ResultFin{'mean(y)'}) * ($_->[1]-$ResultFin{'mean(y)'}) } @$pData) } },
    );
  my %GlobNeedsGlob = (
    weight    => [ 'N' ],
    'mean(x)' => [ 'N','weight','sum(x)' ],
    'mean(y)' => [ 'N','weight','sum(y)' ],
    );
  my %GlobNeedsIt = (
    'sum(x2)' => [ 'x2' ],
    'sum(y2)' => [ 'y2' ],
    'sum(xy)' => [ 'xy' ],
    );
  my %CalcIt = (
    x2 => sub { $_[0]->[0] **2 * ($bWgt? $_[0]->[2]:1); },
    y2 => sub { $_[0]->[1] **2 * ($bWgt? $_[0]->[2]:1); },
    xy => sub { $_[0]->[0] * $_[0]->[1] * ($bWgt? $_[0]->[2]:1); },
    );

  # function parameters
  $debug = $opt{-debug};
  $want ||= 'mean(x),mean(y)';
  %WantFin = map { ($_=>1) }
    grep { exists($CalcIt{$_}) or exists($CalcGlob{$_}) }
    map { ($_, @{$GlobNeedsGlob{$_}||[]}); }
    qw(N weight), split (/\s*,\s*/, $want);
  %WantIt = map { ($_=>1) } map { @{$GlobNeedsIt{$_}||[]}; } keys %WantFin;
  $bWgt = (@$pData and @{$$pData[0]}>2 and $$pData[0][2]);
  if ($bWgt and grep{ !$_->[2] }@$pData) {
    printf STDERR "%s. WARNING: zero weight in weighted data - ignored\n", &MySub;
    $bWgt = 0;
  }
  if ($debug) {
    printf STDERR "%s. got / calculating\n", &MySub;
    printf STDERR "  weighted data: %s\n", $bWgt ? 'YES':'NO';
    printf STDERR "  iterative jobs: %s\n", join (',', keys %WantIt);
    printf STDERR "  global jobs: %s\n", join (',', sort { $CalcGlob{$a}{order} <=> $CalcGlob{$b}{order} } keys %WantFin);
  }

  # do iterative calculations
  # - loop over data pixels
  # - loop over requested iterative calculations
  foreach $pPix (@$pData) {
    foreach $ItWant (keys %WantIt) {
      push @{$ResultIt{$ItWant}}, &{$CalcIt{$ItWant}} ($pPix);
    }
  }

  # do global calculations
  foreach $ItWant (sort { $CalcGlob{$a}{order} <=> $CalcGlob{$b}{order} } keys %WantFin) {
    $ResultFin{$ItWant} = @$pData ? &{$CalcGlob{$ItWant}{func}} : undef;
  }

  # exit SUB
  if ($debug) {
    printf STDERR "%s. result data structure\n", &MySub;
    &DataPrint (\%ResultIt, -handle=>\*STDERR, -space=>2);
    &DataPrint (\%ResultFin, -handle=>\*STDERR, -space=>2);
  }
  return \%ResultFin;
}


# linear regression on (x,y) sample, constrained for (0,0)
#
# INTERFACE
# - argument 1: reference to array of arrays, each representing an (x,y) pair
#
# - options:
#   -debug      [STD]
#
# - return val: reference to result data structure (hash):
#               n       sample size
#               ax      slope for variating x
#               ay      slope for variating y
#               bx      y shift for variating x, always zero
#               by      y shift for variating y, always zero
#               correl_Rsqr
#                       coefficient of determination (y over x regression)
#
sub RegressLin0 {

  # function constants
  my $ValFake = 1e-20;

  # function paramters
  my ($pData,%opt) = @_;
  my $debug = $opt{-debug};
  unless (int(@$pData)) { die sprintf "%s. ERROR: no data!\n", &MySub }

  # sample calculations
  my $pSampleXy = &_SampleXyCalc ($pData, 'N,mean(x),mean(y),sum(x2),sum(y2),sum(xy),sum(y_dev2)');
  unless ($$pSampleXy{weight}) {
    die sprintf "%s. ERROR: zero data pool - no regression possible\n", &MySub;
  }
  $$pSampleXy{'sum(xy)'} ||= $ValFake;
  $$pSampleXy{'sum(x2)'} ||= $ValFake;

  # calculation of regression line
  my %result = (
    n  => $$pSampleXy{N},
    ay => $$pSampleXy{'sum(y2)'} / $$pSampleXy{'sum(xy)'},
    ax => $$pSampleXy{'sum(xy)'} / $$pSampleXy{'sum(x2)'},
    );
  $result{bx} = $result{by} = $result{xox} = $result{xoy} = 0;
  # coefficient of determination
  my $ssqr = 0;
  foreach (@$pData) {
    $ssqr += ($_->[1] - ($_->[0]*$result{ay})) **2;
  }
  $result{correl_Rsqr} = ($$pSampleXy{'sum(y_dev2)'}-$ssqr) / ($$pSampleXy{'sum(y_dev2)'}||$ValFake);

  # sample size, mean value
  $debug and printf STDERR "%s. result data structure\n", &MySub;
  $debug and &DataPrint (\%result, -handle=>\*STDERR, -space=>2);
  return \%result;
}


# linear regression on (x,y) sample
#
# INTERFACE
# - argument 1: reference to array of arrays, each representing an (x,y) pair
#
# - options:
#   -debug      [STD]
#
# - return val: reference to result data structure (hash):
#               n       sample size
#               ax      slope for variating x
#               ay      slope for variating y
#               bx      y shift for variating x
#               by      y shift for variating y
#               xox     x axis crossing for variating x
#               xoy     x axis crossing for variating y
#               correl  correlation coefficient r (Pearson)
#               correl_Rsqr
#                       coefficient of determination (y over x regression)
#
sub RegressLin {

  # function constants
  my $ValFake = 1e-20;

  # function parameters
  my ($pData,%opt) = @_;
  my $debug = $opt{-debug};
  unless (int(@$pData)) { die sprintf "%s. ERROR: no data!\n", &MySub }

  # sample calculations
  my $pSampleXy = &_SampleXyCalc ($pData, 'N,weight,mean(x),mean(y),sum(x2),sum(y2),sum(xy),sum(y_dev2)');
  unless ($$pSampleXy{weight}) {
    die sprintf "%s. ERROR: zero data pool - no regression possible\n", &MySub;
  }

  # calculation of regression line
  my %result;
  $$pSampleXy{'N*mean(x)*mean(y)'} = $$pSampleXy{N} * $$pSampleXy{'mean(x)'} * $$pSampleXy{'mean(y)'};
  $result{n} = $$pSampleXy{N};
  $result{ay} = ($$pSampleXy{'sum(xy)'} - $$pSampleXy{'N*mean(x)*mean(y)'}) /
               (($$pSampleXy{'sum(x2)'} - $$pSampleXy{N} * $$pSampleXy{'mean(x)'}**2)||$ValFake);
  $result{ax} = ($$pSampleXy{'sum(y2)'} - $$pSampleXy{N} * $$pSampleXy{'mean(y)'}**2) /
               (($$pSampleXy{'sum(xy)'} - $$pSampleXy{'N*mean(x)*mean(y)'})||$ValFake);
  $result{by} = $$pSampleXy{'mean(y)'} - $result{ay} * $$pSampleXy{'mean(x)'};
  $result{bx} = $$pSampleXy{'mean(y)'} - $result{ax} * $$pSampleXy{'mean(x)'};
  $result{xoy} = - $result{by} / ($result{ay}||$ValFake);
  $result{xox} = - $result{bx} / ($result{ax}||$ValFake);
#  # Pearson's correlation coefficient using pre-computed sum values, cf.
#  # http://davidmlane.com/hyperstat/A51911.html
#  $result{correl_} = ($$pSampleXy{'sum(xy)'} - $$pSampleXy{'N*mean(x)*mean(y)'}) /
#               sqrt ( (($$pSampleXy{'sum(x2)'} - $$pSampleXy{N} * $$pSampleXy{'mean(x)'}**2)||$ValFake)
#                    * (($$pSampleXy{'sum(y2)'} - $$pSampleXy{N} * $$pSampleXy{'mean(y)'}**2)||$ValFake) );
  # However, the following formula is simplest, using the pre-computed slope
  # values:
  $result{correl} = &Sign ($result{ay}) * sqrt ($result{ay} / ($result{ax}||$ValFake));
  # coefficient of determination
  my $ssqr = 0;
  foreach (@$pData) {
    $ssqr += ($_->[1] - ($_->[0]*$result{ay}+$result{by})) **2;
  }
  $result{correl_Rsqr} = ($$pSampleXy{'sum(y_dev2)'}-$ssqr) / ($$pSampleXy{'sum(y_dev2)'}||$ValFake);

  # sample size, mean value
  $debug and printf STDERR "%s. result data structure\n", &MySub;
  $debug and &DataPrint (\%result, -handle=>\*STDERR, -space=>2);
  return \%result;
}


################################################################################
# binomial distribution
################################################################################


# calculate binomial distribution
#
# INTERFACE
# - argument 1: sample size n
# - argument 2: probability p
#
# - options:
#   -accur      accuracy for the calculation of cumulative probabilities,
#               scaled in order of decimal digits, default 5.
#   -debug      [STD]
#   -ForceMinX  force calculation down to specified x value irrespective of
#               integral accuracy threshold
#   -ForceMaxX  force calculation up to specified x value irrespective of
#               integral accuracy threshold
#
# - return val: - wantscalar: array reference for distribution values
#               - wantarray: array reference for distribution values, array
#                 reference for cumulative distribution values (Psi values)
#               - undef if an error occurred
#
# DEVELOPER'S NOTES
# - Working around calculation n over k:
#   The job is done by iteration starting at the expected x value with a
#   preliminary probability assignment. The probability of the neighbouring
#   x values will be calculated from the previous one by applying the progress
#   factor p * 1/q * ... Having all (significantly contributing) case probabil-
#   ities calculated, they will be rescaled to yield a cumulative sum of
#   exactly 1.0.
# - accuracy:
#   Due to the approach, the approximated probabilities will be systematically
#   smaller than the real probabilities, in the deviation range defined by
#   the accuracy parameter. This is because (nearly) never the complete
#   integral is calculated but rather only the significant portion of it.
#   By normalising to 1.0 instead of the read 0.99x the rescaled values will
#   be smaller than the real ones.
# - Minimising computational effort for iteration:
#   Starting from the expect value, the neighbouring x values will be
#   calculated successively until the expected probability integral drops
#   below 10^(-$accur) * P(E(x)).
#
sub BinomRow {
  my ($ValN,$ValP,%opt) = @_;
  my $debug = $opt{-debug};
  if ($ValP<=0 or $ValP>=1) {
    printf STDERR "%s. ERROR: bad argument p: %f\n", &MySub, $ValP;
    return undef; 
  }
  my $accur = $opt{-accur} || 5;
  my $AccurRel = 10**(-$accur);

  # non-scaled probability values
  # start at (or "around") expect value with any priming probability
  my $PosExpect = int ($ValP*$ValN);
  my @probab;
  my $ProbNonsclSum = $probab[$PosExpect] = 1;

  # calculate neighbours iteratively
  # cumulate sum of non-scaled probabilities
  my $ProbScl = (1-$ValP) / $ValP;
  my ($PosI,%CalcN);
  for ($PosI=$PosExpect; $PosI>0; $PosI--) {
    $probab[$PosI-1] = $probab[$PosI] * $ProbScl * ($PosI/($ValN-$PosI+1));
    $ProbNonsclSum += $probab[$PosI-1];
    if (exists($opt{-ForceMinX}) and ($PosI-1)>$opt{-ForceMinX}) { next }
    if ($PosI-1 and 0.5*$probab[$PosI-1]*($PosI-1)/$ProbNonsclSum < $AccurRel) {
      $debug and printf STDERR "%s. reached accuracy limit at low X = $PosI\n", &MySub;
      last;
    }
  }
  $CalcN{min} = $PosI;
  $ProbScl = 1 / $ProbScl;
  for ($PosI=$PosExpect; $PosI<$ValN; $PosI++) {
    $probab[$PosI+1] = $probab[$PosI] * $ProbScl * (($ValN-$PosI)/($PosI+1));
    $ProbNonsclSum += $probab[$PosI+1];
    if (exists($opt{-ForceMaxX}) and ($PosI+1)<$opt{-ForceMaxX}) { next }
    if ($ValN-$PosI-1 and 0.25*$probab[$PosI+1]*($ValN-$PosI-1)/$ProbNonsclSum < $AccurRel) {
      $debug and printf STDERR "%s. reached accuracy limit at high X = $PosI\n", &MySub;
      last;
    }
  }
  $CalcN{max} = $PosI;

  # scaled probability values
  for ($PosI=$CalcN{min}; $PosI<=$CalcN{max}; $PosI++) {
    $probab[$PosI] /= $ProbNonsclSum;
  }

  # cumulative probability (Psi)
  if (wantarray) {
    my $ProbCumul=0;
    my @psi;
    for ($PosI=$CalcN{min}; $PosI<=$ValN; $PosI++) {
      $psi[$PosI] = $ProbCumul += $probab[$PosI];
    }
    return (\@probab, \@psi);
  }
  # simple probability column
  else {
    return \@probab;
  }
}


# for given p and n events, determine probability integral of an x value range
#
# INTERFACE
# - argument 1: sample size n
# - argument 2: probability p
# - argument 3: value range for number of events x, format either:
#               - string "Xmin..Xmax"
#               - array reference [Xmin, Xmax]
#
# - options:
#   -accur      accuracy for the calculation of cumulative probabilities,
#               scaled in order of decimal digits, default 5.
#   -debug      [STD]
#
# - return val: - integral ("quantile") = P(X e[x_min,x_max]|n,p)
#               - undef if an error occurred
#
sub BinomQuant {
  my ($ValN,$ValP,$ArgX,%opt) = @_;
  my $debug = $opt{-debug};
  if ($ValN <= 0) {
    printf STDERR "%s. ERROR: bad argument n: %s\n", &MySub, $ValN;
    return undef;
  }
  if ($ValP<0 or $ValP>1) {
    printf STDERR "%s. ERROR: bad argument p: %s\n", &MySub, $ValP;
    return undef;
  }
  # expand and verify x range parameter
  my $pRgX = ref($ArgX) ? Math::Range->new($ArgX) : Math::Range->new_parsed($ArgX);
  if ((grep { int($_)!=$_ or $_<0 or $_>$ValN } @$pRgX) or $$pRgX[0]>$$pRgX[1]) {
    printf STDERR "%s. ERROR: bad argument x: %s => %s\n", &MySub, $ArgX, $pRgX;
    return undef;
  }

  # calculation binomial distribution
  my $pTable = &BinomRow($ValN,$ValP,%opt) or return undef;
  return &Sum(@{$pTable}[$$pRgX[0]..$$pRgX[1]]);
}


# for observed x out of n events, determine p that just includes the case in
# the resulting confidence interval
#
# INTERFACE
# - argument 1: sample size n
# - argument 2: x = positive events corresponding to requested probability p
# - argument 3: is x upper/lower boundary of confidence interval? As -1, 1.
#
# - options:
#   -accur      accuracy of requested p (in digits), default: 2
#   -alpha      alpha error defining the confidence interval, default: 0.05
#               corresponding to confidence 0.95.
#   -debug      [STD]
#   -GaussAboveSD
#               calculate by Gauss approximation above SD value, default: 5
#   -SuggDistP  suggest a p for the "distal" interval boundary. The suggested
#               value should be unequal x/n and MUST be <= x/n (lower boundary
#               = -1) or >= x/n (upper boundary = 1)
#
# - return val: - determined p
#               - undef if an error occurred
#
# DESCRIPTION
# - The confidence interval is defined here in the context of a one-sided
#   question. The specified x is included into the interval, i.e.:
#   [x..n]  for lower boundary ("-1")
#   [0..x]  for upper boundary ("1")
# - "Distal" and "proximal" are defined here in relation to x/n on the possible
#   p value range e [0.0,1.0].
#
# DEBUG, CHANGES, ADDITIONS
# - Interpolate final result
#
sub BinomConfidP {
  my %LimVal = ('-1'=>0,'1'=>1,'+1'=>1);
    # used either as array index or left/right limit of p value range
  my ($ValN,$ValX,$bound,%opt) = @_;
  my $debug = $opt{-debug};
  #my $dbg2  = $debug ? $debug-1 : undef;
  my $accur = $opt{-accur} || 2;
  my $AccurRel = 0.5 * 10**(-$accur);
  my $alpha = $opt{-alpha} || 0.05;
  my $GaussAboveSD = $opt{-GaussAboveSD} || 5;
  my $MLP = $ValX / $ValN;
  if ($ValN <= 0) {
    printf STDERR "%s. ERROR: bad argument n: %s\n", &MySub, $ValN;
    return undef;
  }
  if ($ValX<0 or $ValX>$ValN) {
    printf STDERR "%s. ERROR: bad argument x: %s, n = %s\n", &MySub, $ValX, $ValN;
    return undef;
  }
  if (abs($bound) != 1) {
    printf STDERR "%s. ERROR: bad boundary argument: %s\n", &MySub, $bound;
    return undef;
  }
  if ($LimVal{$bound} == $MLP) {
    printf STDERR "%s. ERROR: bad boundary argument: %s, ML(p) = x/n = %s\n", &MySub,
      $bound, $MLP;
    return undef;
  }
  $debug and printf STDERR "%s. confidence interval: %s..%s, fit to probability %s\n", &MySub,
    ($bound==1)?(0,"x=".$ValX):("x=".$ValX,"n=".$ValN), 1 - $alpha;
  if (sqrt ($MLP * (1-$MLP) * $ValN) > $GaussAboveSD) {
    $debug and printf STDERR "%s. do Gauss approximation, SD = %s\n", &MySub,
      sqrt ($MLP * (1-$MLP) * $ValN);
    #return BinomConfidPGauss ($ValN, $ValX, $bound, %opt);
  }

  my (@interv);

  # one interval border (proximal) corresponds to ML(p), defined by given x
  $interv[$LimVal{+$bound}]{ValP} = $MLP;
  $interv[$LimVal{+$bound}]{alpha} = 0.5;
  $debug and printf STDERR "%s. proximal interval boundary (code %d): %f\n", &MySub,
    $LimVal{+$bound}, $interv[$LimVal{+$bound}]{ValP};

  # distal interval border located towards boundaries {0,1}
  # provided by caller? Check consistency!
  if ($opt{-SuggDistP}) {
    if (&Sign($interv[$LimVal{+$bound}]{ValP}-$opt{-SuggDistP}) != $bound) {
      printf STDERR "%s. ERROR: bad argument to -SuggDistP: %s, x/n = %s, boundary %s\n", &MySub,
        $opt{-SuggDistP}, $MLP, $bound;
      return undef;
    }
    $interv[$LimVal{-$bound}] = {
      ValP  => $opt{-SuggDistP},
      alpha => 1 - &BinomQuant ($ValN, $opt{-SuggDistP},
               ($bound==1)?[0,$ValX]:[$ValX,$ValN] ),
      };
    $debug and printf STDERR "%s. distal interval boundary (code %d) via -SuggDistP: %s, alpha %s\n", &MySub,
      $LimVal{-$bound}, $interv[$LimVal{-$bound}]{ValP}, $interv[$LimVal{-$bound}]{alpha};

    # provided distal boundary does not work, double interval
    while ($interv[$LimVal{-$bound}]{alpha} > $alpha) {
      $interv[$LimVal{-$bound}]{ValP} = $interv[$LimVal{-$bound}]{ValP}
        + ($interv[$LimVal{-$bound}]{ValP}-$interv[$LimVal{+$bound}]{ValP});
      $debug and printf STDERR "%s. moving distal interval boundary (code %d): %s\n", &MySub,
        $LimVal{-$bound}, $interv[$LimVal{-$bound}]{ValP};
      if ($interv[$LimVal{-$bound}]{ValP}<0 or $interv[$LimVal{-$bound}]{ValP}>1) {
        undef $interv[$LimVal{-$bound}];
        last; 
      }
      $interv[$LimVal{-$bound}]{alpha} = 1 - &BinomQuant ($ValN,
        $interv[$LimVal{-$bound}]{ValP}, ($bound==1)?[0,$ValX]:[$ValX,$ValN]);
    }
  }
  if (! defined($interv[$LimVal{-$bound}])) {
    $interv[$LimVal{-$bound}] = { ValP=>$LimVal{-$bound}, alpha=>0 };
    $debug and printf STDERR "%s. distal interval boundary (code %d, maximum): %f\n", &MySub,
      $LimVal{-$bound}, $interv[$LimVal{-$bound}]{ValP};
  }

  # bisect interval
  while (($interv[1]{ValP}-$interv[0]{ValP})/$interv[1]{ValP} > $AccurRel) {
    $interv[2] = { ValP => &Mean ($interv[0]{ValP}, $interv[1]{ValP}) };
    $interv[2]{alpha} = 1 - &BinomQuant ($ValN, $interv[2]{ValP},
      ($bound==1)?[0,$ValX]:[$ValX,$ValN]);

    # bisection alpha > target alpha => distal p
    if ($interv[2]{alpha} > $alpha) {
      $interv[$LimVal{$bound}] = $interv[2];
    }
    # bisection alpha < target alpha => proximal p
    else {
      $interv[$LimVal{-$bound}] = $interv[2];
    }
    $debug and printf STDERR "%s. interval shrunk: %s..%s | alpha %s..%s\n", &MySub,
      $interv[0]{ValP}, $interv[1]{ValP}, $interv[0]{alpha}, $interv[1]{alpha};
  }

  # interpolate
  # - ($interv[1]{ValP}-$interv[0]{ValP}) is p interval difference, directed
  #   from $interv[0]{ValP} towards correct p
  # - ($alpha-$interv[0]{alpha}) / ($interv[1]{alpha}-$interv[0]{alpha}) is
  #   sign-less factor that can be used to scale difference direction vector
  $interv[2] = { ValP => $interv[0]{ValP} +
    ($alpha-$interv[0]{alpha}) / ($interv[1]{alpha}-$interv[0]{alpha})
    * ($interv[1]{ValP}-$interv[0]{ValP}) };
  return $interv[2]{ValP};
}


# for given distribution B(n,p), determine the most distal X value that is
# included in the confidence interval
#
# INTERFACE
# - argument 1: sample size n
# - argument 2: p = binomial probability
# - argument 3: search x for upper/lower boundary of confidence interval?
#               As -1, 1.
#
# - options:
#   -accur      accuracy of requested p (in digits), default: 2
#   -alpha      alpha error defining the confidence interval, default: 0.05
#               corresponding to confidence 0.95.
#   -debug      [STD]
#
# - return val: - determined p
#               - undef if an error occurred
#
# DESCRIPTION
# - The confidence interval is defined here in the context of a one-sided
#   question. The specified x is included into the interval, i.e.:
#   [x..n]  for lower boundary ("-1")
#   [0..x]  for upper boundary ("1")
# - "Distal" and "proximal" are defined here in relation to x/n on the possible
#   p value range e [0.0,1.0].
#
sub BinomConfidX {
}


################################################################################
# Gaussian distribution
################################################################################

# parameters
$_LibParam{gauss}{normalise} = 1 / sqrt (2 * $const{pi});
$_LibParam{gauss}{QuantPlot} = undef;
$_LibParam{gauss}{QuantPlotStepCalc} = 0.00001;
$_LibParam{gauss}{QuantPlotStepKeep} = 0.01;
$_LibParam{gauss}{QuantPlotResol} = 1e-15;


# calculate Gaussian distribution value
#
# INTERFACE
# - argument 1: parameter my
# - argument 2: parameter s
# - argument 3: x value
# - return val: - Gaussian distribution value
#               - undef if an error occurs
#
sub GaussVal {
  my ($ValMy,$ValS,$ValX,%opt) = @_;
  ($ValS > 0) or return undef;
  my $normalise = $_LibParam{gauss}{normalise} / $ValS;

  # calculate value
  my $ValGauss = $normalise * exp -(($ValX-$ValMy)**2 / (2 * $ValS**2));
  return $ValGauss;
}


# nomenclatural notes on 'quantile'
#
# - quantile    synonymous to percentile, but specification in the value
#               range [0,1] e R
#               Letter notation is Z (in variable context) or z (in value
#               context). Frequent question: P(Z<=z)
# - percentile  x value in the Gaussian distribution where you get a certain
#               percentual partition
# - quartile    one the special quantiles: 0.25, 0.50, 0.75
#


# provide reference to Gaussian quantile library
#
# INTERFACE
# - return val: reference to quantile library object (anchored at
#               $_LibParam{gauss}{QuantPlot})
#
sub _GaussQuantPlot {
  $_LibParam{gauss}{QuantPlot} ||= do{
    require Math::Plot2D;
    Math::Plot2D->new($CorePath{statist}{GaussQuantPlot})
      or die sprintf "%s. ERROR: unable to load Gaussian quantile library %s\n", &MySub,
        $CorePath{statist}{GaussQuantPlot};
  };
}


# calculate Gaussian quantile value
#
# INTERFACE
# - argument 1: multiple of s
#               sign of value is without effect
#
# - options:
#   -accuracy   required accuracy. A low value may quicken the calculation   
#               *** not implemented ***
#   -debug      [STD]
#   -double     quantile for double-sided testing, default one-sided
#
# - return val: - Gaussian quantile value (0.5 <= val <= 1)
#               - undef if an error occurs
#
sub GaussQuant {
  my ($ValTimesS, %opt) = @_;
  my ($debug, $dbg2, $pQuantLib, @QuantLibXtr);
  my ($pBound, $ValQuant, $CalcStep);

  # function parameters
  $debug = $opt{-debug};
  $dbg2  = $debug ? $debug-1 : undef;
  $ValTimesS = abs ($ValTimesS);
  unless ($pQuantLib = &_GaussQuantPlot(-debug=>$dbg2)) {
    printf STDERR "%s. ERROR: unable to load quantile library\n", &MySub;
    exit 1;
  }
  $CalcStep = $_LibParam{gauss}{QuantPlotStepCalc};

  # just for debug: rough estimate of quantile
  if ($debug) {
    $ValQuant = $pQuantLib->Interpolate($ValTimesS);
    printf STDERR "%s. quantile %.5f for x %.5f - from linear interpolation\n", &MySub,
      $ValQuant, $ValTimesS;
  }

  # TimesS value interval from quantile library
  # level 1: $BoundLeft / $BoundRight
  # level 2: 0 = Num, 1 = X, 2 = Y
  $pBound = $pQuantLib->Bound($ValTimesS);

  # extrapolate values
  # - direction of extrapolation is always towards increasing $ValTimesS
  # - add values to library
  while (! defined($$pBound[1]) or $$pBound[1][1]<$ValTimesS) {
    $debug and printf STDERR "%s. doing quantile extrapolation\n", &MySub;
    $$pBound[1] = &DataClone ($$pBound[0]);
    # iterate until interval brackets TimesS value
    while ($$pBound[1][1] < $ValTimesS) {
      $$pBound[0] = &DataClone ($$pBound[1]);
      $$pBound[1][2] = $$pBound[0][2] +
        $const{euler} ** (-0.5 * (($$pBound[0][1]+0.5*$CalcStep) ** 2))
        / sqrt (2 * $const{pi})
        * $CalcStep;
      $$pBound[1][1] = &nearest ($CalcStep, $$pBound[0][1]+$CalcStep);
      if (&ModuloExt ($$pBound[1][1], $_LibParam{gauss}{QuantPlotStepKeep})
        < $_LibParam{gauss}{QuantPlotResol}
      ) {
        push @QuantLibXtr, [$$pBound[1][1], $$pBound[1][2]];
      }
    }
    if (@QuantLibXtr) {
      &WriteFile ($CorePath{statist}{GaussQuantPlot},
        join ('', map { "$_->[0]\t$_->[1]\n" } @QuantLibXtr),
        -append=>1);
      undef $_LibParam{gauss}{QuantPlot};
    }
  }

  # interpolate value iteratively
  # - if queried X is framed by Xs in library, but frame borders differ by
  #   more than iterative library resolution ('QuantPlotStepCalc').
  # - start at nearest border and iterate into interval with step size =
  #   library resolution ('QuantPlotStepCalc'). We don't store newly calculated
  #   values in the library file (differentiation between 'QuantPlotStepCalc'
  #   and 'QuantPlotStepKeep').
  unless (defined ($ValQuant = (map { $_->[2] }
    grep { abs ($_->[1] - $ValTimesS) < $_LibParam{gauss}{QuantPlotStepCalc} }
    $$pBound[0], $$pBound[1])[0] )
  ) {
    $pBound = &DataClone ($pBound);
    $debug and printf STDERR "%s.  interpolating iteratively\n", &MySub;
    if (abs ($ValTimesS - $$pBound[0][1]) < abs ($ValTimesS - $$pBound[1][1])) {
      $debug and printf STDERR "  starting from x = %.5f\n", $$pBound[0][1];
      $debug and printf STDERR "  iterating rightwards\n";
      # position right interval border onto left border
      $$pBound[1] = &DataClone ($$pBound[0]);
      # iterate until interval brackets TimesS value
      while ($$pBound[1][1] < $ValTimesS) {
        $$pBound[0] = &DataClone ($$pBound[1]);
        # shift right interval border (left of $ValTimesS) rightwards
        $$pBound[1][2] = $$pBound[0][2] +
          # Gaussian value for ($$pBound[0][1]+0.5*$CalcStep)
          &GaussVal (0.0, 1.0, $$pBound[0][1] + 0.5*$CalcStep)
          # linear integral assumed for distance of $CalcStep
          * $CalcStep;
        $$pBound[1][1] = &nearest ($CalcStep, $$pBound[0][1]+$CalcStep);
      }
    }
    else {
      $debug and printf STDERR "  starting from x = %.5f\n", $$pBound[1][1];
      $debug and printf STDERR "  iterating leftwards\n";
      # position left interval border onto right border
      $$pBound[0] = &DataClone ($$pBound[1]);
      # iterate until interval brackets TimesS value
      while ($$pBound[0][1] > $ValTimesS) {
        # position right interval border leftmost, but right of $ValTimesS
        $$pBound[1] = &DataClone ($$pBound[0]);
        # shift left interval border (right of $ValTimesS) leftwards
        $$pBound[0][2] = $$pBound[1][2] -
          # Gaussian value for ($$pBound[1][1]-0.5*$CalcStep)
          &GaussVal (0.0, 1.0, $$pBound[1][1] - 0.5*$CalcStep)
          # linear integral assumed for distance of $CalcStep
          * $CalcStep;
        $$pBound[0][1] = &nearest ($CalcStep, $$pBound[1][1]-$CalcStep);
      }
    }
  }

  # interpolate value linearly
  unless (defined ($ValQuant = (map { $_->[2] }
    grep { abs ($_->[1] - $ValTimesS) < $_LibParam{gauss}{QuantPlotResol} }
    $$pBound[0], $$pBound[1])[0] )
  ) {
    $debug and printf STDERR "%s. doing quantile interpolation linearly\n", &MySub;
    $ValQuant = $$pBound[0][2] + ($$pBound[1][2] - $$pBound[0][2]) *
      (($ValTimesS - $$pBound[0][1]) / ($$pBound[1][1] - $$pBound[0][1]));
  }

  # return result
  if ($opt{-double}) { $ValQuant -= 1-$ValQuant }
  return $ValQuant;
}


# provide reference to Gaussian quantile library
#
# INTERFACE
# - return val: reference to limit library object
#
sub _GaussLimitPlot {
  my (%opt) = @_;

  # retrieve quantile library object
  # `require Math::Plot2D;' is implicitly done in &_GaussQuantPlot
  my $pLib = &_GaussQuantPlot();
  # swap x/y
  $pLib = Math::Plot2D->new( [map{[$_->[1],$_->[0]]} map{@{$_||[]}} $pLib->Data()] );
}


# calculate TimesS limit for Gaussian quantile value
#
# INTERFACE
# - argument 1: quantile
#
# - options:
#   -accuracy   required accuracy. A low value may fasten the calculation   
#               *** not implemented ***
#   -debug      [STD]
#
# - return val: - multiple of s that limiting the quantile
#               - undef if an error occurs
#
sub GaussLimit {
  my ($ValQuant,%opt) = @_;
  my $debug = $opt{-debug};
  my $dbg2  = $debug ? $debug-1 : undef;
  if ($ValQuant<=0 or $ValQuant>=1) {
    die sprintf "%s. argument ERROR: ridiculous quantile %s\n", &MySub, $ValQuant;
  }
  if ($ValQuant <= 0.5) { $ValQuant = 1 - $ValQuant }
  my $pLimitLib = &_GaussLimitPlot (-debug=>$dbg2);
  unless ($pLimitLib) {
    die sprintf "%s. ERROR: unable to get limit table from quantile library\n", &MySub;
  }

  # first-pass estimate of limit
  my $ValTimesS = $pLimitLib->Interpolate ($ValQuant);
  $debug and printf STDERR "%s. linear interpolation: quantile %.5f -> limit %.5f\n", &MySub,
    $ValQuant, $ValTimesS;

  # this works quite well so far
  return $ValTimesS;

  # TimesS value interval from quantile library
  # - find interval boundaries for valid calculus
  # - interpolated value is the lower interval boundary for the true TimesS
  #   value. We get the upper boundary by regarding the maximum curvature.
  #   *** not implemented ***
  my $curve = 0.001;
  my @bound;
  GaussLimitCurve: {
    $bound[0] = $ValTimesS;
    $bound[1] = $pLimitLib->Bound ($ValQuant);
    if ($ValQuant < &GaussQuant ($bound[1])) {
      $curve *= 1.5;
      printf STDERR "%s. WARNING: curvature overflow\n", &MySub;
      printf STDERR "  quantile: %s\n", $ValQuant;
      printf STDERR "  interpolated limit: %s e [%s,%s]\n", $ValTimesS, @bound;
    }
  }

  # return result
  return $ValTimesS;
}


1;
# $Id: Statist.pm,v 1.24 2007/08/24 21:40:56 sza Exp $
