################################################################################
#
#  kPerl Alignment Laboratory
#  Object Library for Assembly (Re)Sources
#  *** building place ***
#
#  copyright (c)
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 2002
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - this class is an abstract description and access interface
#   for assembly data (re)sources
#
# - individual description of functions at the beginning of the code blocks
#
# - switches are always set library-wide, i.e. all active objects are affected.
#   The way to set switches is just an assignment to the library switch hash,
#   like:
#     $SeqAlign::AssemblySrc::LibGlob{switch}{-debug} = 1;
#
################################################################################
#
#  OBJECT OPERATORS
#
#   *** NONE ***
#
#
#  OBJECT METHODS  for external access
#
# - housekeeping
#   new           create object, initialize via ini(@args)
#   ini           initialize object (and object switches)
#
# - functionalities
#   Path          read/write path
#   Type          format type of assembly source
#   Supports      functionality support of assembly source
#                 functionality specifiers:
#                   sequence
#                   ContigMv
#
#
#  OBJECT DATA STRUCTURE
#  (hash)
#
#   path          path of represented assembly source
#   type          format type of assembly source if already determined
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @ISA
#   %LibGlob
#   %_LibParam
#
# - housekeeping
#   $LibGlob{switch}
#   &new  see MainLib::DefaultObjHash.pm
#   &ini  called by &new
#   &AddSwitch
#   &_LocalSwitch  see MainLib::DefaultObjHash.pm
#
# - functionalities
#   &Path
#   &Type
#   $_LibParam{support}
#   &Supports
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################

package SeqAlign::AssemblySrc;

# includes
#use strict; use warnings;  # OK 200xxxxx
use MainLib::DefaultObjHash;
use MainLib::Path;
use MainLib::FileTmp qw(&PathUnique);
use MainLib::Data qw(&DataPrint);
use MainLib::Misc qw(&MySub);
use Math::kCalc qw(&Sum);
use database::DbPlain qw(&PlainToTable);
use SeqAlign::Gap;

# class hierarchy
# - MainLib::DefaultObjHash is entered as a base class; according to the
#   file header it supplies &new and &_LocalSwitch
our @ISA;
push (@ISA, 'MainLib::DefaultObjHash');

# library-wide data
# - %LibGlob    package variable, holds the library switches ($LibGlob{switch})
# - %_LibParam  file-scoped lexical, holds library parameters
our %LibGlob;
my (%_LibParam);


################################################################################
# housekeeping
################################################################################


# library-wide switch control
# - switches are set by assignment to this hash and are not object-specific,
#   cmp. the example in the file header. &Type reads the -debug switch from
#   here.
# - the assignment makes the library start with an empty set of switches
$LibGlob{switch} = {};


# (re)initialize object from a path
#
# INTERFACE
# - argument 1: path of assembly source
# - return val: - object reference
#               - undef if no (true) path argument was given
#
# DESCRIPTION
# - all previous object data, object switches included, is discarded before
#   the argument is checked. So, the object is left empty in the error case.
#
sub ini {
  my ($this, $PathArg) = @_;

  # empty the hash in place, that way the reference stays blessed
  %$this = ();

  # refuse a missing path; note that '' and '0' count as missing, too
  unless ($PathArg) { return undef }

  # enter path in the form returned by &PathExpand
  $this->{path} = &PathExpand ($PathArg);

  return $this;
}


# enter switches into the object
#
# INTERFACE
# - arguments:  switch/value pairs
#               - a defined value is stored under the switch name
#               - an undefined value removes the switch from the object
#               - switch -TabType is skipped, it never becomes an object switch
# - return val: success status (boolean), always 1
#
sub AddSwitch {
  my ($this, %oopt) = @_;

  foreach my $SwitchName (keys %oopt) {

    # -TabType is not kept among the object switches
    next if ($SwitchName eq '-TabType');

    # all other switches are entered or removed, depending on the value
    my $SwitchVal = $oopt{$SwitchName};
    if (defined $SwitchVal) {
      $this->{switch}{$SwitchName} = $SwitchVal;
    } else {
      delete $this->{switch}{$SwitchName};
    }
  }

  return 1;
}


################################################################################
# functionalities
################################################################################


# get or set the path of the assembly source
#
# INTERFACE
# - argument 1*: path of assembly source (write mode)
# - return val:  path of assembly source
#
# DESCRIPTION
# - write mode is entered for a true argument only. It re-initializes the
#   object via &ini, i.e. all other object data is discarded.
# - NOTE(review): a second "sub Path" further down in this file replaces
#   this definition when the module is compiled.
#
sub Path {
  my ($this, $PathArg) = @_;

  # write mode: let &ini rebuild the object on the new path
  $PathArg and $this->ini ($PathArg);

  # read mode, and result of write mode as well
  return $this->{path};
}


# determine type of assembly source
#
# INTERFACE
# - options:    accepted for call compatibility, but not evaluated
# - return val: - type of assembly source:
#                 GapDirectedTree          GAP4 directed assembly file tree
#                 GAP4                     GAP4 database
#                 index                    contig source index
#                 GAP4 show relationships  GAP4 showrelationships report
#               - undef if the type cannot be determined
#
# DESCRIPTION
# - the source path is taken from $this->{path} and is probed by file tests
#   only. No assembly data is read.
# - a type once determined is kept in $this->{type} and is returned from
#   there on later calls.
# - type labels 'GAP4' and 'index' are spelled like the keys of
#   $_LibParam{support}, which &Supports looks up with the value returned
#   here.
#
sub Type {
  my ($this) = @_;
  my $debug = $LibGlob{switch}{-debug};

  # type has been determined before
  defined ($this->{type}) and return $this->{type};

  my $PathSrc = $this->{path};
  my $SrcType;

  ##############################################################################
  # chain through possible types of contig data source

  if (defined ($PathSrc) and length ($PathSrc)) {

    # the directory property is tested first and -B is applied to
    # non-directories only, because -B may hold true for a directory, too
    my $bDir = -d $PathSrc;

    # source is GAP4 directed assembly file tree
    if ($bDir and -e "$PathSrc/.isGapDirectedTree") {
      $SrcType = 'GapDirectedTree';
    }

    # source is GAP4 database
    elsif (! $bDir and -B $PathSrc) {
      $SrcType = 'GAP4';
    }

    # source is contig index
    # - the path is either the read table itself (*Read.tab) or the common
    #   prefix of the table files
    # - this has to be tested prior to the plain text test, since the read
    #   table is a text file as well
    elsif (($PathSrc =~ m/Read\.tab\z/ and -T $PathSrc) or -T ($PathSrc.'Read.tab')) {
      $SrcType = 'index';
    }

    # source is GAP4 showrelationships report
    elsif (-T $PathSrc) {
      $SrcType = 'GAP4 show relationships';
    }
  }

  # undefined contig source type
  if (! defined ($SrcType)) {
    printf STDERR "%s. ERROR: undefined contig set source type\n", &MySub;
  }
  elsif ($debug) {
    printf STDERR "%s. source %s, type %s\n", &MySub, $PathSrc, $SrcType;
  }

  # exit SUB, keep result for later calls
  return $this->{type} = $SrcType;
}


# read/write path of assembly source
#
# INTERFACE
# - argument 1*: path of assembly source (write mode)
# - return val:  path of assembly source
#
# DESCRIPTION
# - this is the definition of &Path which is effective at run time, since it
#   comes last in the file and replaces the earlier one.
# - write mode is entered for a true argument only. It re-initializes the
#   object via &ini, i.e. all other object data is discarded.
# - the type entry of the object is not touched in read mode.
#
sub Path {
  my ($this, $PathArg) = @_;

  # write mode
  if ($PathArg) {
    $this->ini ($PathArg);
  }

  # return path
  return $this->{path};
}


# functionality support table
# - first-level key:  type of assembly source. &Supports does the look-up
#   with the value it gets from $this->Type().
# - second-level key: functionality specifier; &Supports returns the value
#   stored for it
# - NOTE(review): the file header names 'sequence' and 'ContigMv' as
#   functionality specifiers, whereas 'topology' is the only one entered
#   here - to be confirmed.
$_LibParam{support} = {
  GAP4  => { topology=>1 },
  index => { topology=>1 },
  };


# test functionality support of assembly source
#
# INTERFACE
# - argument 1: functionality specifier
# - return val: - entry of the support table, $_LibParam{support}, for the
#                 type of the assembly source and the functionality
#               - undef if the type of the assembly source is undetermined,
#                 or if there is no such entry
#
sub Supports {
  my ($this, $ArgFunc) = @_;

  # type of assembly source, and functionality, are needed as look-up keys
  my $SrcType = $this->Type();
  defined ($SrcType) and defined ($ArgFunc) or return undef;

  # look up step by step: a nested look-up in one expression would create
  # an empty table entry for every unknown type (autovivification)
  my $pSupport = $_LibParam{support}{$SrcType} or return undef;

  return $$pSupport{$ArgFunc};
}


1;
# $Id: AssemblySrc.pm,v 1.13 2018/06/05 18:02:56 szafrans Exp $
