################################################################################
#
#  kPerl Core Library Group
#  Library for Data Structure Management and I/O
#
#  copyright (c)
#    Karol Szafranski, 2005
#    UPenn Philadelphia, Center for Bioinformatics, 2004
#    Institute of Molecular Biotechnology Jena, Dept. Genome Analysis, 1999-2004
#  author
#    Karol Szafranski, karol.szafranski@leibniz-fli.de
#
################################################################################
#
#  DESCRIPTION
#
# - &DataPrint / &DataRead concept:
#
#   aims:
#   - output nearly all kind of Perl data, including nested data structures
#     -> &DataPrint.
#   - reload &DataRead's output and restore the data
#     -> &DataRead.
#   - In contrast to Data::Dumper which serves quite the same purpose, the
#     output shall have a most human-readable form, reflecting the data to
#     minds that know nothing about Perl.
#   capabilities:
#   - I/O of data sub-structures includes HASH, ARRAY, as well as OO data.
#     Objects will be restored by &DataRead, given that dynamic loading of
#     the required modules is possible via
#       eval "require $package"
#     Otherwise, you'll have to include the required modules prior to calling
#     &DataRead.
#   - Certain sections of the plain representation (output of &DataPrint) can
#     be re-loaded (via &DataRead) to the corresponding data substructures.
#     There're no special manipulations required to perform this. The
#     filehandle given to &DataRead just has to point to the beginning of the
#     desired substructure. After return from &DataRead the filehandle will
#     point to the end of the substructure. An example can be found in
#     &SeqLab::SeqFormat::SeqentryPopStruct where this technique is used for
#     stream-fashioned reading of single hash substructures from an array of
#     hashes.
#   - Large strings containing line breaks are correctly restored from
#     &DataPrint output using &DataRead.
#   - Data sub-structures that are cross-linked in a cyclic fashion (i.e.
#     cyclic graphs) are written and recovered without causing endless loops or
#     loss of information.
#   limits:
#   - tied data (objects) are lost in output via &DataPrint
#   - hash keys cannot contain whitespace characters
#   - scalar data (hash values and array elements) cannot contain '{' as the
#     final character.
#   - we cannot dump object sub-structures that have an overloaded stringify
#     operator. This is because we rely on the expression "$pSubData" to get the
#     reference type and physical address of the object.
#
# - functions beginning with a letter are exported, those with a leading
#   underscore are not. Those are meant for internal use only. Variables with
#   a leading underscore are declared with "my", therefore inaccessible from
#   outside the package.
#
# - some words about "volatile" input filehandles. These are, for example,
#   filehandles corresponding to an input pipe.
#   ...
#
# - Each function has a comment header that describes at least the calling
#   interface, and possibly more.
#
################################################################################
#
#  FUNCTIONS, DATA
#
#   @EXPORT
#   %_LibParam
#
# - I/O of plain format
#   &DataRead
#   &_DataReadArray
#   $_LibParam{PerlAddr}
#   $_LibParam{ParseSub}
#   &_DataReadRef
#   &_DataReadPlain
#   $_LibParam{str2val}
#   &_DataReadValue
#   &DataPrint
#   &DataPrintValue
#
# - I/O of XML format
#   &DataFromXML
#   &DataXmlFind
#   &DataXmlFlat
#
# - handling and manipulation
#   &unique
#   &ListMaxfirst
#   &DataClone
#   &DataTreeSlc
#   &DataTreeDiff
#   &DataDecross
#
#
#  STD OPTIONS
#
#   -debug      print debug protocol to STDERR
#
################################################################################
#
#  DEBUG, CHANGES, ADDITIONS
#
# - A current limitation of the concept is that scalar data (hash values and
#   array elements) cannot contain '{' as the final character. It would be
#   easy to recognize these "bad" values and to output them as a PLAINxxx{ }
#   substructure, with the ability to restore them identically on re-reading by
#   &DataRead.
#
# - look also for notes in the header of each function block
#
################################################################################

package MainLib::Data;

# includes
use strict; #use warnings;  # OK 20110626
use FileHandle;
use MainLib::File qw(&ReadFile);
use MainLib::Misc qw(&MySub);

# symbol export
use Exporter qw(import);
our @EXPORT = qw (
  &DataRead &DataPrint
  &DataFromXML &DataXmlFind &DataXmlFlat
  &unique &ListMaxfirst &DataClone &DataTreeSlc &DataTreeDiff &DataDecross
  );

# package-wide constants and variables
my %_LibParam;


################################################################################
# I/O of plain format
################################################################################


# parse data structure from file / plain text
#
# INTERFACE
# - argument 1: any argument type that works with &FileHandleCreate [planned,
#               all inline so far]
#               - file argument, or piped call
#               - string "-" referring to filehandle STDIN
#               - filehandle reference (GLOB, FileHandle).
#               - reference to array of plain lines
#                 The referenced data will be left unchanged
#               - reference to plain text to be parsed
#                 This feature is still used (e.g. in Align.pl)
#
# - options:
#   -debug      [STD]
#  (-RefDef     index of references, for cross-linking
#  (-RefMiss    index of missing references
#  (-sub        current data structure sublevel
#               the uppermost, un-bracketed data level will be regarded as
#                 level #0
#               note: subroutine modifications will not affect local %opt
#
# - return val: - wantscalar:
#                 - reference to data structure
#               - wantarray: array of:
#                 - reference to data structure
#                 - file handle reference for remaining input
#               - undef if an error occurs (mixture of hash/array type)
#
# DESCRIPTION
# - limitations:
#   - hash keys that originally contain whitespaces won't be parsed correctly.
#     Actually, the plain data format, in it's current definition, whitespaces
#     are used to mark the data field borders.
#     For the same reason, the first entry of an array (element 0) cannot
#     contain whitespaces. Scalar values with leading whitespaces, either hash
#     values or array elements, cannot be recovered correctly.
#   - scalar data (hash keys and array elements) cannot contain '{' as the
#     final character.
# - If arg1 is an existing handle, no close() will be done after the procedure.
#   The handle will point to the file position where parsing has ended.
#
# DEVELOPER'S NOTES
# - This procedure needs about 90 us computation time per line plain format
#   (including I/O) - machine "hume"=UltraSparc-II at 20040308.
#
sub DataRead {
  my ($ArgIn,%opt) = @_;
  my $debug = $opt{-debug};
  $opt{-RefDef} ||= {};
  $opt{-RefMiss} ||= {};
  $opt{-sub} ||= 0;

  # set up input source
  my $hIn;
  if      (ref($ArgIn) eq 'ARRAY') {
    require FileHandle::Unget;
    $hIn = FileHandle::Unget->new();
    foreach (reverse (@$ArgIn)) { $hIn->ungets($_) }
      # this code also works with both FileHandle::InBuffer and
      # FileHandle::Unget. Otherwise, it would be simpler to write
      # `$hIn->ungets(@$ArgIn);'
  } elsif (ref($ArgIn) eq 'SCALAR') {
    require FileHandle::Unget;
    $hIn = FileHandle::Unget->new();
    $hIn->ungets($$ArgIn);
  } elsif (ref($ArgIn) =~ m/\b(GLOB|FileHandle)/) {
    $hIn = $ArgIn;
  } elsif (! ref($ArgIn)) {
    if ($ArgIn eq '-') {
      $hIn = FileHandle->new_from_fd('STDIN','r');
    } else {
      unless ($hIn = FileHandle->new($ArgIn,'r')) {
        printf STDERR "%s. ERROR while trying to open file/process %s\n", &MySub, $ArgIn||"''";
        return undef;
      }
    }
  } else {
    printf STDERR "%s. ERROR: unknown argument type (%s reference)\n", &MySub, ref($ArgIn);
    return undef;
  }
  # up from here, $hIn is either a filehandle GLOB reference, or an input
  # FileHandle object

  # loop over input lines
  my ($pData,$bClosed);
  while (defined (my $line=<$hIn>)) {
    chomp $line;
#    $debug and printf STDERR "%s. parsing line: `%s'\n", &MySub, $line;
    if (0) { }

    # end of current data sub-structure (must be HASH because ARRAY would be parsed
    #   elsewhere)
    # return data reference and remaining input
    elsif ($line =~ m/^\s*\}$/) {
#      $debug and printf STDERR "%s. closing HASH data structure, sublevel %d, input line %d\n",
#        &MySub, $opt{-sub}, $.;
      $bClosed = 1; last;
    }

    # data sub-structure => determine data type, parse via recursion
    elsif ($line =~ m/(\S+)?\{$/) {
      my $key = $`;
      my $pRef = &_DataReadRef($1) or return undef;
        # error messaging was done in &_DataReadRef

      # validate consistency of parent data node (ARRAY/HASH)
      my $ArchCurr;
      if ($key=~m/\S+/) { $key=$&; $ArchCurr='HASH'; }
      else { undef $key; $ArchCurr='ARRAY'; }
      if ($ArchCurr eq 'ARRAY') {
        if ($pData) {
          printf STDERR "%s. ERROR: change from architecture hash (type %s) to array (type %s), input line %d\n", &MySub,
            ref($pData), $$pRef{type}, $.;
          next;
        }
        if (ref($hIn) !~ m/\bFileHandle::(InBuffer|Unget)/) {
#          $debug and printf STDERR "%s. changing handle from %s to FileHandle::Unget\n", &MySub,
#            ref($hIn)||"''";
          require FileHandle::Unget;
          $hIn = FileHandle::Unget->new($hIn);
        }
        $hIn->ungets("$line\n");
#        $debug and printf STDERR "%s. propagating to &_DataReadArray, sub-structure %s (architecture %s)\n", &MySub,
#          $$pRef{type}, $$pRef{arch};
        $pData = &_DataReadArray ($hIn, %opt);
        $bClosed = 1; last;
      }

      # parse data sub-structure
#      $debug and printf STDERR "%s. parsing sub-structure %s (architecture %s) recursively\n", &MySub,
#        $$pRef{type}, $$pRef{arch};
      ${$pData||={}}{$key} = '__init_ref__';
      $$pData{$key} = &{$$pRef{pfunc}} ($hIn, (grep{$_}$$pRef{psize}),
        %opt, -sub=>$opt{-sub}+1);

      # bless reference to object
      if ($$pRef{type} ne $$pRef{arch}) {
        if (eval "require $$pRef{type}") {
          bless ($$pData{$key}, $$pRef{type});
        } else {
          printf STDERR "%s. WARNING: unable to load package %s at runtime, leaving it type %s\n", &MySub,
            $$pRef{type}, $$pRef{arch};
        }
      }

      # register reference
      if ($$pRef{addr}) { $opt{-RefDef}{$$pRef{addr}} = $$pData{$key}; }
    }

    # hash entry
    # - the value is allowed to have trailing whitespaces
    elsif ($line =~ m/^\s*(\S+)\s+(\S.*)$/) {
      my ($key,$val) = ($1,$2);

      # resolve own reference label
      if ($key eq '__my_ref__') {
        my $pRef = &_DataReadRef($val);
        if ($$pRef{addr}) {
          $opt{-RefDef}{$$pRef{addr}} = $pData ||= {};
        }
        if ($$pRef{type} ne $$pRef{arch}) {
          if (eval "require $$pRef{type}") {
            bless (($pData||={}), $$pRef{type});
          } else {
            printf STDERR "%s. WARNING: unable to load package %s at runtime, leaving it type %s\n", &MySub,
              $$pRef{type}, $$pRef{arch};
          }
        }
      }
      # enter value to current hash
      # - resolve magic values (yielding scalars)
      # - resolve cross-references
      else {
        $val = &_DataReadValue ($val);
        if (ref (my $pRef=$val)) {
          $$pData{$key} = "__missed_ref__:$$pRef{addr}";
          if ($$pRef{addr}) {
            push @{ $opt{-RefMiss}{$$pRef{addr}}||=[] }, \${$pData}{$key};
              # syntax "\{ $$pData{$key} }" would not yield a reference to the HASH entry
          }
        } else {
          $$pData{$key} = $val;
        }
      }
    }

    # (first) array entry in root data structure
    # - the value is allowed to have trailing whitespaces
    # => proceed recursion in &_DataReadArray
    elsif (!$pData and $line =~ m/^\s*(\S+\s*)$/) {
      if (ref($hIn) !~ m/\bFileHandle::(InBuffer|Unget)/) {
#        $debug and printf STDERR "%s. changing handle from %s to FileHandle::Unget\n", &MySub,
#          ref($hIn)||"''";
        require FileHandle::Unget;
        $hIn = FileHandle::Unget->new($hIn);
      }
      $hIn->ungets("$line\n");
#      $debug and printf STDERR "%s. propagating to &_DataReadArray\n", &MySub;
      $pData = &_DataReadArray ($hIn, %opt);
      $bClosed = 1; last;
    }

    # no match, but non-empty line?
    elsif ($line =~ m/\S/) {
      printf STDERR "%s. no regexp matching in non-empty line %d, probably ERROR\n", &MySub, $.;
    }
  }
  $pData ||= $bClosed ? {} : undef;

  # define previously missing references
  foreach my $RefMissKey (keys %{$opt{-RefMiss}}) {
    if (exists $opt{-RefDef}{$RefMissKey}) {
      foreach my $RefMissVal (@{$opt{-RefMiss}{$RefMissKey}}) {
        $$RefMissVal = $opt{-RefDef}{$RefMissKey};
      }
      delete $opt{-RefMiss}{$RefMissKey};
    }
  }
  if (!$opt{-sub} and int(keys %{$opt{-RefMiss}})
    and $_=(caller(1))[3] and (split(/::/))[-1] ne 'DataRead'
  ) {
    printf STDERR "%s. ERROR: still missing references, sublevel %d\n", &MySub, $opt{-sub};
    print  STDERR map {"  $_\n"} keys %{$opt{-RefMiss}};
  }

  # input source exhausted
  if ($opt{-sub} and !$bClosed) {
    printf STDERR "%s. ERROR: premature end of file, sublevel %d\n", &MySub, $opt{-sub};
  }
  if ($debug and eof($hIn)) {
    printf STDERR "%s. end of input, type %s, sublevel %d\n", &MySub,
      ref($pData), $opt{-sub};
  }
  return wantarray ? ($pData,$hIn) : $pData;
}


# parse array data structure from file / plain text
#
# INTERFACE
# - argument 1: filehandle reference
# - options:
#   ...         all of &DataRead
# - return val: - reference to data structure
#               - undef if an error occurs (mixture of hash/array type)
#
sub _DataReadArray {
  my ($hIn, %opt) = @_;
  my $debug = $opt{-debug};

  # loop over input lines
  my $pData = [];
  my $bClosed;
  while (defined (my $line=<$hIn>)) {
    chomp $line;
#    $debug and printf STDERR "%s. parsing line: `%s'\n", &MySub, $line;
    if (0) { }

    # end of current data sub-structure
    # return data reference and remaining input
    elsif ($line =~ m/^\s*\}$/) {
#      $debug and printf STDERR "%s. closing ARRAY data structure, sublevel %d\n",
#        &MySub, $opt{-sub};
      $bClosed = 1; last;
    }

    # data sub-structure => determine data type, parse via recursion
    elsif ($line =~ m/^\s*(\S+)?\{$/) {
      my $pRef = &_DataReadRef($1) or return undef;
        # error messaging was done in &_DataReadRef

      # parse data sub-structure
#      $debug and printf STDERR "%s. parsing sub-structure %s (architecture %s) recursively\n", &MySub,
#        $$pRef{type}, $$pRef{arch};
      $$pData[int @$pData] = &{$$pRef{pfunc}} ($hIn, (grep{$_}$$pRef{psize}),
        %opt, -sub=>$opt{-sub}+1);

      # bless reference to object
      if ($$pRef{type} ne $$pRef{arch}) {
        if (eval "require $$pRef{type}") {
          bless ($$pData[-1], $$pRef{type});
        } else {
          printf STDERR "%s. WARNING: unable to load package %s at runtime, left %s\n", &MySub,
            $$pRef{type}, $$pRef{arch};
        }
      }

      # register reference
      if ($$pRef{addr}) { $opt{-RefDef}{$$pRef{addr}} = $$pData[-1] }
    }

    # array entry
    elsif ($line =~ m/^\s*(.+)$/) {
      my $val = $1;

      # resolve magic values, enter value to current hash
      push @$pData, (my $pRef = &_DataReadValue($val));
        # references are turned to a &_DataReadRef return structure

      # process references, possible:
      # __my_ref__
      # __prev_ref__
      if (ref ($pRef)) {
        if ($val=~m/^__my_ref__/) {
          # A meaningful "__my_ref__" entry needs to be the first one in the ARRAY.
          # It's removed from the data ARRAY by pop().
          if (!int(@$pData) and $$pRef{addr}) {
            $opt{-RefDef}{$$pRef{addr}} = $pData;
          }
          pop @$pData;
        } else {
          push @{ $opt{-RefMiss}{$$pRef{addr}}||=[] }, \${$pData}[-1];
            # syntax "\{ $$pData[-1] }" would not yield a reference to the ARRAY entry
        }
      }
    }

    # no match, but non-empty line?
    elsif ($line =~ m/\S/) {
      printf STDERR "%s. no regexp matching in non-empty line %d, probably ERROR:\n", &MySub, $.;
    }
  }

  # input source exhausted
  if ($opt{-sub} and !$bClosed) {
    printf STDERR "%s. ERROR: premature end of file, sublevel %d\n", &MySub, $opt{-sub};
  }
  if ($debug and eof($hIn)) {
    printf STDERR "%s. end of input, type %s, sublevel %d\n", &MySub,
      ref($pData), $opt{-sub};
  }
  return $pData;
}


$_LibParam{PerlAddr} = '\(0x[0-9a-fA-F]{6,8}\)';

$_LibParam{ParseSub} = {
  ''    => \&DataRead,
  ARRAY => \&_DataReadArray,
  HASH  => \&DataRead,
  PLAIN => \&_DataReadPlain,
  };

# parse stringified reference from plain data structure format
#
# INTERFACE
# - argument 1: stringified reference, as found in &DataRead input
# - options:
#   -debug      [STD]
# - return val: - reference to hash of reference properties
#                  arch    data architecture type
#                  type    data type, may be data architecture type, but
#                          differs for blessed data
#                  pfunc   appropriate &DataRead* parsing function
#                  addr    data address, whithout framing brackets
#                  psize   string size for data type PLAIN
#               - undef if an error occurs
#
# DESCRIPTION
# - this function not only parses a reference statment into an attribute data
#   structure but it loads also encountered OO packages via require.
#
sub _DataReadRef {
  my ($sRef,%opt) = @_;
  $sRef ||= '';
  my $debug = $opt{-debug};

  my ($RefType,$RefAddr,$RefArch);

  # match data type 'PLAIN' (internal scalar subcategory)
  if ($sRef =~ m/^(PLAIN)(\d+)$/) {
    return {
      arch  => $1,
      type  => $1,
      pfunc => $_LibParam{ParseSub}{$1},
      psize => $2,
      };
  }

  # match data type and address
  # - the calling function guarantees the non-existence of whitespace characters
  elsif ($sRef =~ m/^([^\(\)]*)(\(\w+\))?$/) {
    $RefType = $1 || 'HASH';
    $RefAddr = $2 || '';

    # data address (=> extract, validate)
    if ($RefAddr =~ m/^$_LibParam{PerlAddr}$/o) {
      $RefAddr =~ s/^\W?(\w+)\W?$/$1/;
    } else {
      undef $RefAddr;
    }

    # one of Perl's standard data types
    if ($_LibParam{ParseSub}{$RefType}) {
      return {
        arch  => $RefType,
        type  => $RefType,
        pfunc => $_LibParam{ParseSub}{$RefType},
        addr  => $RefAddr,
        };
    }

    # custom data type
    $RefType =~ m/^([^ \t=]+)(?:\=(ARRAY|HASH))?$/;
    if ($2) {
      $RefType = $1;
      $RefArch = $2;
    } else {
      # the format of data references seems to be specific for certain Perl
      # versions
      $debug and printf STDERR "%s. WARNING: reference type %s does not split into type/architecture\n", &MySub,
        $RefType;
#      #######################################################################
#      # do we really need this, or are we just producing a work-around for a
#      # bug with Math::Range? See bug archive 20040910*.
#      #######################################################################
#      # load package and determine architecture via $RefType->isa()
#      if (eval "require $RefType") {
#        if (eval "$RefType->isa('HASH')") { $RefArch='HASH'; }
#        if (eval "$RefType->isa('ARRAY')") { $RefArch='ARRAY'; }
#        printf STDERR "%s. WARNING: unable to determine architecture via isa(), package %s\n", &MySub,
#          $RefType;
#      } else {
#        printf STDERR "%s. WARNING: unable to load package %s at runtime\n", &MySub,
#          $RefType;
#      }
#      # default (implicit): $RefArch = 'HASH';
    }
    return {
      arch   => $RefArch,
      type   => $RefType,
      addr   => $RefAddr,
      pfunc  => $_LibParam{ParseSub}{$RefArch},
      };
  }

  # match data type and address
  else {
    printf STDERR "%s. ERROR: regexp matching failed, string %s\n", &MySub, $sRef;
    return undef;
  }
}


# parse text field in data structure from file / plain text
#
# INTERFACE
# - argument 1: filehandle reference
# - argument 2: string length
# - options:
#   ...         all of &DataRead
# - return val: - string
#               - undef if an error occurs
#
sub _DataReadPlain {
  my ($hIn, $len, %opt) = @_;
  my ($txt);

  # input source is GLOB reference or FileHandle object
  # read $len characters
  if (ref($hIn) =~ m/^(GLOB|FileHandle)/) {
    read $hIn, $txt, $len;
  }
  # unknown input argument
  else {
    printf STDERR "%s. ERROR: cannot work on input handle type %s\n", &MySub, ref($hIn)||"''";
    exit 1;
  }

  # read until end of block (find closing brackets)
  while (<$hIn>) { m/\}\s*$/ and last; }

  # successfull return
  return $txt;
}


$_LibParam{str2val} = {
  __undef__ => undef,
  __empty_string__ => '',
  __my_ref__   => sub { return ($_[0]=~m/:/) ? &_DataReadRef($'):''; },
  __prev_ref__ => sub { return ($_[0]=~m/:/) ? &_DataReadRef($'):''; },
  };

# process plain value in data structure context
#
# INTERFACE
# - argument 1: value string
# - options:
#   ...         all of &DataRead
# - return val: translated value
#
sub _DataReadValue {
  my ($sVal, %opt) = @_;
  my ($val);

  # evaluate string
  if ($sVal=~m/^__\w+__/ and exists($_LibParam{str2val}{$&})) {
    $val = $_LibParam{str2val}{$&};
  } else {
    return $sVal;
  }

  # string needs to be processed
  if (ref ($val)) {
    return &$val ($sVal);
  }
  # string evaluates to SCALAR
  else {
    return $val;
  }
}


# print data structure in plain format
#
# INTERFACE
# - argument 1: reference to data structure
#
# - options:
#   -debug      [STD]
#  (-had        (only for internal use:) reference to hash of data
#               sub-structures that have already been output.
#   -handle     output filehandle, default STDOUT
#   -NoAddr     do not output reference addresses, omit labels for hash
#               references. This option takes effect in &DataPrintValue. It
#               implies -NoMyRef=>1 without the possibility to interfere.
#   -NoMyRef    do not output the top data node reference (address)
#   -space      number of indent space characters at the beginning of the lines
#               in the current recursion depth.
#   -SpaceChar  indent character for next data sub-level, default: space=` '.
#               Indent character TAB (= "\t") implies -SpaceIncr=>1 as the
#               default setting, but this may be overridden by explicitly
#               supplying a value for that option. The argument value of
#               -SpaceChar is acutally not tested for being a single character
#               as the whole function will work with any string given.
#   -SpaceIncr  indent increment for next data sub-level, default: 2
#               It takes effect on recursion via sub-function &DataPrintValue,
#               and possibly &DataPrint again.
#
# DESCRIPTION
# - This function will output a plain, man-readable format of the data structure
#   that fully represents the original data and can be used for file-based
#   mirroring of the data structure.
# - An endless loops due to cross-linking of data sub-structures is avoided
#   by memorizing each data sub-structure that has been output. This strategy
#   will also prevent multiple output of multiply referenced leafs of the data
#   structure - possibly paranoid. Parsing via function &DataRead will restore
#   the cross-linked data sub-structures as multiply linked, but physically
#   unique nodes, just as it was before.
# - For OO packages that are meant to be output by &DataPrint that use package
#   MainLib::Data themselves (via `use' or `require') and, be sure to exclude
#   import of this same function as it would cause an endless recursion.
# - An OO package may "overload" this function by defining a method of the same
#   name. This way it's possible to implement dynamic "freezing" of the object
#   prior to output. The object can then propagate a non-blessed copy to
#   &DataPrint for doing the actual output. It's recommended that the object
#   method DataPrint() grabs all arguments as %opt and propagates the option
#   hash to the downstream call of &DataPrint. However, it may be necessary to
#   delete option `-had' which represents the internal reference registry of
#   &DataPrint. If the object uses temporary copies of itself to be output,
#   then data substructure reference addresses may occur multiple times in the
#   (now virtual) overall data tree although the nodes are actually not
#   repeated.
# - limits:
#   - Hash keys containing the space/tab character or any control codes won't
#     be re-parsed properly since the space character is the field delimiter.
#     The same problems occur with keys being undef or empty string.
#   - String values ending with "{" or "}" character will cause errors on
#     re-parsing.
#
# DESCRIPTION
# - OO package like MemHandle allows to direct the output from this handle-
#   oriented function to a string.
#
# DEVELOPER'S NOTES
# - This procedure needs about 65 us computation time per line plain format
#   (including I/O) - machine UltraSparc-II ("hume") at 20040308.
# - Sorting of hash entries causes about 9 % of computation time, in this
#   function.
#
sub DataPrint {
# behaves like a &DataPrintStructcontent
  my ($pData,%opt) = @_;
  my $debug = $opt{-debug};
  my $hOut = $opt{-handle} || \*STDOUT;
  my $iSpace = $opt{-space} ||= 0;
  my $sSpace = length($opt{-SpaceChar}||'') ? $opt{-SpaceChar}:' ';
  $opt{-SpaceIncr} ||= ($sSpace eq "\t") ? 1 : 2;

  # initialize dictionary of data sub-structure references
  my ($bFirst);
  if (! exists($opt{-had})) {
    $bFirst = 1;
    $opt{-had} = { "$pData"=>1 };
  }

  # chain according to type of referenced data
  my $DataType=ref($pData);
  if ($DataType and $DataType ne 'CODE' and $DataType ne 'REF' and $DataType ne 'SCALAR') {
  DataPrintOutref: {

    # array
    if ($DataType eq 'ARRAY') {
      # output root reference at the very beginning
      if ($bFirst and !$opt{-NoMyRef} and !$opt{-NoAddr}) {
        print  $hOut ($sSpace) x $iSpace, '__my_ref__:', "$pData", "\n";
      }
      # loop over array entries
      foreach my $e (@$pData) {
        print  $hOut ($sSpace) x $iSpace;
        &DataPrintValue ($e,%opt);
      }
    }

    # hash-like data sub-structure (hopefully)
    elsif ($DataType eq 'HASH') {
      # output root reference at the very beginning
      if ($bFirst and !$opt{-NoMyRef} and !$opt{-NoAddr}) {
        print  $hOut ($sSpace) x $iSpace, '__my_ref__', '  ', "$pData", "\n";
      }
      # loop over sorted hash entries
      foreach my $k (sort{ lc($a) cmp lc($b) } keys %$pData) {
        unless (length $k) {
          printf STDERR "%s. ERROR: empty hash key would cause errors, skipped\n", &MySub;
          printf STDERR "  value \"%s\"\n", $pData->{''};
          next;
        }
        print  $hOut ($sSpace) x $iSpace, $k, '  ';
        &DataPrintValue ($$pData{$k},%opt);
      }
    }

    else {
      if ($pData->isa('ARRAY')) { $DataType = 'ARRAY'; redo DataPrintOutref; }
      elsif ($pData->isa('HASH')) { $DataType = 'HASH'; redo DataPrintOutref; }
      # unknown referenced data type => treat like scalar
      else {
        printf STDERR "%s. WARNING: don't know how to handle data type %s\n", &MySub, ref($pData);
        last DataPrintOutref;
      }
    }

    # exit SUB
    return;
  } } # end "DataPrintOutref" ; end if($DataType=...)

  # data is a SCALAR or is printed as a SCALAR
  # - Normally, we never get here. Typically, scalar output is done in
  #   &DataPrintValue.
  # - CODE and SCALAR refs go here
  print  $hOut ($sSpace) x $iSpace;
  &DataPrintValue ("$pData",%opt);
}


# print data structure in plain format - subfunction for value
#
# INTERFACE
# - argument 1: reference to data structure
# - options:
#   ...         [all of &DataPrint]
#
# DEVELOPER'S NOTES
# - This function has an interface to OO data. If available, it calls
#   $pObj->DataPrint(%opt). Note that the interface method "DataPrint()" is not
#   semantically equivalent to function "&MainLib::Data::DataPrint", rather
#   to function "&MainLib::Data::DataPrintValue".
#
sub DataPrintValue {
  my ($DataVal,%opt) = @_;
  my $debug = $opt{-debug};
  my $hOut = $opt{-handle} || \*STDOUT;
  my $iSpace = $opt{-space} ||= 0;
  my $sSpace = length($opt{-SpaceChar}||'') ? $opt{-SpaceChar}:' ';
  # $opt{-had} was initialized in &DataPrint
  # $opt{-SpaceIncr} was initialized in &DataPrint

  # chain according to type of referenced data
  # - the data type determines the output format
  my $DataType=ref($DataVal);
  if ($DataType) {

    # recurring data sub-structure in cross-linked graph
    # => treat like scalar
    if ($opt{-had}{$DataVal}) {
      $DataVal = "__prev_ref__:$DataVal";
    }

    elsif ($DataType ne 'CODE' and $DataType ne 'REF' and $DataType ne 'SCALAR') {
    $opt{-had}{$DataVal} = 1;
    DataPrintValOutref: {

      # array sub-structure
      if ($DataType eq 'ARRAY') {
        printf $hOut ("%s{\n", $opt{-NoAddr}?ref($DataVal):$DataVal);
        &DataPrint ($DataVal, %opt, -space=>$iSpace+$opt{-SpaceIncr});
        print  $hOut ($sSpace) x $iSpace, "}\n";
      }

      # hash-like data sub-structure (hopefully)
      # print framework, output sub-structure via &DataPrint
      elsif ($DataType eq 'HASH') {
        printf $hOut ("%s{\n", $opt{-NoAddr}?'':$DataVal);
        &DataPrint ($DataVal, %opt, -space=>$iSpace+$opt{-SpaceIncr});
        print  $hOut ($sSpace) x $iSpace, "}\n";
      }

      # data sub-structure is an object => output depends on data architecture
      else {
        if (0) { }
        elsif ($DataVal->can('DataPrint')) {
          $debug and printf STDERR "%s. calling object's method DataPrint()\n", &MySub;
          $DataVal->DataPrint(%opt);
        }
        elsif ($DataVal->isa('ARRAY')) { $DataType = 'ARRAY'; redo DataPrintValOutref; }
        elsif ($DataVal->isa('HASH')) { $DataType = 'HASH'; redo DataPrintValOutref; }
        # unknown referenced data type => treat like scalar
        else {
          printf STDERR "%s. WARNING: don't know how to handle data type %s\n", &MySub, $DataType;
          last DataPrintValOutref;
        }
      }

      # exit doors I+II (sub-object, sub-structure)
      return;
    } } # end "DataPrintValOutref"
  }

  # entry is a SCALAR or is printed as a SCALAR (passed previous block)
  # - CODE, REF, and SCALAR refs go here
  if (defined($DataVal) and ($DataVal=~m/^\s/ or $DataVal=~m/\n/)) {
    print  $hOut 'PLAIN', length($DataVal), "{\n", $DataVal, "\n";
    print  $hOut ($sSpace) x $iSpace, "}\n";
  } else {
    if (! defined($DataVal)) { $DataVal = '__undef__'; }
    elsif (! length($DataVal)) { $DataVal = '__empty_string__'; }
    printf $hOut "%s\n", $DataVal;
  }

  # exit door III (scalar)
  return;
}


################################################################################
# I/O of XML format
################################################################################


# parse data structure from XML
#
# INTERFACE
# - argument 1: - file argument (may be system call)
#               - filehandle reference
#
# - options:
#   -debug      [STD]
#
# - return val: - reference to data structure
#               - undef if an error occurs
#
# DESCRIPTION
# - Currently, overlapping tags are supported so far that they have to be
#   strictly nested. That means they have to follow a strict tree organization.
#
sub DataFromXML {
  my ($InputArg, %opt) = @_;
  my $debug = $opt{-debug};

  # read source
  my $doc = &ReadFile ($InputArg);

  # initialize data structure
  my $pField = my $pRoot = {
    label => 'DataFromXmlRoot',
    pos   => pos($doc),
    value => undef,
    };

  # loop over tags
  my @buffer;
  while ($doc =~ m|<(/)?(\w+)|g) {

    # close current field
    if (length ($1)) {
      # beware nested tags!
      if ($$pField{label} ne $2) {
        printf STDERR "%s. ERROR: nested tag types at pos. %d, types %s vs. %s\n", &MySub,
          pos($doc), $$pField{label}, $2;
        next;
      }

      # enter plain field
      if (! defined $$pField{value}) {
        $$pField{value} = substr ($doc,
          $$pField{pos}, pos($doc)-length($&)-$$pField{pos});
      }

      # close array of fields
      else {
        # nothing to do here!
      }

      # turn back on tag tree
      push @{${$$pField{root}}}, $pField;
      delete $$pField{root};
      !$debug and delete $$pField{pos};
      unless (@buffer) {
        printf STDERR "%s. stack ERROR\n", &MySub;
        return $pRoot;
      }
      $pField = pop @buffer;
    }

    # open new field
    else {
      $$pField{value} ||= [];
      push @buffer, $pField;
      $pField = {
        root  => \($$pField{value}),
        label => $2,
        value => undef,
        };
      $doc =~ m|>|g;
      $$pField{pos} = pos ($doc);
    }
  }

  # input source exhausted
  return $pRoot;
}


# parse data structure from XML
#
# INTERFACE
# - argument 1:  - reference to XML data structure
#
# - options:
#   -debug       [STD]
#   -LevelNum    current tree level, cmp. switch -LimitNum
#   -LimitFound  don't continue recursion if the current entry proofs positive
#   -LimitNum    don't continue recursion beyond specified tree level
#
# - return val:  - reference to array of XML data substructures
#                - undef if an error occurs
#
sub DataXmlFind {
  my ($pData, $pTest, %opt) = @_;
  my $debug = $opt{-debug};
  unless (ref($pData) eq 'HASH') { return undef }
  unless (ref($pTest) eq 'CODE') { return undef }

  # test current entry
  my @win;
  if (&$pTest ($pData)) {
    @win = ($pData);
    $debug and printf STDERR "%s. matching item on level %d\n", &MySub, $opt{-LevelNum};
  }

  # continue recursion
  if (exists($$pData{value}) and ref($$pData{value}) eq 'ARRAY' and (
        ! $opt{-LimitNum} or
        ($opt{-LimitNum} and $opt{-LevelNum} < $opt{-LimitNum}) or
        ! $opt{-LimitFound} or
        ($opt{-LimitFound} and ! int (@win))
  )) {
    $opt{-LevelNum} ++;
    $debug and printf STDERR "%s. continuing recursion to level %d\n", &MySub, $opt{-LevelNum};
    foreach my $pDataSub (@{$$pData{value}}) {
      push @win, @{ &DataXmlFind ($pDataSub, $pTest, %opt) || [] };
    }
  }

  return \@win;
}


# flatten XML data structure to nested arrays of field labels
#
# INTERFACE
# - argument 1: reference to XML data structure
#
# - options:
#   -debug      [STD]
#
# - return val: reference to data structure
#
sub DataXmlFlat {
  my ($pData,%opt) = @_;
  my $debug = $opt{-debug};

  # end node tag
  if (! ref($$pData{value})) {
    return [ [ $$pData{label} ] ];
  }

  # loop over contained tags
  my $pFlat;
  foreach my $pEntry (@{$$pData{value}}) {
    push @$pFlat, @{ &DataXmlFlat ($pEntry) || [] };
  }

  # exit SUB
  return [ [ $$pData{label}, $pFlat ] ];
}


################################################################################
# handling and manipulation
################################################################################


# return unique entries of an array
#
# INTERFACE
# - argument 1*: array
# - return val:  array containing unique entries
#
# DESCRIPTION
# - the order of the entries will be preserved
#
sub unique {
  my (@data) = @_;
  my ($entry, %unique, @unique);

  # change entries to uniqueness
  foreach $entry (@data) {
    if (exists $unique{$entry}) { next }
    $unique{$entry} = $entry;
    push @unique, $entry;
  }

  # exit SUB
  return @unique;
}


# return maximum of first entries of an array
#
# INTERFACE
# - argument 1: reference to array
# - argument 2: maximum array size to be returned
# - options:
#   -ElemExceed append entry to the returned list if limit was exceeded,
#               default entry: '...'
#   -join       join selected list elements to one single string
# - return val: selected array entries (non-referenced)
#
# DESCRIPTION
# - This function is quite useful for verbose output that shall not exceed
#   acceptable sizes.
#
sub ListMaxfirst {
  my ($pData, $SizeMax, %opt) = @_;

  # check consistency of arguments
  if (($SizeMax=int($SizeMax)) <= 0) { return () }
  unless (ref($pData) and ref($pData) eq 'ARRAY') { return () }

  # apply limit
  my @reddata;
  if (int(@$pData) <= $SizeMax) {
    @reddata = @$pData;
  } else {
    @reddata = @$pData[0..($SizeMax-1)];
    if (defined($opt{-ElemExceed})) {
      push @reddata, $opt{-ElemExceed}||'...';
    }
  }

  # final formatting
  if (exists($opt{-join}) and defined($opt{-join})) {
    @reddata = ( join($opt{-join},@reddata) );
  }
  return @reddata;
}


# return clone of data structure
#
# INTERFACE
# - argument 1: reference to data structure
# - options:
#   -debug      [STD]
#  (-had        (only for internal use:) reference to hash of data
#               sub-structures that have already been copied.
# - return val: reference to data structure clone
#
sub DataClone {
  my ($pDataSrc, %opt) = @_;
  my $debug = $opt{-debug};
  $opt{-had}{$pDataSrc} = $pDataSrc;

  # chain according to type of referenced data
  # - the data type determines the iteration mode to copy the data node
  my $pDataClone;
  my $DataType=ref($pDataSrc);
  if ($DataType and $DataType ne 'CODE' and $DataType ne 'SCALAR') {
  DataCloneRef: {

    # data type ARRAY
    if ($DataType eq 'ARRAY'){
#      $debug and printf STDERR "%s. found data reference %s\n", &MySub, ref($pDataSrc);
      $pDataClone = [ @$pDataSrc ];
      for (my $CtI=0; $CtI<@$pDataClone; $CtI++) {
        if (ref($$pDataClone[$CtI])) {
#          $debug and printf STDERR "%s. found data substructure in array, element %d, type %s\n", &MySub,
#            $CtI, ref($$pDataClone[$CtI]);
          if ($opt{-had}{$$pDataClone[$CtI]}) {
            $$pDataClone[$CtI] = $opt{-had}{$$pDataClone[$CtI]};
          } else {
            $$pDataClone[$CtI] = &DataClone ($$pDataClone[$CtI], %opt);
          }
        }
      }
      if (ref($pDataSrc) ne $DataType) { bless ($pDataClone,ref($pDataSrc)) }
    }

    # data type HASH
    elsif ($DataType eq 'HASH') {
#      $debug and printf STDERR "%s. found data reference %s\n", &MySub, ref($pDataSrc);
      $pDataClone = { %$pDataSrc };
      foreach my $CtKey (keys %$pDataClone) {
        if (ref($$pDataClone{$CtKey})) {
#          $debug and printf STDERR "%s. found data substructure in hash, key %s, type %s\n", &MySub,
#            $CtKey, ref($$pDataClone{$CtKey});
          if ($opt{-had}{$$pDataClone{$CtKey}}) {
            $$pDataClone{$CtKey} = $opt{-had}{$$pDataClone{$CtKey}};
          } else {
            $$pDataClone{$CtKey} = &DataClone ($$pDataClone{$CtKey}, %opt);
          }
        }
      }
      if (ref($pDataSrc) ne $DataType) { bless ($pDataClone,ref($pDataSrc)) }
    }

    # data type REF, *** not implemented ***
    elsif ($DataType eq 'REF') {
      printf STDERR "%s. do not know what to do with reference type %s\n", &MySub,
        ref($pDataSrc);
        # method can() does not work on this reference since it's not blessed
      last DataCloneRef;
    }

    # data sub-structure is an object => output depends on data architecture
    else {
      $debug and printf STDERR "%s. data type %s\n", &MySub, $DataType||"''";
      if (0) { }
      elsif ($pDataSrc->can('Clone')) {
        $debug and printf STDERR "%s. calling object's method Clone()\n", &MySub;
        $pDataClone = $pDataSrc->Clone(%opt);
      }
      elsif ($pDataSrc->can('DataClone')) {
        $debug and printf STDERR "%s. calling object's method DataClone()\n", &MySub;
        $pDataClone = $pDataSrc->DataClone(%opt);
      }
      elsif ($pDataSrc->isa('ARRAY')) { $DataType = 'ARRAY'; redo DataCloneRef; }
      elsif ($pDataSrc->isa('HASH')) { $DataType = 'HASH'; redo DataCloneRef; }
      # unknown referenced data type => treat like scalar
      else {
        printf STDERR "%s. WARNING: don't know how to handle data type %s\n  caller: %s -> %s -> me\n", &MySub,
          $DataType, (caller(2))[3], (caller(1))[3];
        last DataCloneRef;
      }
    }

    # exit doors I+II (sub-object, sub-structure)
    return $pDataClone;
  } } # end "DataCloneRef"

  # entry is a SCALAR or is cloned to a SCALAR
  # - CODE and SCALAR refs go here
#  $debug and printf STDERR "%s. found data type SCALAR, value %s\n", &MySub, $pDataSrc;
  $pDataClone = $pDataSrc;

  # exit door III (scalar)
  return $pDataClone;
}


# select from data structure
#
# INTERFACE
# - argument 1: - reference to data structure
# - argument 2: - reference to array of selectors, each selector being an array:
#                 0  select argument
#                 1  selection type:
#                    all      no selection (selector ignored)
#                    exact    exact match (hashes and arrays). This is default
#                             for all data types.
#                    !exact   entries which do not exactly match, for hashes
#                             only
#                    range    for arrays only [not implemented yet]
#                    regexp   regular expression match (hashes)
#                    !regexp  negative regular expression match (hashes)
#
# - options:
#   -debug      [STD]
#   -unique     change array of returned sub-structures to uniqueness of
#               each entry. This applies when cross-sharing of data references
#               occurs throughover the tree.
#               The order of the entries won't be preserved.
#
# - return val: - reference to array of data sub-structures
#               - undef if an error occurs
#
sub DataTreeSlc {
  my ($pData,$pSlcArr,%opt) = @_;
  my $debug = $opt{-debug};
  if (int(@_)<2 or ref($pSlcArr) ne 'ARRAY') { return undef }
  my @SlcFurther = @$pSlcArr;
  my ($SlcArg,$SlcType) = @{ shift @SlcFurther };

  # data type 'ARRAY'
  my @win;
  if (ref($pData) eq 'ARRAY') {
    $SlcType ||= 'exact';
#    $debug and printf STDERR "%s. data type %s, selector type %s, selector '%s'\n", &MySub,
#      ref($pData), $SlcType, $SlcArg;
    if (0) { }
    elsif ($SlcType eq 'all') {
      @win = grep { defined($_) } @$pData;
    }
    elsif ($SlcType eq 'exact') {
      if ($SlcArg < @$pData) {
        @win = grep { defined($_) } $$pData[$SlcArg];
      }
    }
    else {
      printf STDERR "%s. WARNING: unknown selector type %s on data type %s\n", &MySub,
        $SlcType, ref($pData)||"''";
      return undef;
    }
  }

  # data type 'HASH'
  elsif (ref($pData) eq 'HASH') {
    $SlcType ||= 'exact';
#    $debug and printf STDERR "%s. data type %s, selector type %s, selector '%s'\n", &MySub,
#      ref($pData), $SlcType, $SlcArg;
    if (0) { }
    elsif ($SlcType eq 'all') {
      @win = grep { defined($_) } values %$pData;
    }
    elsif ($SlcType eq 'exact') {
      if (exists $$pData{$SlcArg}) {
        @win = grep { defined($_) } $$pData{$SlcArg};
      }
    }
    elsif ($SlcType eq '!exact') {
      my @KeyList = grep { $_ ne $SlcArg } keys %$pData;
      @win = grep { defined($_) } @{$pData}{@KeyList};
    }
    elsif ($SlcType eq 'regexp') {
      my @KeyList = grep m/$SlcArg/, keys %$pData;
      @win = grep { defined($_) } @{$pData}{@KeyList};
    }
    elsif ($SlcType eq '!regexp') {
      my @KeyList = grep { $_!~m/$SlcArg/ } keys %$pData;
      @win = grep { defined($_) } @{$pData}{@KeyList};
    }
    else {
      printf STDERR "%s. WARNING: unknown selector type %s on data type %s\n", &MySub,
        $SlcType, ref($pData)||"''";
      return undef;
    }
  }

  # unknown data type
  else {
    $debug and printf STDERR "%s. ERROR: don't know how to handle data type %s\n", &MySub,
      ref($pData)||"''";
    return undef;
  }

  # selection step debug
  $debug and printf STDERR "%s. %d entries in ware bag\n", &MySub, int @win;

  # continue recursion into next tree level
  if (@SlcFurther) {
    @win = map { @{ &DataTreeSlc ($_, \@SlcFurther, %opt) } } @win;
  }

  # change entries to uniqueness
  if ($opt{-unique}) {
    my %unique = map { ("$_"=>$_) } @win;
    @win = values %unique;
  }

  # input source exhausted
  return \@win;
}


# compare two data structures and report differences
#
# INTERFACE
# - argument 1: - reference to data structure A
# - argument 2: - reference to data structure B
#
# - options:
#   -debug      [STD]
#
# - return val: - difference status (boolean)
#                 *** this may change in the future ***
#               - undef if an error occurs
#
sub DataTreeDiff {
  my (@apData, %opt);
     ($apData[0], $apData[1], %opt) = @_;
  my $debug = $opt{-debug};
  if (int(@_)<2) { return undef }

  my ($dnum, @dval, @dval2, $dct, $bErr);
  
  # different data types?
  if (ref($apData[0]) ne ref($apData[1])) { return 1 }

  # data type 'ARRAY'
  if (ref($apData[0]) eq 'ARRAY') {
    $debug and printf STDERR "%s. data type %s\n", &MySub, ref($apData[0]);
    # compare scalar values
    foreach $dnum (0, 1) {
      $dval[$dnum] = join ("\n", map{ ref($_)?'__ref__':$_ }@{$apData[$dnum]} );
    }
    if ($dval[0] ne $dval[1]) { return 1 }
    # compare references (recursion)
    foreach $dnum (0, 1) {
      $dval[$dnum] = [ grep{ ref($_) }@{$apData[$dnum]} ];
    }
    $bErr = 0;
    for ($dct=0; $dct<int(@{$dval[0]}); ++$dct) {
      $bErr ||= &DataTreeDiff ($dval[0][$dct], $dval[1][$dct], %opt);
    }
    return $bErr;
  }

  # data type 'HASH'
  elsif (ref($apData[0]) eq 'HASH') {
    $debug and printf STDERR "%s. data type %s\n", &MySub, ref($apData[0]);
    # compare keys
    foreach $dnum (0, 1) {
      $dval2[$dnum] = [ sort keys %{$apData[$dnum]} ];
      $dval[$dnum] = join ("\n", @{$dval2[$dnum]} );
    }
    if ($dval[0] ne $dval[1]) { return 1 }
    # map keys to values, compare scalar values
    foreach $dnum (0, 1) {
      $dval[$dnum] = [ map{ $apData[$dnum]{$_} }@{$dval2[$dnum]} ];
      $dval2[$dnum] = $dval[$dnum];
      $dval[$dnum] = join ("\n", map{ ref($_)?'__ref__':$_ }@{$dval2[$dnum]} );
    }
    if ($dval[0] ne $dval[1]) { return 1 }
    # compare references (recursion)
    foreach $dnum (0, 1) {
      $dval[$dnum] = [ grep{ ref($_) }@{$dval2[$dnum]} ];
    }
    $bErr = 0;
    for ($dct=0; $dct<int(@{$dval[0]}); ++$dct) {
      $bErr ||= &DataTreeDiff ($dval[0][$dct], $dval[1][$dct], %opt);
    }
    return $bErr;
  }

  # scalar data
  elsif (! ref($apData[0])) {
    if ($apData[0] ne $apData[1]) { return 1 }
  }

  # unknown data type
  else {
    $debug and printf STDERR "%s. ERROR: don't know how to handle data type %s\n", &MySub,
      ref($apData[0])||"''";
    return undef;
  }

  # input source exhausted
  return 0;
}


# undo cross-linking in data structure
#
# INTERFACE
# - argument 1: - reference to data structure
# - options:
#   -debug      [STD]
#
# DESCRIPTION
# - In a nested data structure, all references will be reduced to 1 per
#   referenced node.
# - CAUTION: Only apply this function if you're sure that ALL referenced data
#   (including objects) may be decomposed.
#
# DEVELOPER'S NOTES
# - A data structure is indexed on function entry. This way, data cannot be
#   lost until recursion is completed.
#
sub DataDecross {
  my ($pData, %opt) = @_;
  my $debug = $opt{-debug};
  $opt{-had} ||= {};
  $opt{-had}{"$pData"} = 1;

  # chain according to type of referenced data
  my $DataType=ref($pData);
  DataDecrossRef: {
  if ($DataType and $DataType ne 'CODE' and $DataType ne 'SCALAR') {
    if ($DataType !~ m/^(ARRAY|HASH)/) {
      if ($pData->isa('ARRAY')) { $DataType = 'ARRAY' }
      elsif ($pData->isa('HASH')) { $DataType = 'HASH' }
      elsif ($pData->can('Decross')) {
        $pData->Decross(%opt);
        last DataDecrossRef;
      }
      # unknown referenced data type, surely object reference
      # hope, that the destructor works properly
      else { last DataDecrossRef }
    }

    # loop over ARRAY elements
    if ($DataType eq 'ARRAY') {
      for (my $CtI=0; $CtI<@$pData; ++$CtI) {
        ref($$pData[$CtI]) or next;
        if ($opt{-had}{$$pData[$CtI]}) {
          $$pData[$CtI] = 0;
        } else {
          &DataDecross ($$pData[$CtI], %opt);
        }
      }
    }

    # loop over pairs of a HASH
    elsif ($DataType eq 'HASH') {
      while (my ($key,$val) = each(%$pData)) {
        ref($val) or next;
        if ($opt{-had}{$val}) {
          $$pData{$key} = 0;
        } else {
          &DataDecross ($val, %opt);
        }
      }
    }
  } } # end "DataDecrossRef"
}


1;
# $Id: Data.pm,v 1.44 2018/06/05 18:02:56 szafrans Exp $
