Treefam Tree
SummaryPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
 Treefam::Tree
Package variables
No package variables defined.
Included modules
Carp
Scalar::Util qw ( weaken )
Treefam::DBConnection
Treefam::Node
Synopsis
 use Treefam::DBConnection;

 my $dbc = new Treefam::DBConnection ();

 my $trh = $dbc->get_TreeHandle();

 my $tree = $trh->get_by_id('TF101001','FULL');

 # find duplications
 my @nodes = $tree->get_nodes_by_tag_value(-D=>'y');

 # find nodes with bootstrap>=50
 my @nodes = $tree->get_nodes_by_tag_value(-B=>'>=50');
Description
 Representation of a Treefam tree.
 Treefam trees are stored in nhx format.
 Current NHX tags valid in Treefam are:

    B     bootstrap value
    S     taxon name
    D     'Y' for duplication, 'N' for speciation, or not defined
    O     sequence ID
    G     gene ID
    E     gene loss stored in a string like "$-Eutheria-DROME"
Methods
IDDescriptionCode
_parse
No description
Code
_remove_node
No description
Code
_string
No description
Code
delete_nodeDescriptionCode
familyDescriptionCode
get_all_nodesDescriptionCode
get_distanceDescriptionCode
get_genesDescriptionCode
get_last_common_ancestorDescriptionCode
get_leaf_by_geneDescriptionCode
get_leavesDescriptionCode
get_nodes_by_tag_valueDescriptionCode
get_speciesDescriptionCode
get_species_treeDescriptionCode
get_subtreeDescriptionCode
newDescriptionCode
nhxDescriptionCode
rootDescriptionCode
scoreDescriptionCode
typeDescriptionCode
Methods description
IDcode    nextTop
 Arg: optional, Treefam family ID
 Description: Gets/sets Treefam family ID for the tree
 Returntype: string
delete_nodecodeprevnextTop
 Arg: a Treefam::Node object
 Description: Removes given node (and its children) from the tree.
 Returntype: the modified Treefam::Tree object
familycodeprevnextTop
 Arg: (optional) Treefam::Family object or family AC
 Description: Gets/sets family the tree belongs to
 Returntype: Treefam::Family object
get_all_nodescodeprevnextTop
 Description: Gets all nodes in the tree
 Returntype: list of Treefam::Node objects
get_distancecodeprevnextTop
 Arg: list of 2 Treefam::Node objects or Treefam::Gene objects
 Description: Gets distance between 2 nodes as cumulative branch
              length
 Returntype: double
get_genescodeprevnextTop
 Description: Gets genes that are in the tree
 Returntype: list of Treefam::Gene objects
get_last_common_ancestorcodeprevnextTop
 Arg: pair of Treefam::Node or Treefam::Gene objects
 Description: Gets last common ancestor of the given nodes/genes
 Returntype: a Treefam::Node object
get_leaf_by_genecodeprevnextTop
 Arg: Treefam::Gene object
 Description: Gets leaf node for the given gene
 Returntype: Treefam::Node object
get_leavescodeprevnextTop
 Description: Gets leaves of the tree
 Returntype: list of Treefam::Node objects
get_nodes_by_tag_valuecodeprevnextTop
 Arg: -key => value
 Description: Gets all nodes for which the 'key' tag matches 'value'
 Returntype: list of Treefam::Node objects
get_speciescodeprevnextTop
 Arg: (optional) type of species name: latin or swcode
      (5 letters Swissprot code)
 Description: Gets a list of species that have at least one gene in
              the tree, defaults to latin name of species
 Returntype: list of strings
get_species_treecodeprevnextTop
 Arg: (optional) complete or short
 Description: Gets species tree for the species in this Treefam
              tree. If arg is set to 'complete', the full tree up
              to the species root and with all intermediate nodes
              is returned. If arg is set to 'short', returns a tree
              with only the leaves and the internal nodes that have
              more than one child. This is the default behaviour.
 Returntype: Treefam::Tree object
get_subtreecodeprevnextTop
 Arg: list of 2 Treefam::Gene or Treefam::Node objects that define
      the subtree
 Description: Gets a subtree rooted at the last common ancestor of
              the given genes/nodes
 Returntype: Treefam::Tree
newcodeprevnextTop
 Arg1: Treefam::DBConnection
 Arg2: family ID or 'species' to get a species tree
 Arg3: type of tree (only if Arg2 is a family ID)
 Arg4: optional, tree in nhx format
 Description: Creates a new tree object.
 Returntype: Treefam::Tree
nhxcodeprevnextTop
 Arg: optional, string
 Description: Tree in nhx format
 Returntype: string
rootcodeprevnextTop
 Arg: optional, a Treefam::Node object
 Description: Gets/sets root of the tree
 Returntype: a Treefam::Node object
scorecodeprevnextTop
 Arg: (optional) double
 Description: Gets/sets score for the tree, for example maximum
              likelihood
 Returntype: double
typecodeprevnextTop
 Arg: optional, family type
 Description: Gets/sets tree type: FULL, SEED or CLEAN
 Returntype: string
Methods code
IDdescriptionprevnextTop
sub ID {
  my $self = shift;
  $self->{'ID'} = shift if @_;
  return $self->{'ID'};
}
_parsedescriptionprevnextTop
sub _parse {
  # code from li Heng
my ($self, $array, $stack, $str, $name, $dist, $nhx) = @_; if ($str eq '(') { push(@$stack, $str); } elsif ($name) { my %hash; if ($name =~ /^\)/) { my (@s, $t); while (($t = pop(@$stack))) { last if (ref($t) ne 'HASH'); push(@s, $t); } unless (defined($t)) { warn('ERROR: unmatched ")"'); $self->{_error} = 1; return; } foreach (@s) { push(@{$hash{C}}, $_); $_->{P} =\% hash; weaken($_->{P}); } $hash{N} = substr($name, 1) if (length($name) > 1); } else { $hash{N} = $name; } $hash{dist} = substr($dist, 1) if ($dist); while ($nhx && $nhx=~/(:([^\s=:]+)=([^:=\[\]]+))/g) { $hash{$2}=$3; $nhx =~s/$1//;
} push(@$stack,\% hash); push(@$array,\% hash); } return $str;
}
_remove_nodedescriptionprevnextTop
sub _remove_node {
  my ($self,$node) = @_;
  unless ($node) {
    croak "ERROR: Node required";
  }
  my $parent = $node->parent;
  my $internalID = $node->internalID;
  while (my $child=shift(@{$node->{C}})) {
    $self->_remove_node($child);
  }
  # remove node from list of its parent's children
@{$parent->{C}} = grep { $_->internalID ne $internalID } @{$parent->{C}}; undef %{$node}; undef $node;
}
_stringdescriptionprevnextTop
sub _string {
  # code from Li Heng
my ($self, $root) = @_; my $str; if ($root->{C}) { $str = '('; for my $p (reverse @{$root->{C}}) { $str .= $self->_string($p) . ",\n"; } chop($str); chop($str); # chop the trailing ",\n"
$str .= "\n)"; $str .= $root->{N} if ($root->{N}); # node name
$str .= ":" . $root->{dist} if ($root->{dist}); # length
# nhx output
$str .= '[&&NHX'; foreach my $p (keys %{$self->{_valid_tags}}) { $str .= ":$p=" . $root->{$p} if ($root->{$p}); } $str .= ']'; } else { # leaf
$str = $root->{N}; $str .= ":" . $root->{dist} if ($root->{dist}); $str .= '[&&NHX'; foreach my $p (keys %{$self->{_valid_tags}}) { $str .= ":$p=" . $root->{$p} if ($root->{$p}); } $str .= ']'; } return $str;
}
delete_nodedescriptionprevnextTop
sub delete_node {
  my ($self,$node) = @_;
  unless ($node) {
    croak "ERROR: Node required";
  }
  my $status = 0;
  my $parent = $node->parent;
  my $internalID = $node->internalID;
  $self->_remove_node($node);

  # delete parent node if it is left with 1 child,
# child becomes child of grand-parent keeping its original distance from it
if ($parent->children && scalar($parent->children)==1) { my ($child) = $parent->children; my $branch_length; if ($child->branch_length && $parent->branch_length) { $branch_length = $child->branch_length + $parent->branch_length; $child->branch_length($branch_length); } # add child to grand-parent's children
my $gp = $parent->parent; my $parentID = $parent->internalID; my @chldrn; if ($gp) { foreach my $n($gp->children) { if ($n->internalID eq $parentID) { push @chldrn,$child; $child->{P} = $gp; weaken($child->{P}); undef %{$n}; undef $n; $status++; } else { push @chldrn,$n; } } $gp->children(@chldrn); } } # remove root if it has one child
while (scalar($self->{_root}->children) ==1) { my ($child) = $self->{_root}->children; $self->{_root} = $child; } # removed undefined nodes from list
@{$self->{_nodes}} = grep {$_->internalID} @{$self->{_nodes}}; return $self;
}
familydescriptionprevnextTop
sub family {
  my $self = shift;
  my $family = shift if @_;
  my $dbc = $self->{'DBConnection'};
  my $famh = $dbc->get_FamilyHandle();
  my $familyID;
  if (!$family) {
    $familyID = $self->ID;
  }
  else {
    if (ref($family)) {
      return $family;
    }
    else {
      $familyID = $family;
    }
  }
  return $famh->get_by_id($familyID) if $familyID;
}
get_all_nodesdescriptionprevnextTop
sub get_all_nodes {
  my $self = shift;

  return @{$self->{_nodes}};
}
get_distancedescriptionprevnextTop
sub get_distance {
  my ($self,$node1,$node2) = @_;
  if (ref($node1) eq 'Treefam::Gene') {
    $node1 = $self->get_leaf_by_gene($node1);
  }
  if (ref($node2) eq 'Treefam::Gene') {
    $node2 = $self->get_leaf_by_gene($node2);
  }
  unless (ref($node1) eq 'Treefam::Node' && ref($node2) eq 'Treefam::Node') {
    croak "ERROR: Two nodes are required";
  }
  my $distance = $node1->branch_length;
  my $lca = $self->get_last_common_ancestor($node1,$node2);
  foreach my $node ($node1->get_all_ancestors) {
    last if ($node->internalID eq $lca->internalID);
    $distance += $node->branch_length;
  }
  $distance += $node2->branch_length;
  foreach my $node ($node2->get_all_ancestors) {
    last if ($node->internalID eq $lca->internalID);
    $distance += $node->branch_length;
  }
  return $distance;
}
get_genesdescriptionprevnextTop
sub get_genes {
  my $self = shift;
  my @genes;
  foreach my $leaf($self->get_leaves) {
    my $gene = $leaf->gene;
    push @genes, $gene if $gene;
  }
  return @genes;
}
get_last_common_ancestordescriptionprevnextTop
sub get_last_common_ancestor {
  my $self = shift;
  my @input = @_ if @_;
  my $dbc = $self->{'DBConnection'};
  my @nodes;
  foreach my $i(@input) {
    if (ref($i)=~/Gene/) {
      my $geneID = $i->ID;
      my ($node) = $self->get_nodes_by_tag_value(-G=>$geneID);
      croak "Gene not found" unless $node;
      push @nodes,$node if ($node);
    }
    else {
      push @nodes,$i;
    }
  }
  croak "ERROR: Treefam::Node objects required" unless (@nodes);

  # after code in Bio::Tree::TreeFunctionsI
my %seen_parent; my $parent = $nodes[0]; while ($parent){ $seen_parent{$parent->internalID} = $parent; $parent = $parent->parent; } $parent = $nodes[1]; while ( $parent ){ if ( $seen_parent{$parent->internalID} ){ return $seen_parent{$parent->internalID}; } $parent = $parent->parent; } carp("Can't find last common ancestor"); return undef;
}
get_leaf_by_genedescriptionprevnextTop
sub get_leaf_by_gene {
  my $self = shift;
  my $gene = shift;
  if (!$gene || ref($gene) ne 'Treefam::Gene') {
    croak "Treefam::Gene required";
  }
  my @leaves = $self->get_leaves;
  foreach my $leaf(@leaves) {
    my @tags = $leaf->get_all_tags();
    foreach my $t(@tags) {
      next unless ($t eq 'G' || $t eq 'O');
      my $v = $leaf->get_tag_value($t);
      if ($v eq $gene->ID || $v eq $gene->sequence_id) {
	return $leaf;
      }
    }
  }

  return undef;
}
get_leavesdescriptionprevnextTop
sub get_leaves {
  my $self = shift;

  # Leaves are nodes with no children
return grep { !$_->{C} } @{$self->{_nodes}};
}
get_nodes_by_tag_valuedescriptionprevnextTop
sub get_nodes_by_tag_value {
  my ($self,$tag,$value) = @_;
  $tag=~s/^-//;
my @all_nodes = $self->get_all_nodes; my @found_nodes; foreach my $node(@all_nodes) { my @tags = $node->get_all_tags(); foreach my $t(@tags) { next unless ($t eq $tag); my $v = $node->get_tag_value($t); # look for exact matches but also allow for things like 'B>=70'
next unless ( lc($v) eq lc($value) || ($tag eq 'B' && $t eq 'B' && $value=~/\D/ && eval("$v$value"))); push @found_nodes,$node; last; } } return @found_nodes;
}
get_speciesdescriptionprevnextTop
sub get_species {
  my $self = shift;
  my $type = shift if @_;
  my @species;
  # Go gene by gene as the tree might be different from the database one
my %seen; foreach my $gene($self->get_genes) { push @species,$gene->species($type) unless $seen{$gene->species($type)}++; } return @species;
}
get_species_treedescriptionprevnextTop
sub get_species_tree {
  my $self = shift;
  my $type = shift if @_;
  my $dbc = $self->{'DBConnection'};
  my $species_tree = Treefam::Tree->new($dbc,undef,'species',undef);
  @{$species_tree->{_nodes}} = ();
  my %seen;
  my @node_names = $self->get_species('latin');
  my $dbh = $dbc->{'database_handle'};
  my $query = qq(SELECT taxon_id FROM spec_names WHERE name= ?);
  my $sth1 = $dbh->prepare($query);
  $query = qq(SELECT t2.name FROM spec_nodes t1 LEFT JOIN spec_nodes t2 ON t1.parent_id=t2.taxon_id WHERE t1.taxon_id =?);
  my $sth2 = $dbh->prepare($query);
  while (my $name = shift(@node_names)) {
    my $node;
    if ($seen{$name}) {
      $node = $seen{$name};
    }
    else {
      $node = Treefam::Node->new($dbc);
      $node->name($name);
#      $node->taxon($name);
$node->branch_length(0.1); $seen{$name} = $node; } push @{$species_tree->{_nodes}},$node; $sth1->execute($name); my ($taxid) = $sth1->fetchrow_array(); $sth2->execute($taxid); my ($n) = $sth2->fetchrow_array(); if ($n) { my $parent; if ($seen{$n}) { $parent = $seen{$n}; } else { $parent = Treefam::Node->new($dbc); $parent->name($n); # $parent->taxon($n);
$parent->branch_length(0.1); push @node_names,$n; $seen{$n} = $parent; } $node->parent($parent); push @{$parent->{C}},$node; } else { # no parent, this is the root
$species_tree->{_root} = $node; } } if (!defined($type)|| (lc($type) ne 'complete' && lc($type) ne 'full')) { foreach my $node (@{$species_tree->{_nodes}}) { # delete node if it has only 1 child,
# child becomes child of grand-parent
if ($node->children && scalar($node->children)==1) { my ($child) = $node->children; my $parent = $node->parent; my $nodeID = $node->internalID; my @chldrn; if ($parent) { foreach my $n ($parent->children) { if ($n->internalID eq $nodeID) { push @chldrn,$child; $child->{P} = $parent; weaken($child->{P}); undef %{$n}; undef $n; } else { push @chldrn,$n; } } $parent->children(@chldrn); } } } # remove root if it has one child
while (scalar($species_tree->{_root}->children) ==1) { my ($child) = $species_tree->{_root}->children; $species_tree->{_root} = $child; } } @{$species_tree->{_nodes}} = grep { $_->internalID } @{$species_tree->{_nodes}}; return $species_tree;
}
get_subtreedescriptionprevnextTop
sub get_subtree {
  my $self = shift;
  my @nodes = @_;
  my $dbc = $self->{'DBConnection'};
  my $nhx;
  if (scalar(@nodes)==2) {
    my $node = $self->get_last_common_ancestor(@nodes);
    $nhx = $self->_string($node);
  }
  elsif (scalar(@nodes)==1) {
    $nhx = $self->_string($nodes[0]);
  }
  else {
    croak "ERROR: One or two nodes required";
  }

  return new Treefam::Tree($dbc,$self->ID,$self->type,$nhx);
}
newdescriptionprevnextTop
sub new {
  my ($class,$dbc,$familyID,$type,$nhx) = @_;
  my $self = {};

  if (!$nhx && $familyID) {
    my $dbh = $dbc->{'database_handle'};
    my $query = qq( SELECT tree FROM trees WHERE AC= ? AND type= ?);
    my $sth= $dbh->prepare ($query);
    $sth->execute($familyID,$type);
    my ($nhx) = $sth->fetchrow_array();
  }

  $self->{'DBConnection'} = $dbc;
  weaken($self->{'DBConnection'});

  $self->{'ID'} = $familyID if $familyID;
  $self->{'type'} = $type if $type;
  $self->{'nhx'} = $nhx if $nhx;
  %{$self->{_valid_tags}} = (O=>1, Sd=>1, B=>1, S=>1, G=>1, E=>1, D=>1, Com=>1);
  # parse tree (using Li Heng's code)
if ($nhx) { $self->{_error} = 0; @{$self->{_nodes}} = (); my ($array, @stack); $array =\@ {$self->{_nodes}}; my $pos; my $str = $nhx; $_ = (($pos = index($str, ';')) >= 0)? substr($str, 0, $pos) : $str; s/\s//g; while ($_=~/(\(|((\)?[^,;:\[\]\(\)]+|\))(:[\d.eE\-]+)?(\[&&NHX[^\[\]]*\])?))/g) { my $st = &_parse($self,$array,\@stack,$1,$3,$4,$5); } if (@stack != 1) { my $count = @stack; warn(qq(ERROR: unmatched "(" ($count))); $self->{_error} = 1; @stack = (); } if ($self->{_error} == 0) { $self->{_root} = shift(@stack); weaken($self->{_root}); } else { @{$self->{_nodes}} = (); delete($self->{_root}); } if ($self->{_root}) { my $j = 0; foreach my $p (@{$self->{_nodes}}) { ++$j unless ($p->{C}); } $self->{_n_leaf} = $j; } # make all nodes Treefam::Node objects
foreach my $n (@{$self->{_nodes}}) { $n = Treefam::Node->new($dbc,$n); } } bless ($self, $class); return $self;
}
nhxdescriptionprevnextTop
sub nhx {
  my $self = shift;
  if (@_) {
    $self->{'nhx'} = shift;
  }
  else {
    # do this each time because tree may have been modified
# between 2 calls to this function.
$self->{'nhx'} = $self->_string($self->root). ";\n"; } return $self->{'nhx'};
}
rootdescriptionprevnextTop
sub root {
  my $self = shift;
  if (@_) {
    $self->{_root}  = shift;
    weaken($self->{_root});
  }

  return $self->{_root};
}
scoredescriptionprevnextTop
sub score {
  my $self = shift;
  $self->{'score'} = shift if @_;
  return $self->{'score'};
}
typedescriptionprevnextTop
sub type {
  my $self = shift;
  $self->{'type'} = shift if @_;
  return $self->{'type'};
}
General documentation
CONTACTTop
 jkh1@sanger.ac.uk