Treefam
Tree
Summary
Package variables
No package variables defined.
Included modules
Carp
Scalar::Util qw ( weaken )
Synopsis
use Treefam::DBConnection;
my $dbc = new Treefam::DBConnection ();
my $trh = $dbc->get_TreeHandle();
my $tree = $trh->get_by_id('TF101001','FULL');
# find duplications
my @nodes = $tree->get_nodes_by_tag_value(-D=>'y');
# find nodes with bootstrap>=50
my @nodes = $tree->get_nodes_by_tag_value(-B=>'>=50');
Description
Representation of a Treefam tree.
Treefam trees are stored in nhx format.
Current NHX tags valid in Treefam are:
B bootstrap value
S taxon name
D 'Y' for duplication, 'N' for speciation, or not defined
O sequence ID
G gene ID
E gene loss stored in a string like "$-Eutheria-DROME"
Methods
Methods description
Arg: optional, Treefam family ID
Description: Gets/sets Treefam family ID for the tree
Returntype: string |
Arg: a Treefam::Node object
Description: Removes given node (and its children) from the tree.
Returntype: the modified Treefam::Tree object |
Arg: (optional) Treefam::Family object or family AC
Description: Gets/sets family the tree belongs to
Returntype: Treefam::Family object |
Description: Gets all nodes in the tree
Returntype: list of Treefam::Node objects |
Arg: list of 2 Treefam::Node objects or Treefam::Gene objects
Description: Gets distance between 2 nodes as cumulative branch
length
Returntype: double |
Description: Gets genes that are in the tree
Returntype: list of Treefam::Gene objects |
Arg: pair of Treefam::Node or Treefam::Gene objects
Description: Gets last common ancestor of the given nodes/genes
Returntype: a Treefam::Node object |
Arg: Treefam::Gene object
Description: Gets leaf node for the given gene
Returntype: Treefam::Node object |
Description: Gets leaves of the tree
Returntype: list of Treefam::Node objects |
Arg: -key => value
Description: Gets all nodes for which the 'key' tag matches 'value'
Returntype: list of Treefam::Node objects |
Arg: (optional) type of species name: latin or swcode
(5 letters Swissprot code)
Description: Gets a list of species that have at least one gene in
the tree, defaults to latin name of species
Returntype: list of strings |
Arg: (optional) complete or short
Description: Gets species tree for the species in this Treefam
tree. If arg is set to 'complete', the full tree up
to the species root and with all intermediate nodes
is returned. If arg is set to 'short', returns a tree
with only the leaves and the internal nodes that have
more than one child. This is the default behaviour.
Returntype: Treefam::Tree object |
Arg: list of 2 Treefam::Gene or Treefam::Node objects that define
the subtree
Description: Gets a subtree rooted at the last common ancestor of
the given genes/nodes
Returntype: Treefam::Tree |
Arg1: Treefam::DBConnection
Arg2: family ID or 'species' to get a species tree
Arg3: type of tree (only if Arg2 is a family ID)
Arg4: optional, tree in nhx format
Description: Creates a new tree object.
Returntype: Treefam::Tree |
Arg: optional, string
Description: Tree in nhx format
Returntype: string |
Arg: optional, a Treefam::Node object
Description: Gets/sets root of the tree
Returntype: a Treefam::Node object |
Arg: (optional) double
Description: Gets/sets score for the tree, for example maximum
likelihood
Returntype: double |
Arg: optional, family type
Description: Gets/sets tree type: FULL, SEED or CLEAN
Returntype: string |
Methods code
sub ID
{
my $self = shift;
$self->{'ID'} = shift if @_;
return $self->{'ID'};} |
sub _parse
{ my ($self, $array, $stack, $str, $name, $dist, $nhx) = @_;
if ($str eq '(') {
push(@$stack, $str);
}
elsif ($name) {
my %hash;
if ($name =~ /^\)/) {
my (@s, $t);
while (($t = pop(@$stack))) {
last if (ref($t) ne 'HASH');
push(@s, $t);
}
unless (defined($t)) {
warn('ERROR: unmatched ")"');
$self->{_error} = 1;
return;
}
foreach (@s) {
push(@{$hash{C}}, $_);
$_->{P} =\% hash;
weaken($_->{P});
}
$hash{N} = substr($name, 1) if (length($name) > 1);
}
else {
$hash{N} = $name;
}
$hash{dist} = substr($dist, 1) if ($dist);
while ($nhx && $nhx=~/(:([^\s=:]+)=([^:=\[\]]+))/g) {
$hash{$2}=$3;
$nhx =~s/$1//; }
push(@$stack,\% hash);
push(@$array,\% hash);
}
return $str; } |
sub _remove_node
{
my ($self,$node) = @_;
unless ($node) {
croak "ERROR: Node required";
}
my $parent = $node->parent;
my $internalID = $node->internalID;
while (my $child=shift(@{$node->{C}})) {
$self->_remove_node($child);
}
@{$parent->{C}} = grep { $_->internalID ne $internalID } @{$parent->{C}};
undef %{$node};
undef $node;} |
sub _string
{ my ($self, $root) = @_;
my $str;
if ($root->{C}) {
$str = '(';
for my $p (reverse @{$root->{C}}) {
$str .= $self->_string($p) . ",\n";
}
chop($str); chop($str); $str .= "\n)";
$str .= $root->{N} if ($root->{N}); $str .= ":" . $root->{dist} if ($root->{dist}); $str .= '[&&NHX';
foreach my $p (keys %{$self->{_valid_tags}}) {
$str .= ":$p=" . $root->{$p} if ($root->{$p});
}
$str .= ']';
}
else { $str = $root->{N};
$str .= ":" . $root->{dist} if ($root->{dist});
$str .= '[&&NHX';
foreach my $p (keys %{$self->{_valid_tags}}) {
$str .= ":$p=" . $root->{$p} if ($root->{$p});
}
$str .= ']';
}
return $str; } |
sub delete_node
{
my ($self,$node) = @_;
unless ($node) {
croak "ERROR: Node required";
}
my $status = 0;
my $parent = $node->parent;
my $internalID = $node->internalID;
$self->_remove_node($node);
if ($parent->children && scalar($parent->children)==1) {
my ($child) = $parent->children;
my $branch_length;
if ($child->branch_length && $parent->branch_length) {
$branch_length = $child->branch_length + $parent->branch_length;
$child->branch_length($branch_length);
}
my $gp = $parent->parent;
my $parentID = $parent->internalID;
my @chldrn;
if ($gp) {
foreach my $n($gp->children) {
if ($n->internalID eq $parentID) {
push @chldrn,$child;
$child->{P} = $gp;
weaken($child->{P});
undef %{$n};
undef $n;
$status++;
}
else {
push @chldrn,$n;
}
}
$gp->children(@chldrn);
}
}
while (scalar($self->{_root}->children) ==1) {
my ($child) = $self->{_root}->children;
$self->{_root} = $child;
}
@{$self->{_nodes}} = grep {$_->internalID} @{$self->{_nodes}};
return $self;} |
sub family
{
my $self = shift;
my $family = shift if @_;
my $dbc = $self->{'DBConnection'};
my $famh = $dbc->get_FamilyHandle();
my $familyID;
if (!$family) {
$familyID = $self->ID;
}
else {
if (ref($family)) {
return $family;
}
else {
$familyID = $family;
}
}
return $famh->get_by_id($familyID) if $familyID;} |
sub get_all_nodes
{
my $self = shift;
return @{$self->{_nodes}};} |
sub get_distance
{
my ($self,$node1,$node2) = @_;
if (ref($node1) eq 'Treefam::Gene') {
$node1 = $self->get_leaf_by_gene($node1);
}
if (ref($node2) eq 'Treefam::Gene') {
$node2 = $self->get_leaf_by_gene($node2);
}
unless (ref($node1) eq 'Treefam::Node' && ref($node2) eq 'Treefam::Node') {
croak "ERROR: Two nodes are required";
}
my $distance = $node1->branch_length;
my $lca = $self->get_last_common_ancestor($node1,$node2);
foreach my $node ($node1->get_all_ancestors) {
last if ($node->internalID eq $lca->internalID);
$distance += $node->branch_length;
}
$distance += $node2->branch_length;
foreach my $node ($node2->get_all_ancestors) {
last if ($node->internalID eq $lca->internalID);
$distance += $node->branch_length;
}
return $distance;} |
sub get_genes
{
my $self = shift;
my @genes;
foreach my $leaf($self->get_leaves) {
my $gene = $leaf->gene;
push @genes, $gene if $gene;
}
return @genes;} |
sub get_last_common_ancestor
{
my $self = shift;
my @input = @_ if @_;
my $dbc = $self->{'DBConnection'};
my @nodes;
foreach my $i(@input) {
if (ref($i)=~/Gene/) {
my $geneID = $i->ID;
my ($node) = $self->get_nodes_by_tag_value(-G=>$geneID);
croak "Gene not found" unless $node;
push @nodes,$node if ($node);
}
else {
push @nodes,$i;
}
}
croak "ERROR: Treefam::Node objects required" unless (@nodes);
my %seen_parent;
my $parent = $nodes[0];
while ($parent){
$seen_parent{$parent->internalID} = $parent;
$parent = $parent->parent;
}
$parent = $nodes[1];
while ( $parent ){
if ( $seen_parent{$parent->internalID} ){
return $seen_parent{$parent->internalID};
}
$parent = $parent->parent;
}
carp("Can't find last common ancestor");
return undef;} |
sub get_leaf_by_gene
{
my $self = shift;
my $gene = shift;
if (!$gene || ref($gene) ne 'Treefam::Gene') {
croak "Treefam::Gene required";
}
my @leaves = $self->get_leaves;
foreach my $leaf(@leaves) {
my @tags = $leaf->get_all_tags();
foreach my $t(@tags) {
next unless ($t eq 'G' || $t eq 'O');
my $v = $leaf->get_tag_value($t);
if ($v eq $gene->ID || $v eq $gene->sequence_id) {
return $leaf;
}
}
}
return undef;} |
sub get_leaves
{
my $self = shift;
return grep { !$_->{C} } @{$self->{_nodes}}; } |
sub get_nodes_by_tag_value
{
my ($self,$tag,$value) = @_;
$tag=~s/^-//; my @all_nodes = $self->get_all_nodes;
my @found_nodes;
foreach my $node(@all_nodes) {
my @tags = $node->get_all_tags();
foreach my $t(@tags) {
next unless ($t eq $tag);
my $v = $node->get_tag_value($t);
next unless ( lc($v) eq lc($value) || ($tag eq 'B' && $t eq 'B' && $value=~/\D/ && eval("$v$value")));
push @found_nodes,$node;
last;
}
}
return @found_nodes; } |
sub get_species
{
my $self = shift;
my $type = shift if @_;
my @species;
my %seen;
foreach my $gene($self->get_genes) {
push @species,$gene->species($type) unless $seen{$gene->species($type)}++;
}
return @species; } |
sub get_species_tree
{
my $self = shift;
my $type = shift if @_;
my $dbc = $self->{'DBConnection'};
my $species_tree = Treefam::Tree->new($dbc,undef,'species',undef);
@{$species_tree->{_nodes}} = ();
my %seen;
my @node_names = $self->get_species('latin');
my $dbh = $dbc->{'database_handle'};
my $query = qq(SELECT taxon_id FROM spec_names WHERE name= ?);
my $sth1 = $dbh->prepare($query);
$query = qq(SELECT t2.name FROM spec_nodes t1 LEFT JOIN spec_nodes t2 ON t1.parent_id=t2.taxon_id WHERE t1.taxon_id =?);
my $sth2 = $dbh->prepare($query);
while (my $name = shift(@node_names)) {
my $node;
if ($seen{$name}) {
$node = $seen{$name};
}
else {
$node = Treefam::Node->new($dbc);
$node->name($name);
$node->branch_length(0.1);
$seen{$name} = $node;
}
push @{$species_tree->{_nodes}},$node;
$sth1->execute($name);
my ($taxid) = $sth1->fetchrow_array();
$sth2->execute($taxid);
my ($n) = $sth2->fetchrow_array();
if ($n) {
my $parent;
if ($seen{$n}) {
$parent = $seen{$n};
}
else {
$parent = Treefam::Node->new($dbc);
$parent->name($n);
$parent->branch_length(0.1);
push @node_names,$n;
$seen{$n} = $parent;
}
$node->parent($parent);
push @{$parent->{C}},$node;
}
else { $species_tree->{_root} = $node;
}
}
if (!defined($type)|| (lc($type) ne 'complete' && lc($type) ne 'full')) {
foreach my $node (@{$species_tree->{_nodes}}) {
if ($node->children && scalar($node->children)==1) {
my ($child) = $node->children;
my $parent = $node->parent;
my $nodeID = $node->internalID;
my @chldrn;
if ($parent) {
foreach my $n ($parent->children) {
if ($n->internalID eq $nodeID) {
push @chldrn,$child;
$child->{P} = $parent;
weaken($child->{P});
undef %{$n};
undef $n;
}
else {
push @chldrn,$n;
}
}
$parent->children(@chldrn);
}
}
}
while (scalar($species_tree->{_root}->children) ==1) {
my ($child) = $species_tree->{_root}->children;
$species_tree->{_root} = $child;
}
}
@{$species_tree->{_nodes}} = grep { $_->internalID } @{$species_tree->{_nodes}};
return $species_tree;} |
sub get_subtree
{
my $self = shift;
my @nodes = @_;
my $dbc = $self->{'DBConnection'};
my $nhx;
if (scalar(@nodes)==2) {
my $node = $self->get_last_common_ancestor(@nodes);
$nhx = $self->_string($node);
}
elsif (scalar(@nodes)==1) {
$nhx = $self->_string($nodes[0]);
}
else {
croak "ERROR: One or two nodes required";
}
return new Treefam::Tree($dbc,$self->ID,$self->type,$nhx);} |
sub new
{
my ($class,$dbc,$familyID,$type,$nhx) = @_;
my $self = {};
if (!$nhx && $familyID) {
my $dbh = $dbc->{'database_handle'};
my $query = qq( SELECT tree FROM trees WHERE AC= ? AND type= ?);
my $sth= $dbh->prepare ($query);
$sth->execute($familyID,$type);
my ($nhx) = $sth->fetchrow_array();
}
$self->{'DBConnection'} = $dbc;
weaken($self->{'DBConnection'});
$self->{'ID'} = $familyID if $familyID;
$self->{'type'} = $type if $type;
$self->{'nhx'} = $nhx if $nhx;
%{$self->{_valid_tags}} = (O=>1, Sd=>1, B=>1, S=>1, G=>1, E=>1, D=>1, Com=>1);
if ($nhx) {
$self->{_error} = 0;
@{$self->{_nodes}} = ();
my ($array, @stack);
$array =\@ {$self->{_nodes}};
my $pos;
my $str = $nhx;
$_ = (($pos = index($str, ';')) >= 0)? substr($str, 0, $pos) : $str;
s/\s//g;
while ($_=~/(\(|((\)?[^,;:\[\]\(\)]+|\))(:[\d.eE\-]+)?(\[&&NHX[^\[\]]*\])?))/g) {
my $st = &_parse($self,$array,\@stack,$1,$3,$4,$5);
}
if (@stack != 1) {
my $count = @stack;
warn(qq(ERROR: unmatched "(" ($count)));
$self->{_error} = 1;
@stack = ();
}
if ($self->{_error} == 0) {
$self->{_root} = shift(@stack);
weaken($self->{_root});
}
else {
@{$self->{_nodes}} = ();
delete($self->{_root});
}
if ($self->{_root}) {
my $j = 0;
foreach my $p (@{$self->{_nodes}}) {
++$j unless ($p->{C});
}
$self->{_n_leaf} = $j;
}
foreach my $n (@{$self->{_nodes}}) {
$n = Treefam::Node->new($dbc,$n);
}
}
bless ($self, $class);
return $self;} |
sub nhx
{
my $self = shift;
if (@_) {
$self->{'nhx'} = shift;
}
else {
$self->{'nhx'} = $self->_string($self->root). ";\n";
}
return $self->{'nhx'};} |
sub root
{
my $self = shift;
if (@_) {
$self->{_root} = shift;
weaken($self->{_root});
}
return $self->{_root};} |
sub score
{
my $self = shift;
$self->{'score'} = shift if @_;
return $self->{'score'};} |
sub type
{
my $self = shift;
$self->{'type'} = shift if @_;
return $self->{'type'};} |
General documentation