Treefam Gene
SummaryPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
 Treefam::Gene
Package variables
No package variables defined.
Included modules
Scalar::Util qw ( weaken )
Treefam::DBConnection
Synopsis
 use Treefam::DBConnection;

 my $dbc = new Treefam::DBConnection ();

 my $gene_handle = $dbc->get_GeneHandle();

 my $gene = $gene_handle->get_by_id('ENSG00000087586');

 my $symbol= $gene->symbol();
 my @names = $gene->names();
 my $family = $gene->family();
 my $hmmer = $gene->hmmer_score();
 my @orthologs = $gene->get_orthologs('Saccharomyces cerevisiae','Schizosaccharomyces pombe','Drosophila melanogaster','Caenorhabditis elegans');
Description
 Representation of a Treefam gene.
Methods
IDDescriptionCode
chromosomeDescriptionCode
familyDescriptionCode
get_domainsDescriptionCode
get_orthologsDescriptionCode
get_paralogsDescriptionCode
hmmer_evalueDescriptionCode
hmmer_scoreDescriptionCode
namesDescriptionCode
newDescriptionCode
positionDescriptionCode
sequence_idDescriptionCode
speciesDescriptionCode
symbolDescriptionCode
transcriptsDescriptionCode
Methods description
IDcode    nextTop
 Arg: optional, Treefam gene ID
 Description: Gets/sets Treefam ID
 Returntype: string
chromosomecodeprevnextTop
   Arg: optional, chromosome as string
   Description: Gets/sets chromosome that gene is on
   Returntype: string
familycodeprevnextTop
 Arg: optional, Treefam::Family object
 Description: Gets/sets family the gene belongs to
 Returntype: Treefam::Family object
get_domainscodeprevnextTop
 Arg: (optional) e-value cut-off
 Description: Gets this gene's protein domains with e-value
              below given cut-off, default is 1e-2.
 Returntype: list of strings (PFAM domain IDs)
get_orthologscodeprevnextTop
 Arg: optional, list of species (by latin name) from which to
      get orthologs
 Description: Gets this gene's orthologs
 Returntype: list of Treefam::Gene objects
get_paralogscodeprevnextTop
 Description: Gets this gene's paralogs. We currently only get
              within-species paralogs
 Returntype: list of Treefam::Gene objects
hmmer_evaluecodeprevnextTop
 Arg1: optional, Treefam::Family object or AC
 Arg2: optional, hmmer evalue
 Description: Gets/sets hmmer evalue for the gene in the given
              family, defaults to the family the gene belongs to
 Returntype: double
hmmer_scorecodeprevnextTop
 Arg1: optional, Treefam::Family object or AC
 Arg2: optional, hmmer score
 Description: Gets/sets hmmer score for the gene in the given
              family, defaults to the family the gene belongs to
 Returntype: double
namescodeprevnextTop
 Arg: optional, names as one string
 Description: Gets/sets the gene's names
 Returntype: string
newcodeprevnextTop
 Arg1: Treefam::DBConnection
 Arg2: optional, gene ID
 Description: Creates a new gene object.
 Returntype: Treefam::Gene
positioncodeprevnextTop
 Arg: optional, integer position of gene (coding start) on
      chromosome
 Description: Gets/sets position of gene (coding start) on
              chromosome. Returns 5'-most position in case
              of multiple starts.
 Returntype: integer
sequence_idcodeprevnextTop
 Arg: optional, id of the gene's representative sequence
 Description: Gets/sets the gene's representative sequence used
              in the trees
 Returntype: string
speciescodeprevnextTop
 Arg1: optional, type of name: latin or swcode
       (5-letter species code used in Swiss-Prot)
 Arg2: optional, species name
 Description: Gets/sets gene's species. Returns latin name
              by default.
 Returntype: string
symbolcodeprevnextTop
 Arg: optional, gene symbol
 Description: Gets/sets gene's symbol
 Returntype: string
transcriptscodeprevnextTop
 Arg: optional, list of transcript IDs
 Description: Gets/sets transcripts associated with gene
 Returntype: list of transcript IDs
Methods code
IDdescriptionprevnextTop
# Arg: optional, Treefam gene ID
# Description: Accessor for the gene's Treefam ID. Whenever an argument
#              is passed (even an undefined one) it replaces the stored
#              ID; the stored ID is then returned.
# Returntype: string
sub ID {
  my ($gene, @new_id) = @_;
  if (@new_id) {
    $gene->{'ID'} = $new_id[0];
  }
  return $gene->{'ID'};
}
chromosomedescriptionprevnextTop
# Arg: optional, chromosome as string
# Description: Gets/sets the chromosome the gene is on. A true argument
#              is stored as the new value. When nothing is stored yet,
#              the TARGET column of the map table is looked up for the
#              gene's representative sequence and cached on the object.
# Returntype: string
sub chromosome {
  my ($gene, $chrom) = @_;
  if ($chrom) {
    $gene->{'chromosome'} = $chrom;
  }
  return $gene->{'chromosome'} if defined $gene->{'chromosome'};

  # Nothing cached: resolve through the representative sequence ID.
  my $sequence_id = $gene->sequence_id;
  my $connection = $gene->{'DBConnection'};
  my $handle = $connection->{'database_handle'};
  my $lookup = $handle->prepare("SELECT m.TARGET
                             FROM map m WHERE m.ID = ?");
  $lookup->execute($sequence_id);
  my ($target) = $lookup->fetchrow_array();
  $gene->{'chromosome'} = $target;
  $lookup->finish();

  return $gene->{'chromosome'};
}
familydescriptionprevnextTop
# Arg: optional, Treefam::Family object
# Description: Gets/sets the family the gene belongs to. Any argument
#              passed replaces the stored family. When no family is
#              stored, it is obtained from the connection's family
#              handle via get_by_gene and cached on the object.
# Returntype: Treefam::Family object
sub family {
  my ($gene, @new_family) = @_;
  if (@new_family) {
    $gene->{'family'} = $new_family[0];
  }
  return $gene->{'family'} if defined $gene->{'family'};

  my $connection = $gene->{'DBConnection'};
  my $family_handle = $connection->get_FamilyHandle();
  $gene->{'family'} = $family_handle->get_by_gene($gene);

  return $gene->{'family'};
}
get_domainsdescriptionprevnextTop
# Arg: (optional) e-value cut-off
# Description: Gets this gene's protein domains with e-value below the
#              given cut-off (default 1e-2). The result is cached on the
#              object together with the cut-off it was fetched with, so
#              a call with a different cut-off queries the database
#              again instead of returning the stale list.
# Returntype: list of strings (PFAM domain IDs)
sub get_domains {
  my ($self, $cutoff) = @_;

  # A missing, undefined or zero cut-off selects the default.
  $cutoff = 1e-2 if !$cutoff;
  $cutoff += 0;    # force a number so the value is bound numerically

  # A list cached without a recorded cut-off is returned as is; a list
  # cached for another cut-off is refreshed.
  my $cached_for = $self->{'domains_cutoff'};
  my $stale = defined $cached_for && $cached_for != $cutoff;

  if (!$self->{'domains'} || $stale) {
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $seqID = $self->sequence_id;

    # Both values are bound: nothing is interpolated into the SQL.
    my $sth = $dbh->prepare(
      q(SELECT DISTINCT PFAM_ID FROM pfam WHERE ID = ? AND EVALUE < ?));
    $sth->execute($seqID, $cutoff);

    my (%seen, @domains);
    while (my ($pfamid) = $sth->fetchrow_array()) {
      push @domains, $pfamid if ($pfamid && !$seen{$pfamid}++);
    }
    $sth->finish();

    $self->{'domains'} = \@domains;
    $self->{'domains_cutoff'} = $cutoff;
  }

  # remove undefined/empty values
  @{ $self->{'domains'} } = grep { $_ } @{ $self->{'domains'} };

  return @{ $self->{'domains'} };
}
get_orthologsdescriptionprevnextTop
# Arg: optional, list of species (by latin name) from which to
#      get orthologs
# Description: Gets this gene's orthologs. The complete list is fetched
#              once and cached on the object; a species filter is
#              applied to a copy, so the cache always keeps every
#              ortholog and later calls may ask for other species.
# Returntype: list of Treefam::Gene objects
sub get_orthologs {
  my ($self, @species) = @_;

  if (!defined $self->{'orthologs'}) {
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $geneID = $self->ID;
    my $spec = $self->species();

    my $genequery = qq(SELECT IDX FROM genes WHERE GID= ?);
    my $sth0 = $dbh->prepare($genequery);

    # The ortholog table is searched in both directions (idx1 -> idx2
    # and idx2 -> idx1); rows of the gene's own species are paralogs.
    my $orthoquery1 = qq (SELECT DISTINCT g.GID, sp.taxname FROM genes g, ortholog o, species sp WHERE o.idx1 = ? AND o.idx2 = g.idx AND g.TAX_ID = sp.TAX_ID );
    my $orthoquery2 = qq (SELECT DISTINCT g.GID, sp.taxname FROM genes g, ortholog o, species sp WHERE o.idx2 = ? AND o.idx1 = g.idx AND g.TAX_ID = sp.TAX_ID );
    my $sth1 = $dbh->prepare($orthoquery1);
    my $sth2 = $dbh->prepare($orthoquery2);

    my $gh;      # gene handle, created on first use only
    my %seen;    # ortholog IDs already collected

    $sth0->execute($geneID);
    while (my ($idx) = $sth0->fetchrow_array()) {
      foreach my $tmpsth ($sth1, $sth2) {
        $tmpsth->execute($idx);
        while (my ($orthologID, $species) = $tmpsth->fetchrow_array()) {
          next if ($species eq $spec);      # discard paralogs
          next if $seen{$orthologID}++;     # same gene reached twice
          $gh ||= $dbc->get_GeneHandle();
          my $ortholog = $gh->get_by_id($orthologID);
          if ($ortholog) {
            push @{$self->{'orthologs'}}, $ortholog;
            $ortholog->species('latin', $species);
          }
        }
      }
    }
  }

  return () if !defined $self->{'orthologs'};
  return @{$self->{'orthologs'}} if !@species;

  # reduce a copy of the list to the requested species
  my %wanted;
  @wanted{@species} = (1) x @species;
  return grep { $wanted{$_->species} } @{$self->{'orthologs'}};
}
get_paralogsdescriptionprevnextTop
# Description: Gets this gene's paralogs. We currently only get
#              within-species paralogs: genes linked to this one in
#              the ortholog table that belong to the same species.
#              The list is fetched once and cached on the object;
#              each paralog appears only once even when it is reached
#              through several table rows.
# Returntype: list of Treefam::Gene objects
sub get_paralogs {
  my $self = shift;

  if (!defined $self->{'paralogs'}) {
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $geneID = $self->ID;
    my $spec = $self->species();

    my $genequery = qq(SELECT IDX FROM genes WHERE GID= ?);
    my $sth0 = $dbh->prepare($genequery);

    # The ortholog table is searched in both directions (idx1 -> idx2
    # and idx2 -> idx1); only rows of the gene's own species are kept.
    my $orthoquery1 = qq (SELECT DISTINCT g.GID, sp.taxname FROM genes g, ortholog o, species sp WHERE o.idx1 = ? AND o.idx2 = g.idx AND g.TAX_ID = sp.TAX_ID );
    my $orthoquery2 = qq (SELECT DISTINCT g.GID, sp.taxname FROM genes g, ortholog o, species sp WHERE o.idx2 = ? AND o.idx1 = g.idx AND g.TAX_ID = sp.TAX_ID );
    my $sth1 = $dbh->prepare($orthoquery1);
    my $sth2 = $dbh->prepare($orthoquery2);

    my $gh;      # gene handle, created on first use only
    my %seen;    # paralog IDs already collected

    $sth0->execute($geneID);
    while (my ($idx) = $sth0->fetchrow_array()) {
      foreach my $tmpsth ($sth1, $sth2) {
        $tmpsth->execute($idx);
        while (my ($paralogID, $species) = $tmpsth->fetchrow_array()) {
          # discard orthologs and self
          next if ($species ne $spec || $paralogID eq $geneID);
          next if $seen{$paralogID}++;    # same gene reached twice
          $gh ||= $dbc->get_GeneHandle();
          my $paralog = $gh->get_by_id($paralogID);
          if ($paralog) {
            push @{$self->{'paralogs'}}, $paralog;
          }
        }
      }
    }
  }

  return () if !defined $self->{'paralogs'};
  return @{$self->{'paralogs'}};
}
hmmer_evaluedescriptionprevnextTop
# Arg1: optional, Treefam::Family object or AC
# Arg2: optional, hmmer evalue
# Description: Gets/sets hmmer evalue for the gene in the given
#              family, defaults to the family the gene belongs to.
#              Values are cached per family; a family without a cached
#              value is looked up in the hmmer table.
# Returntype: double (undef when no family can be determined)
sub hmmer_evalue {
  my ($self, $family, $evalue) = @_;

  if (!$family) {
    $family = $self->family();
    return undef unless $family;
  }
  my $familyID = ref($family) ? $family->ID : $family;
  return undef unless $familyID;

  # defined() rather than truth, so that an e-value of 0 can be stored
  if (defined $evalue) {
    $self->{'hmmer_evalue'}{$familyID} = $evalue;
  }

  # The cache is checked per family: a value cached for one family
  # must not suppress the lookup for another.
  if (!defined $self->{'hmmer_evalue'}{$familyID}) {
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $geneID = $self->ID;
    my $sth = $dbh->prepare("SELECT h.evalue
                             FROM hmmer h, genes g
                             WHERE g.gid = ?
                             AND g.id = h.id
                             AND h.ac = ?");
    $sth->execute($geneID, $familyID);
    ($self->{'hmmer_evalue'}{$familyID}) = $sth->fetchrow_array();
    $sth->finish();
  }

  return $self->{'hmmer_evalue'}{$familyID};
}
hmmer_scoredescriptionprevnextTop
# Arg1: optional, Treefam::Family object or AC
# Arg2: optional, hmmer score
# Description: Gets/sets hmmer score for the gene in the given family,
#              defaults to the family the gene belongs to. Values are
#              cached per family; a family without a cached value is
#              looked up in the hmmer table.
# Returntype: double (undef when no family can be determined)
sub hmmer_score {
  my ($self, $family, $score) = @_;

  if (!$family) {
    $family = $self->family();
    return undef unless $family;
  }
  my $familyID = ref($family) ? $family->ID : $family;
  return undef unless $familyID;

  # defined() rather than truth, so that a score of 0 can be stored
  if (defined $score) {
    $self->{'hmmer_score'}{$familyID} = $score;
  }

  if (!defined($self->{'hmmer_score'}{$familyID})) {
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $geneID = $self->ID;
    my $sth = $dbh->prepare("SELECT h.score
                             FROM hmmer h, genes g
                             WHERE g.gid = ?
                             AND g.id = h.id
                             AND h.ac = ?");
    $sth->execute($geneID, $familyID);
    ($self->{'hmmer_score'}{$familyID}) = $sth->fetchrow_array();
    $sth->finish();
  }

  return $self->{'hmmer_score'}{$familyID};
}
namesdescriptionprevnextTop
# Arg: optional, names as one string
# Description: Gets/sets the gene's names. A true argument is stored as
#              the new value. When nothing is stored yet, the desc
#              column of the genes table is read for this gene ID and
#              cached on the object.
# Returntype: string
sub names {
  my ($gene, $names) = @_;
  if ($names) {
    $gene->{'names'} = $names;
  }
  return $gene->{'names'} if defined $gene->{'names'};

  my $gene_id = $gene->{'ID'};
  my $connection = $gene->{'DBConnection'};
  my $handle = $connection->{'database_handle'};
  my $lookup = $handle->prepare("SELECT DISTINCT g.desc
                             FROM genes g
                             WHERE g.gid= ?");
  $lookup->execute($gene_id);
  my ($description) = $lookup->fetchrow_array();
  $gene->{'names'} = $description;
  $lookup->finish();

  return $gene->{'names'};
}
newdescriptionprevnextTop
# Arg1: Treefam::DBConnection
# Arg2: optional, gene ID
# Description: Creates a new gene object. The connection is stored as a
#              weakened reference, so the gene does not keep the
#              connection alive on its own. No database access happens
#              here; attributes are loaded by the accessors on demand.
# Returntype: Treefam::Gene
sub new {
  my ($class, $dbc, $geneID) = @_;

  my $self = bless {
    'DBConnection' => $dbc,
    'ID'           => $geneID,
  }, $class;
  weaken($self->{'DBConnection'});

  return $self;
}
positiondescriptionprevnextTop
# Arg: optional, integer position of gene (coding start) on
#      chromosome
# Description: Gets/sets position of gene (coding start) on
#              chromosome. A true argument is stored as the new value.
#              Otherwise the coordinates of every transcript are read
#              from the map table: on the plus strand the smallest
#              start wins, on the minus strand the largest.
# Returntype: integer
sub position {
  my ($self, $pos) = @_;
  if ($pos) {
    $self->{'position'} = $pos;
  }
  return $self->{'position'} if defined $self->{'position'};

  my $connection = $self->{'DBConnection'};
  my $handle = $connection->{'database_handle'};
  my @transcript_ids = $self->transcripts;
  my $coding_start;
  my $lookup = $handle->prepare("SELECT m.C_START, m.C_STOP, m.STRAND
                             FROM map m, genes g
                             WHERE g.TID = ? 
                             AND g.ID = m.ID");

  for my $transcript_id (@transcript_ids) {
    $lookup->execute($transcript_id);
    while (my ($c_start, $c_stop, $strand) = $lookup->fetchrow_array) {
      # Stored coordinates are counted from 0, hence the + 1.
      my $start = $c_start + 1;
      my $end = $c_stop + 1;
      # The table always holds start < stop, e.g. start=1000 stop=2000
      # for a minus-strand transcript; its coding start is the stop.
      ($start, $end) = ($end, $start) if $strand eq '-';

      if (!defined $coding_start) {
        $coding_start = $start;
        next;
      }
      if ($strand eq '+') {
        $coding_start = $start if $start < $coding_start;
      }
      elsif ($strand eq '-') {
        # e.g. 2000 for a gene on the minus strand from 1000-2000
        $coding_start = $start if $start > $coding_start;
      }
    }
    $lookup->finish();
    $self->{'position'} = $coding_start;
  }

  return $self->{'position'};
}
sequence_iddescriptionprevnextTop
# Arg: optional, id of the gene's representative sequence
# Description: Gets/sets the gene's representative sequence used
#              in the trees. Any argument passed replaces the stored
#              ID. When nothing is stored yet, the ID column of the
#              genes table is read for this gene ID and cached.
# Returntype: string
sub sequence_id {
  my ($gene, @new_id) = @_;
  if (@new_id) {
    $gene->{'sequence_id'} = $new_id[0];
  }
  return $gene->{'sequence_id'} if defined $gene->{'sequence_id'};

  my $gene_id = $gene->{'ID'};
  my $connection = $gene->{'DBConnection'};
  my $handle = $connection->{'database_handle'};
  my $lookup = $handle->prepare("SELECT DISTINCT g.ID FROM genes g WHERE g.GID = ?");
  $lookup->execute($gene_id);
  my ($sequence) = $lookup->fetchrow_array();
  $gene->{'sequence_id'} = $sequence;
  $lookup->finish();

  return $gene->{'sequence_id'};
}
speciesdescriptionprevnextTop
# Arg1: optional, type of name: latin or swcode
#       (5-letter species code used in Swiss-Prot)
# Arg2: optional, species name
# Description: Gets/sets gene's species. Returns latin name
#              by default. The kind of name currently stored is
#              remembered, so asking for the other kind triggers a new
#              lookup. A defined name passed as Arg2 is stored as given
#              and is never replaced by a lookup in the same call.
# Returntype: string
sub species {
  my ($self, $type, @name) = @_;
  $type ||= 'latin';

  # Anything other than 'swcode' (case-insensitive) means latin name.
  my $wanted = lc($type) eq 'swcode' ? 'SWCODE' : 'LATIN';

  $self->{'species'} = $name[0] if @name;
  my $explicitly_set = @name && defined $name[0];

  # first request, or request for a different name type, e.g. latin
  # name when we previously had swcode
  my $stored = $self->{'species_name_type'};
  my $other_type = defined $stored && lc($stored) ne lc($wanted);

  if (!$explicitly_set && (!defined $self->{'species'} || $other_type)) {
    my $geneID = $self->{'ID'};
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $sth;
    if ($wanted eq 'SWCODE') {
      $sth = $dbh->prepare("SELECT DISTINCT s.swcode FROM species s, genes g WHERE g.GID = ? AND g.tax_id=s.tax_id");
    }
    else {
      $sth = $dbh->prepare("SELECT DISTINCT s.taxname FROM species s, genes g WHERE g.GID = ? AND g.tax_id=s.tax_id");
    }
    $sth->execute($geneID);
    ($self->{'species'}) = $sth->fetchrow_array();
    $sth->finish();
  }

  $self->{'species_name_type'} = $wanted;
  return $self->{'species'};
}
symboldescriptionprevnextTop
# Arg: optional, gene symbol
# Description: Gets/sets gene's symbol. Any argument passed replaces
#              the stored symbol. When nothing is stored yet, the
#              symbol column of the genes table is read for this gene
#              ID and cached on the object.
# Returntype: string
sub symbol {
  my ($gene, @new_symbol) = @_;
  if (@new_symbol) {
    $gene->{'symbol'} = $new_symbol[0];
  }
  return $gene->{'symbol'} if defined $gene->{'symbol'};

  my $gene_id = $gene->{'ID'};
  my $connection = $gene->{'DBConnection'};
  my $handle = $connection->{'database_handle'};
  my $lookup = $handle->prepare("SELECT DISTINCT g.symbol FROM genes g WHERE g.GID = ?");
  $lookup->execute($gene_id);
  my ($found) = $lookup->fetchrow_array();
  $gene->{'symbol'} = $found;
  $lookup->finish();

  return $gene->{'symbol'};
}
transcriptsdescriptionprevnextTop
# Arg: optional, list of transcript IDs
# Description: Gets/sets transcripts associated with gene. IDs passed
#              in are appended to the stored list. While the stored
#              list is empty, the transcript IDs are read from the
#              genes table for this gene ID.
# Returntype: list of transcript IDs (empty list when there are none)
sub transcripts {
  my ($self, @transcripts) = @_;
  push(@{$self->{'transcripts'}}, @transcripts) if @transcripts;

  # Test for an empty list directly: defined(@array) is a fatal error
  # on perl 5.22 and later.
  if (!@{ $self->{'transcripts'} || [] }) {
    my $geneID = $self->{'ID'};
    my $dbc = $self->{'DBConnection'};
    my $dbh = $dbc->{'database_handle'};
    my $sth = $dbh->prepare("SELECT DISTINCT TID
                             FROM genes
                             WHERE gid= ?");
    $sth->execute($geneID);
    while (my ($transcript) = $sth->fetchrow_array()) {
      push(@{$self->{'transcripts'}}, $transcript);
    }
    $sth->finish();
  }

  # The list may still be unset when the gene has no transcripts.
  return @{ $self->{'transcripts'} || [] };
}
General documentation
CONTACTTop
 jkh1@sanger.ac.uk