paradiseo/contribution/branches/PhyloMOEA/PhyloMOEA-serial/PhyloMOEA/phylotreeIND.cpp

2161 lines
57 KiB
C++

/***************************************************************************
* Copyright (C) 2005 by Waldo Cancino *
* wcancino@icmc.usp.br *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include <phylotreeIND.h>
#include <iostream>
#include <map>
#include <sstream>
#include <gsl/gsl_randist.h>
#include <cmath>
#include <iterator>
#include <ExceptionManager.h>
using namespace std;
struct temp_info
{
int left, right, num_nodes;
node left_most;
temp_info() {};
temp_info(int n) : left(n), right(-1), num_nodes(0) {};
};
// return the element at position pos of the list
template <typename T> const T& select_edge_at_pos( const list <T> &l, int pos)
{
if(pos >= l.size())
return l.back();
typename list<T>::const_iterator it =l.begin();
for( int i=0 ; i < pos; i++)it++;
return *it;
}
template const int& select_edge_at_pos<int>( const list <int> &, int );
double phylotreeIND::randomgsl_alpha = 500;
// ---------------- constructors
// constructor
phylotreeIND::~phylotreeIND()
{
graph::edge_iterator it = TREE.edges_begin();
graph::edge_iterator it_e = TREE.edges_end();
if(split_bits!=NULL)delete [] split_bits;
}
phylotreeIND::phylotreeIND( RandomNr *g, Sequences &p, gsl_rng *gsr) :
random_gsl(gsr), randomNr(g)
{
seqpatterns = &p;
root = NULL;
valid_splits = false;
split_bits = NULL;
// init maps
MAPNODETAXON.init(TREE);
nnode = p.num_seqs();
MAPTAXONNODE.resize(nnode);
MAPIDEDGE.resize( 2*nnode - 3);
NJDISTANCES.resize( 2*nnode - 3);
//split_bits = new bool [ (2*nnode-3) * (2*nnode-2)];
// crete the taxons
for(int i=0; i < nnode; i++)
{
new_taxon(i);
}
}
// copy constructor
phylotreeIND::phylotreeIND( const phylotreeIND &org) : random_gsl( org.random_gsl), randomNr(org.randomNr)
{
// copy the data of IND class
split_bits = NULL;
copy(org);
}
// copy an individual
void phylotreeIND::copy( const phylotreeIND &C )
{
root = C.root;
parsimony = C.parsimony;
valid_splits = false;
// likelihood = C.likelihood;
nnode = C.nnode;
seqpatterns = C.seqpatterns;
// map nodes and exes
list<node> cnodes = C.TREE.all_nodes();
list<edge> cedges = C.TREE.all_edges();
if(split_bits != NULL) delete [] split_bits;
split_bits = new bool [ (2*nnode-3) * (2*nnode-2)];
TREE.clear();
MAPNODETAXON.init(TREE);
MAPIDEDGE.resize( 2*nnode -3);
NJDISTANCES.resize( 2*nnode -3 );
MAPTAXONNODE.resize( nnode );
construct_graph( C, cnodes, cedges);
// if is a tree constructed, calculate the splits
if( nnode < TREE.number_of_nodes() )
this->calculate_splits();
}
// clone functions
// copy the entire individual
phylotreeIND* phylotreeIND::clone() const
{
phylotreeIND* p=new phylotreeIND(*this);
return p;
}
// copy only the taxons and perform an stepwise addition
phylotreeIND* phylotreeIND::randomClone() const
{
// clone the individual (only taxons)
phylotreeIND* p=new phylotreeIND( *this);
p->stepwise_addition();
// set the branch lenghts
graph::edge_iterator it = p->TREE.edges_begin();
graph::edge_iterator it_e = p->TREE.edges_end();
// init the edge lenghts
while( it != it_e)
{
p->set_branch_length(*it, randomNr->doubleuniformMinMax(0, 0.05));
++it;
}
return p;
}
// change the data set
void phylotreeIND::set_data( Sequences &s)
{
if ( s.num_seqs() != seqpatterns->num_seqs() )
{
cout << "error --> incompatible data change\n";
exit(1);
}
else if(seqpatterns!=&s)
{
//cout << "data change on individual...\n";
seqpatterns = &s;
}
}
// create an internal node
node phylotreeIND::new_internal_node()
{
node n = TREE.new_node();
//assert( n.id() < 2*nnode-2);
MAPNODETAXON[n] = -1;
return n;
}
// create a node that maps the sequence id
node phylotreeIND::new_taxon( int id)
{
node n = new_internal_node();
//node n = TREE.new_node();
//assert( n.id() < 2*nnode-2);
MAPNODETAXON[n] = id;
MAPTAXONNODE[id] = n;
return n;
}
edge phylotreeIND::new_branch( node source, node dest)
{
edge edgeaux = TREE.new_edge(source, dest);
MAPIDEDGE[edgeaux.id()] = edgeaux;
return edgeaux;
}
edge phylotreeIND::new_branch( node source, node dest, double bl)
{
edge edgeaux = new_branch(source, dest);
set_branch_length(edgeaux, bl);
return edgeaux;
}
void phylotreeIND::remove_branch( edge edgeaux)
{
MAPIDEDGE[edgeaux.id()] = invalid_edge;
TREE.del_edge(edgeaux);
}
// select edge and a pos
edge phylotreeIND::select_edge() const
{
int j;
edge edgeaux;
do
{
j =randomNr->uniform0Max(MAPIDEDGE.size());
edgeaux = MAPIDEDGE[j];
}while (edgeaux == invalid_edge);
return edgeaux;
}
// select an edge off-side subtree defined by source_edge
edge phylotreeIND::select_edge_outsidetree( edge source_edge) const
{
return choose_edge_fromside( source_edge.id() * TREE.number_of_nodes(), false);
}
// select and edge from a side of a split
edge phylotreeIND::choose_edge_fromside( int idx, bool side ) const
{
edge edgeaux;
do
{
edgeaux = select_edge();
assert( edgeaux.source().id() < 2*nnode-2);
assert( edgeaux.target().id() < 2*nnode-2);
}
while( split_bits[ idx + edgeaux.source().id()] !=side &&
split_bits[ idx + edgeaux.target().id()] != side);
return edgeaux;
}
// genetic operators
// change subtrees to form childs
void phylotreeIND::crossover(float pcross, const phylotreeIND& dad, phylotreeIND*& sis, phylotreeIND*& bro) const
{
// prevent discard qualifiers erros (because const function)
phylotreeIND &mom = (phylotreeIND &)(*this);
phylotreeIND &dady = (phylotreeIND &)dad;
sis = dady.clone();
bro = mom.clone();
phylotreeIND &sister = (phylotreeIND &)(*sis);
phylotreeIND &brother = (phylotreeIND &)(*bro);
if( randomNr->flipP(pcross) )
{
// swap subtreesf
mom.export_subtree( sister );
dady.export_subtree( brother);
sister.invalidate_splits();
brother.invalidate_splits();
}
}
void phylotreeIND::mutate(float pmut)
{
double type = randomNr->ran2();
//double type = 0.5;
if( !splits_valid() ) calculate_splits();
if( randomNr->flipP(pmut) )
{
// both kinds of mutatio if type>0.67
// topological mutation
if( type < 0.34 || type > 0.67 )
{
//TBR();
//change_subtrees();
NNI();
//SPR();
invalidate_splits();
// branch mutation
}
if( type > 0.34 )
{
// no topological change
mutate_branch_lenght( pmut);
//this->invalidateCaches();
}
}
}
void phylotreeIND::mutate_branch_lenght(float pmut)
{
double gamma;
graph::edge_iterator it = TREE.edges_begin();
graph::edge_iterator it_e = TREE.edges_end();
while(it!= it_e)
{
if (randomNr->flipP(0.5))
{
gamma = gsl_ran_gamma (random_gsl, randomgsl_alpha, (1.0/randomgsl_alpha) );
if( get_branch_length(*it) * gamma != 0 )
set_branch_length( *it, get_branch_length(*it) * gamma);
}
++it;
}
}
// select an internal edge randomly
edge phylotreeIND::choose_internal_edge() const
{
edge edgeaux;
do
edgeaux = select_edge();
while( istaxon(edgeaux.source()) || istaxon(edgeaux.target()) );
return edgeaux;
}
// create a graph of n nodes whithout edges
void phylotreeIND::init()
{
}
// implements the crossover_operator of GAML (Lewis, 1998)
// place the result in TREE2
void phylotreeIND::crossover_gaml(phylotreeIND &TREE2)
{
graph::edge_iterator edgeaux = TREE.edges_end(); // point to subtree in parent 1
edgeaux--;edgeaux--;
node root1, parent1; // point to the root of the subtree in parent1
list<edge> subtree_edges;
list<node> subtree_nodes;
// select the root of the tree to be pruned (the root is market with 1 in the split
// corresponding to edgeaux
root1 = split(*edgeaux, edgeaux->source()) ? edgeaux->source() : edgeaux->target();
// select the root of the subtree pruned
parent1 = root1 == edgeaux->source() ? edgeaux->target() : edgeaux->source();
// save the distance of edgeaux
double dist = get_branch_length(*edgeaux);
obtain_subtree( root1, &parent1, &subtree_edges, &subtree_nodes);
// eliminate the taxons in subtree from TREE2
list<node>::iterator it= subtree_nodes.begin();
list<node>::iterator it_e= subtree_nodes.end();
while( it != it_e )
{
if( istaxon(*it) ) TREE2.remove_taxon( MAPNODETAXON[*it] );
it++;
}
// point to the last node in TREE2
graph::node_iterator root2 = TREE2.TREE.nodes_end(); root2--;
// construct the subtree1 in the TREE2
TREE2.construct_graph( *this, subtree_nodes, subtree_edges);
// connect the subtree created in an edge of TREE2;
graph::edge_iterator edgeaux2 = TREE2.TREE.edges_begin();
// the root of the new subtree in TREE2 is root2++ given the root2 was
// inserted at end of the list of nodes
TREE2.insert_node_in_edge( *(++root2) , *edgeaux2, dist);
}
// copy a subtree to another individual, preventing the duplicated nodes
// when applied two times is the same that interchange subtrees
// note: dest tree MUST have the same number of taxas (related to the same problem)
void phylotreeIND::export_subtree( phylotreeIND &dest)
{
// DEBUG INFO
graph::edge_iterator itdebug = TREE.edges_begin();
// subtree nodes and edges
list<node> subtree_nodes;
list<edge> subtree_edges;
// points to the root of the subtree and the parent of the subtree
node root_subtree, parent_subtree;
edge source_edge, dest_edge;
// select the subtree to be exported
source_edge = select_edge();
//select_edge_at_pos( TREE.all_edges(), j);
// save the distance of the edge comming to the root_subtree
double dist = get_branch_length(source_edge);
root_subtree = split(source_edge,source_edge.source()) ? source_edge.source() : source_edge.target();
// points to the parent of the subtree in order to do the percurse
// DEBUG INFO
//cout << "raiz da arvore" << root_subtree << endl;
parent_subtree = source_edge.opposite(root_subtree);
// obtain the subtree
obtain_subtree( root_subtree, &parent_subtree, &subtree_edges, &subtree_nodes);
// eliminate duplicate taxons in dest
list<node>::iterator it = subtree_nodes.begin();
list<node>::iterator it_e = subtree_nodes.end();
while(it != it_e)
{
if( istaxon(*it) ) dest.remove_taxon( MAPNODETAXON[ *it] );
++it;
}
// DEBUG INFO
//cout << "arvore destino podada" << endl;
//dest.printtree();
// select the destination edge in dest
//dest_edge = select_edge_at_pos( dest.TREE.all_edges(), j);
dest_edge = dest.select_edge();
// help to find the root of the subtree regrafted
// DEBUG INFO
//cout << "eixo regrafting" << dest_edge << endl;
graph::node_iterator dest_root_subtree = --dest.TREE.nodes_end();
// copy the subtree in dest
dest.construct_graph( *this, subtree_nodes, subtree_edges);
// now, as dest_parent_node point the last node in dest before insertion,
// then dest_parent_node++ point to the root of the subtree recently inserted
// Only remain insert it to the dest_edge in order to connect the tree
dest.insert_node_in_edge( *(++dest_root_subtree), dest_edge, dist);
//DEBUG INFO
itdebug = dest.TREE.edges_begin();
}
// parsigal crossover work in rooted an unrooted trees
// note: crossover_gaml and parsigal
void phylotreeIND::crossover_parsigal( phylotreeIND & TREE2,phylotreeIND & SON1, phylotreeIND & SON2)
{
list<node> subtree1_nodes, subtree2_nodes;
list<edge> subtree1_edges, subtree2_edges;
graph::edge_iterator crossoverpoint1, crossoverpoint2, edgeaux1, edgeaux2;
node root1, root2;
node parent1, parent2;
edgeaux1 = TREE.edges_begin();
edgeaux1++;edgeaux1++;edgeaux1++;
edgeaux2 = TREE2.TREE.edges_end();
edgeaux2--;edgeaux2--;
SON1.copy(*this);
SON2.copy(TREE2);
// works from both rooted and unrooted trees
root1 = split(*edgeaux1,edgeaux1->source()) ? edgeaux1->source() : edgeaux1->target();
root2 = TREE2.split(*edgeaux2,edgeaux2->source()) ? edgeaux2->source() : edgeaux2->target();
// its imposible to swap the entire tree
if(root!=NULL && (root1 == *root || root2 == *TREE2.root)) return;
// again works from both rooted and unrooted
parent1 = root1==edgeaux1->source() ? edgeaux1->target() : edgeaux1->source();
parent2 = root2==edgeaux2->source() ? edgeaux2->target() : edgeaux2->source();
// point to the position to graft the subtrees
obtain_subtree( root1, &parent1, &subtree1_edges, &subtree1_nodes);
TREE2.obtain_subtree( root2, &parent2, &subtree2_edges, &subtree2_nodes);
// eliminate taxons from the subtree1 in SON2 and subtree2 in SON1
list<node>::iterator it = subtree1_nodes.begin();
list<node>::iterator it_e = subtree1_nodes.end();
while(it != it_e)
{
if( istaxon(*it) ) SON2.remove_taxon( MAPNODETAXON[ *it] );
++it;
}
it = subtree2_nodes.begin();
it_e = subtree2_nodes.end();
while(it != it_e)
{
if( TREE2.istaxon(*it) ) SON1.remove_taxon( TREE2.MAPNODETAXON[ *it] );
++it;
}
std::copy( SON1.TREE.edges_begin() , SON1.TREE.edges_end(), ostream_iterator<edge>( cout, " , " ));
cout << endl;
std::copy( SON2.TREE.edges_begin() , SON2.TREE.edges_end(), std::ostream_iterator<edge>( cout, " , " ));
cout << endl;
// select the crossover sites in son1 and son2
crossoverpoint1 = SON1.TREE.edges_begin();
crossoverpoint2 = SON2.TREE.edges_begin();
// select the root of the subtree to be conected
graph::node_iterator connect_1 = SON1.TREE.nodes_end(); connect_1--;
graph::node_iterator connect_2 = SON2.TREE.nodes_end(); connect_2--;
SON1.construct_graph( TREE2, subtree2_nodes, subtree2_edges);
SON2.construct_graph( *this, subtree1_nodes, subtree1_edges);
// conect to the parent 1
SON1.insert_node_in_edge( *(++connect_1), *crossoverpoint1);
SON2.insert_node_in_edge( *(++connect_2), *crossoverpoint2);
for(int i=0; i < SON1.nnode; i++) cout << i << "--> " << SON1.MAPTAXONNODE[i] << endl;
for(int i=0; i < SON2.nnode; i++) cout << i << "--> " << SON2.MAPTAXONNODE[i] << endl;
}
void phylotreeIND::remove_taxon( int id)
{
node delnode = MAPTAXONNODE[id];
// verify remaining two nodes in tree
if (TREE.number_of_nodes() < 3) TREE.del_node(delnode);
else
{
edge edgeaux = *delnode.in_edges_begin();
node parent = edgeaux.source();
remove_branch(edgeaux);
TREE.del_node(delnode);
// added by me
collapse_node(parent);
}
}
// this function gerate the list of edges that are separated by the
// edge edgeaux
void phylotreeIND::separategraphs(edge edgeaux, list<edge> &graph1, list<edge> &graph2)
{
// map to the visited edges
node nodeaux = edgeaux.target();
obtain_subtree(edgeaux.source(), &nodeaux, &graph1, NULL);
nodeaux = edgeaux.source();
obtain_subtree(edgeaux.target(), &nodeaux, &graph2, NULL);
}
void phylotreeIND::calculate_splits_exp()
{
// init the split memory
edge edgeaux = choose_internal_edge();
allocate_split_memory();
memset( split_bits, false, (2*nnode-3)*(2*nnode-2)*sizeof(bool));
//cout << "memoria alocada......" << endl;
postorder_Iterator it = postorder_begin( edgeaux.source());
// calculate the splits
while(*it!=edgeaux.source())
{
int idx_begin1 = it.branch().id()*TREE.number_of_nodes();
split_bits[ idx_begin1 + (*it).id() ] = true;
int ones = 0;
if( !istaxon(*it) )
{
child_Iterator son = child_begin( *it, it.ancestor() );
child_Iterator son_end = child_end( *it, it.ancestor() );
while( son != son_end)
{
int idx_begin2 = son.branch().id()*TREE.number_of_nodes();
for(int i=0; i< TREE.number_of_nodes(); i++)
{
split_bits[idx_begin1 + i] = split_bits[idx_begin1 + i] || split_bits[idx_begin2 + i];
if( split_bits[idx_begin2 + i] ) ones++;
}
// the son splits is not used anymore
if (ones >= TREE.number_of_nodes()/2)invert_split( son.branch() );
++son;
}
}
++it;
}
valid_splits = true;
//print_splits();
}
void phylotreeIND::calculate_splits()
{
allocate_split_memory();
memset( split_bits, false, (2*nnode-3)*(2*nnode-2)*sizeof(bool));
//fill(split_bits.begin(), split_bits.end(), false);
if( root!=NULL)
{
int i;
cin >> i;
node::out_edges_iterator it = root->out_edges_begin();
node::out_edges_iterator it_e = root->out_edges_end();
while ( it != it_e )
{
calculate_splits_from_edge2( *it, it->target());
it++;
}
}
else
{
graph::edge_iterator edgeaux_it = TREE.edges_begin();
graph::edge_iterator edgeaux_it_e = TREE.edges_end();
edge edgeaux;
// select an internal edge
while( edgeaux_it!= edgeaux_it_e )
{
if( ! istaxon( edgeaux_it->target() ) ) break;
edgeaux_it++;
}
// else select any edge
if(edgeaux_it==TREE.edges_end())edgeaux_it=TREE.edges_begin();
edgeaux = *edgeaux_it;
calculate_splits_from_edge2( edgeaux, edgeaux.source());
// clean the splits
memset( split_bits + edgeaux.id()*TREE.number_of_nodes(), false,
TREE.number_of_nodes());
calculate_splits_from_edge2( edgeaux, edgeaux.target());
int ones = 0;
int nn = TREE.number_of_nodes();
int idx_begin = edgeaux.id() * nn;
for( int i=0; i < nn; i++)
if( split_bits[ idx_begin + i] )ones++;
if (ones >= TREE.number_of_nodes()/2)
invert_split( edgeaux );
// display splits
}
valid_splits = true;
}
void phylotreeIND::calculate_splits_from_edge2(edge edgeaux, node from)
{
node::inout_edges_iterator it = from.inout_edges_begin();
node::inout_edges_iterator it_e = from.inout_edges_end();
//cout << "chamando --> " << edgeaux << "," << from << endl;
//SPLITS[edgeaux].clear();
//SPLITS[edgeaux].init(TREE,0);
int idx_begin1 = edgeaux.id()*TREE.number_of_nodes();
// clean the split
//memset( &split_bits[idx_begin1], 0, TREE.number_of_nodes());
if( edgeaux.id() >= 2*nnode-3 || from.id() >= 2*nnode-2)
{
cout << "edge_id:" << edgeaux.id() << " node_id:" << from.id() << endl;
assert( edgeaux.id() < 2*nnode-3 && from.id() < 2*nnode-2);
}
split_bits[ idx_begin1 + from.id() ] = true;
//SPLITS[edgeaux][from] = 1;
if( istaxon(from) ) return;
while( it != it_e )
{
if( *it != edgeaux )
{
node target = it->source() == from ? it->target() : it->source();
calculate_splits_from_edge2(*it, target);
int n = TREE.number_of_nodes();
int ones = 0;
int idx_begin2 = it->id()*TREE.number_of_nodes();
for(int i=0; i<n; i++)
{
split_bits[idx_begin1 + i] = split_bits[idx_begin1 + i] || split_bits[idx_begin2 + i];
if( split_bits[idx_begin2 + i] ) ones++;
}
if (ones >= TREE.number_of_nodes()/2)
invert_split( *it );
}
it++;
}
// invert the split in the case that the graph is unrooted
}
void phylotreeIND::print_splits() const
{
graph::edge_iterator it = TREE.edges_begin();
graph::edge_iterator it_e = TREE.edges_end();
while( it!= it_e)
{
cout << get_split_key( *it) << endl;
//print_split(*it);
++it;
}
}
void phylotreeIND::print_split(edge edgeaux) const
{
graph::node_iterator it = TREE.nodes_begin();
graph::node_iterator it_e = TREE.nodes_end();
cout << "eixo " << edgeaux << "divide" << endl;
while( it != it_e )
{
if( !split(edgeaux,*it) ) cout << *it << " - ";
else cout << " *" << *it << " - ";
it++;
}
cout << endl;
}
// invert the zeros and ones in the split
void phylotreeIND::invert_split( edge edgeaux)
{
int n= TREE.number_of_nodes();
int idx_begin = edgeaux.id()*n;
for(int i=0; i< n; i++)
split_bits[idx_begin+i] = !split_bits[idx_begin+i];
}
// return if an edgeaux is what set of the split in consensus
// 0 -> the taxas in edgeaux are in the "0" side
// 1 -> the taxas in edgeaux are in the "1" side
int phylotreeIND::split_set_edge( edge consensus, edge edgeaux)
{
graph::node_iterator it = TREE.nodes_begin();
graph::node_iterator it_e = TREE.nodes_end();
int setconsensus = -1;
int setedgeaux, setedgeaux2;
bool flag = true;
while(it != it_e)
{
// only check taxons
if(istaxon(*it))
{
if(setconsensus == -1){
setedgeaux = split(edgeaux,*it);
setconsensus = split(consensus,*it);
}
// checks only for taxon from split in edgeaux whose split taxon values = setedgeaux
// if this value don't coincide with setconsensus then the corresponding set
// in split of edgeaux is the opposite and finalize the loop
else if( split(edgeaux,*it) == setedgeaux && split(consensus,*it) != setconsensus )
flag = false;
else if( split(edgeaux,*it) != setedgeaux )
{
setedgeaux2 = split(consensus,*it);
if(!flag)break;
}
}
++it;
}
return ( flag ? setconsensus : setedgeaux2 );
}
void phylotreeIND::stepwise_addition()
{
list<node> auxlist = TREE.all_nodes();
edge edgeaux;
node node1, node2;
// choose two nodes randomly add
int j = randomNr->uniform0Max( auxlist.size() );
int k = randomNr->uniform0Max( auxlist.size() );
while(k == j)k = randomNr->uniform0Max(TREE.number_of_nodes());
// choose the nodes
node1 = select_edge_at_pos( auxlist, j);
node2 = select_edge_at_pos( auxlist, k);
// first pick two nodes and insert edge in it
//edgeaux = TREE.new_edge(node1, node2);
//MAPIDEDGE[edgeaux.id()] = edgeaux;
edgeaux = new_branch(node1, node2);
auxlist.remove( node1);
auxlist.remove( node2);
// the remaining edges
while( auxlist.size()>0 )
{
j = randomNr->uniform0Max( auxlist.size() );
node1 = select_edge_at_pos( auxlist, j);
insert_node_in_edge(node1, edgeaux);
auxlist.remove( node1);
// selec the next edge to add a new node
j = randomNr->uniform0Max( TREE.number_of_edges() );
edgeaux = select_edge_at_pos( TREE.all_edges(), j);
}
calculate_splits();
}
void phylotreeIND::start_decomposition()
{
list<node> auxlist = TREE.all_nodes();
node node1, node2, cluster;
edge edgeaux;
while( auxlist.size() > 2)
{
// pick two nodes followingan criteria
//cluster = TREE.new_node();
cluster = new_internal_node();
new_branch(cluster, node1);
new_branch(cluster, node2);
//edgeaux = TREE.new_edge( cluster, node1 );
//MAPIDEDGE[edgeaux.id()] = edgeaux;
//edgeaux = TREE.new_edge( cluster, node2 );
//MAPIDEDGE[edgeaux.id()] = edgeaux;
auxlist.remove( node1);
auxlist.remove( node2);
auxlist.push_back( cluster );
}
new_branch( cluster, *(auxlist.begin()) );
//edgeaux = TREE.new_edge( cluster, *(auxlist.begin()) );
//MAPIDEDGE[edgeaux.id()] = edgeaux;
}
void phylotreeIND::SPR()
{
// spr no makes sense in trees with less than four edges (three or less taxons)
if( TREE.number_of_edges() <5) return;
// choose and edge and divides the three
edge edgeaux1 = select_edge();
//select_edge_at_pos( TREE.all_edges(), j);
node nodeaux, delnode, nodeaux2;
// because the edgeaux will be deleted we save it split
int idx_begin = edgeaux1.id() * TREE.number_of_nodes();
// save the distance of edgeaux1
double d_edgeaux1 = get_branch_length(edgeaux1);
nodeaux = split(edgeaux1,edgeaux1.source()) ? edgeaux1.source() : edgeaux1.target();
// select the source preventing remove a taxon
delnode = edgeaux1.opposite(nodeaux);
// prune edge1 and graft in edge 2
//cout << "eixo escolhido " << edgeaux1 << endl;
remove_branch(edgeaux1);
//MAPIDEDGE[edgeaux1.id()] = invalid_edge;
//TREE.del_edge(edgeaux1);
collapse_node(delnode);
edge edgeaux2 = choose_edge_fromside(idx_begin, false);
// finally, regraft in edgeaux2
// insert the subtree rooted at nodeaux in edgeaux2;
insert_node_in_edge(nodeaux, edgeaux2, d_edgeaux1);
}
void phylotreeIND::TBR()
{
// tbr no makes sense in trees with less than four edges (three or less taxons)
if( TREE.number_of_edges() <7) return;
list<edge> tree1, tree2;
edge edgeaux1, edgeaux2, prune_edge;
node source, dest, nodeaux1, nodeaux2;
// select the source and dest edges
prune_edge = choose_internal_edge();
// the graph was divide by removing edgeaux1
int idx_begin = prune_edge.id() * TREE.number_of_nodes();
source = prune_edge.source();
dest = prune_edge.target();
//TREE.del_edge(prune_edge);
remove_branch(prune_edge);
// delete the source and target nodes*/
collapse_node(source);
collapse_node(dest);
edgeaux1 = choose_edge_fromside( idx_begin, true);
edgeaux2 = choose_edge_fromside( idx_begin, false);
// create the new nodes to be reconected in the respective subtrees
nodeaux1 = divide_edge(edgeaux1);
nodeaux2 = divide_edge(edgeaux2);
// reconect the tree
new_branch(nodeaux1, nodeaux2);
//edgeaux1 = TREE.new_edge(nodeaux1, nodeaux2);
//MAPIDEDGE[edgeaux1.id()] = edgeaux1;
}
// subtrees in the tree
void phylotreeIND::change_subtrees()
{
edge source_edge, dest_edge;
list <edge> remain_edges;
node root_subtree1, root_subtree2, parent_subtree1, parent_subtree2;
// select the edge of the subtrees
source_edge = select_edge();
root_subtree1 = split(source_edge, source_edge.source())
? source_edge.source() : source_edge.target();
// points to the parent of the subtree in order to do the percurse
parent_subtree1 = source_edge.opposite(root_subtree1);
dest_edge = select_edge_outsidetree(source_edge);
bool lado = !split(dest_edge,root_subtree1);
root_subtree2 = split(dest_edge,dest_edge.source()) == lado
? dest_edge.source() : dest_edge.target();
parent_subtree2 = dest_edge.opposite(root_subtree2);
// reconecting the nodes
if( parent_subtree1 == source_edge.source() )
source_edge.change_target(root_subtree2);
else
source_edge.change_source(root_subtree2);
if( parent_subtree2 == dest_edge.source() )
dest_edge.change_target(root_subtree1);
else
dest_edge.change_source(root_subtree1);
if( istaxon(source_edge.source()) ) source_edge.reverse();
if( istaxon(dest_edge.source()) ) dest_edge.reverse();
}
// divides an edge by creating a new node and it with the
// edge extrems and return the new node;
// set distance dist to new edge to be inserted
void phylotreeIND::insert_node_in_edge(node nodeaux, edge edgeaux, double dist)
{
// divide an edgeaux
node new_internal_node = divide_edge(edgeaux);
// new_node is not a taxon
//MAPNODETAXON[new_internal_node] = -1;
// connect the new node
new_branch( new_internal_node, nodeaux, dist);
//edge new_edge = TREE.new_edge( new_internal_node, nodeaux);
//set_branch_length(new_edge, dist);
//MAPIDEDGE[new_edge.id()] = new_edge;
}
// breaks an edge with an new root edge that joins the edgeaux extremes
node phylotreeIND::divide_edge(edge edgeaux)
{
// create the node that divides edgeaux
//node r = TREE.new_node();
//assert(r.id() < 2*nnode -2);
// because the edge is divided, the distance of the edge is also divided
node r = new_internal_node();
double d = get_branch_length(edgeaux)/2;
// join the edgeaux vertices to root
new_branch( r, edgeaux.source(), d);
//edge new_edge = TREE.new_edge( r, edgeaux.source());
// each part of the edge receives the half of the original distance
//set_branch_length(new_edge, d);
set_branch_length(edgeaux, d);
//MAPIDEDGE[new_edge.id()] = new_edge;
edgeaux.change_source(r);
return r;
}
void phylotreeIND::collapse_node(node delnode)
{
edge edgeaux; // new edge to be inserted
// case the delnode have a degree 3, the graph is valid
if(delnode.degree() > 2) return;
// the node is incomplete, remove them and link the adjacent noddes
else if(delnode.degree() == 2)
{
node adj1, adj2;
node::inout_edges_iterator it1, it2;
it1 = it2 = delnode.inout_edges_begin();
it2++;
adj1 = it1->opposite(delnode);
adj2 = it2->opposite(delnode);
// if adj1 is a taxon, connect adj2->adj1
if( istaxon(adj1) )
new_branch( adj2, adj1, get_branch_length(*it1) + get_branch_length(*it2));
//edgeaux = TREE.new_edge( adj2, adj1);
else
new_branch( adj1, adj2, get_branch_length(*it1) + get_branch_length(*it2));
//edgeaux = TREE.new_edge( adj1, adj2);
// set the new distance as the sum of edge lenghts
//set_branch_length( edgeaux, get_branch_length(*it1) + get_branch_length(*it2) );
//MAPIDEDGE[edgeaux.id()] = edgeaux;
remove_branch( *it1);
remove_branch( *it2);
//MAPIDEDGE[it1->id()] = invalid_edge;
//MAPIDEDGE[it2->id()] = invalid_edge;
}
// invalidate edges
TREE.del_node(delnode);
}
void phylotreeIND::collapse_zero_edges()
{
graph::edge_iterator it = TREE.edges_begin();
graph::edge_iterator it_end = TREE.edges_end();
graph::edge_iterator tmp;
node source, dest;
while(it!=it_end)
{
tmp = it;
++tmp;
if(is_internal( *it) && get_branch_length( *it) == 0)
{
source = it->source();
dest = it->target();
//cout << "apagando branch" << endl;
remove_branch(*it);
//cout << "reconectando nos..." << endl;
reconnect_nodes( source, dest );
}
it = tmp;
}
}
// connect all adjacent nodes from source to dest and delete source
void phylotreeIND::reconnect_nodes(node source, node dest)
{
node::inout_edges_iterator it = source.inout_edges_begin();
node::inout_edges_iterator it_end = source.inout_edges_end();
node::inout_edges_iterator tmp_it;
edge tmp;
while(it != it_end)
{
tmp = *it;
tmp_it = it;
++tmp_it;
if( it->source() == source)
tmp.change_source( dest);
else if(it->target() == source)
tmp.change_target( dest);
it = tmp_it;
}
TREE.del_node(source);
}
// select an neighboor defined by the edge edgeaux and node start
edge phylotreeIND::choose_neighboor(edge edgeaux, node start) const
{
int n_neighboors = start.degree();
// choose an neighbooord edge differnet from edgeaux
node::inout_edges_iterator it;
do
{
int k = randomNr->uniform0Max( n_neighboors );
it = start.inout_edges_begin();
for(int i=0; i < k; i++) it++;
}while( *it == edgeaux );
// select the neighboor
return *it;
}
void phylotreeIND::taxa_swap()
{
node node1, node2;
node1 = taxon_number( randomNr->uniform0Max( nnode ) );
node2 = taxon_number( randomNr->uniform0Max( nnode ) );
edge extern_edge1 = *node1.inout_edges_begin();
edge extern_edge2 = *node2.inout_edges_begin();
extern_edge1.change_target(node2);
extern_edge2.change_target(node1);
}
void phylotreeIND::NNI()
{
edge select_edge;
edge edge_neighboor1, edge_neighboor2;
node node_neighboor1, node_neighboor2;
// select an internal edge
select_edge = choose_internal_edge();
// choose the neighboors
edge_neighboor1 = choose_neighboor( select_edge, select_edge.source() );
node_neighboor1 = edge_neighboor1.opposite( select_edge.source() );
edge_neighboor2 = choose_neighboor( select_edge, select_edge.target() );
node_neighboor2 = edge_neighboor2.opposite( select_edge.target() );
//change the neighboors
if( edge_neighboor1.source() == node_neighboor1 )
edge_neighboor1.change_source( node_neighboor2);
else
edge_neighboor1.change_target( node_neighboor2);
if( edge_neighboor2.source() == node_neighboor2 )
edge_neighboor2.change_source( node_neighboor1);
else
edge_neighboor2.change_target( node_neighboor1);
if(istaxon( edge_neighboor1.source() ))edge_neighboor1.reverse();
if(istaxon( edge_neighboor2.source() ))edge_neighboor2.reverse();
}
void phylotreeIND::insert_root(edge edgeaux)
{
// first divide the edge with a new node that connects
// its vertices
root = new node(divide_edge(edgeaux));
// finally convert the graph to thee with root nodeaux
convert_graph_to_tree(*root, NULL);
}
void phylotreeIND::convert_graph_to_tree(node n, node *antecessor)
{
edge edgeaux;
// reverse all the incoming edges that are not coming from antecessor
node::inout_edges_iterator it,tmp;
it = tmp = n.inout_edges_begin();
node::inout_edges_iterator it_e = n.inout_edges_end();
while(it!= it_e)
{
//neccesary if the variable "it" is deferenced
tmp++;
edgeaux = *it;
if(antecessor==NULL || edgeaux.source()!=*antecessor)
{
// reverse the edge and deferefence it
if( edgeaux.source() != n)
edgeaux.reverse();
convert_graph_to_tree(edgeaux.target(), &n);
}
it=tmp;
}
}
// return the list of nodes and edges that conform the tree rooted at node root
void phylotreeIND::obtain_subtree(node n, node *antecessor, list <edge> *edgelist, list <node> *nodelist)
{
//cout << "chamado con ...." << n;
//cout << *antecessor << endl;
node::inout_edges_iterator it, tmp, it_e;
it = tmp = n.inout_edges_begin();
it_e = n.inout_edges_end();
edge edgeaux;
//
if(nodelist)nodelist->push_back(n);
while( it != it_e )
{
if(antecessor==NULL || (it->source()!=*antecessor && it->target()!=*antecessor) )
{
if(edgelist) edgelist->push_back(*it);
if(edgelist->size() > TREE.number_of_edges())
{
cout << "hubo un error..." << endl;
int lll;
cin >> lll;
}
if( it->source() == n )
obtain_subtree( it->target(), &n, edgelist, nodelist);
else obtain_subtree( it->source(), &n, edgelist, nodelist);
}
it++;
}
}
bool phylotreeIND::isparent(node parent, node children) const
{
if( parent == *root) return true;
else
{
while( children.indeg()!=0 )
{
if (parent == children) return true;
children = children.in_edges_begin()->source();
}
}
return false;
}
node phylotreeIND::firstcommonancestor(node node1, node node2) const
{
if( isparent( node1, node2 )) return node1;
else
{
while(node2.indeg()!=0 )
{
if ( isparent( node2 , node1) )
return node2;
node2 = node2.in_edges_begin()->source();
}
}
// root is the parent
return *root;
}
node phylotreeIND::firstcommonancestor( list<node> &listnode) const
{
// points to the second element
list<node>::iterator it = ++listnode.begin();
list<node>::iterator it_e = listnode.end();
node ancestor = *listnode.begin();
while(it!= it_e) { ancestor = firstcommonancestor( ancestor, *it); ++it; }
return ancestor;
}
// return true when edgeaux is an internal edge
bool phylotreeIND::is_internal(edge edgeaux) const
{
return ( !istaxon(edgeaux.source()) && !istaxon(edgeaux.target()) );
}
// parsimony for rooted and non-rooted threes
// construct an graph with nodes and edges in the lists provided
void phylotreeIND::construct_graph( const phylotreeIND &G, list <node> &gnodes, list <edge> &gedges)
{
// maps the nodes from list nodes to the new nodes in tree
map <node, node> fromto;
list<node>::iterator it = gnodes.begin();
list<node>::iterator it_e = gnodes.end();
edge aux;
node n;
while(it!=it_e)
{
//cout << "insertando taxa " << *it << G.MAPNODETAXON[*it] << endl;
if( G.istaxon(*it) )
n = new_taxon( G.MAPNODETAXON[*it] );
else
n = new_internal_node();
fromto.insert( pair<node, node> (*it, n));
/*node n = TREE.new_node();
assert(n.id() < 2*nnode -2);
fromto.insert( pair<node, node> (*it, n));
MAPNODETAXON[n] = G.MAPNODETAXON[*it];
if(G.istaxon(*it))
{
inserido = true;
MAPTAXONNODE[ MAPNODETAXON[n] ] = n;
}*/
++it;
}
list<edge>::iterator it2 = gedges.begin();
list<edge>::iterator it2_e = gedges.end();
while(it2!=it2_e)
{
new_branch( fromto[it2->source()], fromto[it2->target()], G.get_branch_length(*it2));
//aux = TREE.new_edge( fromto[it2->source()], fromto[it2->target()] );
//set_branch_length( aux, G.get_branch_length(*it2) );
//MAPIDEDGE[aux.id()] = aux;
++it2;
}
}
void phylotreeIND::printtree() const
{
graph::node_iterator it, it_e;
node::out_edges_iterator it2, it2_e;
it = TREE.nodes_begin();
it_e = TREE.nodes_end();
while(it!=it_e)
{
if(istaxon(*it)) cout << "*" << "(" << MAPNODETAXON[*it] << ")" << *it << " :";
it2 = it->out_edges_begin();
it2_e = it->out_edges_end();
for( ; it2 != it2_e; it2++)
cout << *it2 << "(" << get_branch_length(*it2) << ")" << ", " ;
cout << endl;
++it;
}
}
void phylotreeIND::printNewick(ostream &os)
{
//edge edgeaux = choose_internal_edge();
//string s = "(";
//newick_traverse( edgeaux.source(), NULL, edgeaux, s);
//s += ");\n";
string s = newick_traverse2(true, true);
os << s << endl;
}
string phylotreeIND::newick_traverse2( bool brlens, bool longnames)
{
string s;
ostringstream s_2;
s_2.precision(15);
s_2.setf(ios::fixed);
edge edgeaux = * ( MAPTAXONNODE[0].inout_edges_begin());
node root_traverse = edgeaux.opposite( MAPTAXONNODE[0] );
convert_graph_to_tree( root_traverse, NULL);
postorder_Iterator it = postorder_begin( root_traverse );
postorder_Iterator it2 = postorder_end( root_traverse );
// imprimir ordem dos filhos
// child_Iterator it3 = child_begin( root_traverse);
// child_Iterator it4 = child_end( root_traverse);
//
// cout << "ordem de percorrido" << endl;
//
// while (it3!=it4)
// {
// if(istaxon(*it3)) cout << taxon_id(*it3) << ",";
// else cout << "I," << endl;
// ++it3;
// }
// cout << endl;
int prev_nivel = 1;
while(it!= it2)
{
s_2.str("");
if( istaxon(*it) )
{
//cout << "gravando\t" << MAPNODETAXON[*it] << endl;
for(int i=0; i < it.stack_size() - prev_nivel; i++) s+="(";
if( longnames)
s_2 << seqpatterns->seqname( MAPNODETAXON[*it] );
else s_2 << MAPNODETAXON[*it];
if ( brlens)
s_2 << ":" << get_branch_length( it.branch() );
s_2 << ",";
s += s_2.str();
}
else
{
if(s[ s.size() -1] == ',') s[s.size()-1] = ')';
else s+=")";
if( *it != edgeaux.source() && brlens)
s_2 << ":" << get_branch_length( it.branch() );
s_2 << ",";
s+= s_2.str();
}
prev_nivel = it.stack_size();
++it;
}
s[s.size()-1] = ';';
return s;
}
void phylotreeIND::newick_traverse( node n, node *ancestor, edge edgeaux, string &s)
{
ostringstream s_2;
s_2.precision(15);
s_2.setf(ios::fixed);
if(istaxon(n)) {
s_2 << seqpatterns->seqname( MAPNODETAXON[n] ) << ":" << get_branch_length(edgeaux);
s+= s_2.str();
return;
}
else if(ancestor!=NULL)s+='(';
node::inout_edges_iterator it, it_e;
it = n.inout_edges_begin();
it_e = n.inout_edges_end();
node nodeaux;
bool first_son = true;
while( it != it_e )
{
if(ancestor==NULL || (it->source()!=*ancestor && it->target()!=*ancestor) )
{
if(!first_son)s += ",";
first_son=false;
nodeaux = it->source() == n ? it->target() : it->source();
newick_traverse( nodeaux, &n, *it, s );
}
it++;
}
if(ancestor!=NULL)s_2 << "):" << get_branch_length(edgeaux);
s+= s_2.str();
}
void phylotreeIND::read_newick( string newickstring)
{
TREE.clear();
edge invalid_edge;
list<node> pilha;
int pos = 0;
node father;
edge internal_edge;
string taxon_name;
double blen;
char token;
int tottaxons = 0;
int totinternals = 0;
try{
do
{
token = newickstring[pos];
switch(token)
{
case '(':
{
//father = TREE.new_node();
father = new_internal_node();
//cout << "no interno..." << father.id() << endl;
totinternals++;
pilha.push_back(father);
break;
}
case ')':
{
node tmp = father;
assert(pilha.size() >0);
pilha.pop_back();
if(pilha.size()>0)
{
father = pilha.back();
internal_edge = new_branch( father, tmp);
//internal_edge = TREE.new_edge(father, tmp);
//MAPIDEDGE[internal_edge.id()] = internal_edge;
//cout << "eixo interno..." << internal_edge.id() << endl;
}
break;
}
case ',':
break;
case ':':
{
read_bl(newickstring, blen, (++pos));
//cout << "longitude de ramo:" << blen << endl;
// cout << "lendo brlen" << blen << endl;
set_branch_length( internal_edge, blen);
break;
}
case ';':
break;
default:
{
read_taxonname_bl(newickstring, taxon_name, blen, pos);
// create the taxon
cout << "agregando taxon" << taxon_name << endl;
string short_taxonname = taxon_name.substr(0,10);
int taxon_id = seqpatterns->search_taxon_name(short_taxonname);
//cout << "tentando agregar taxon ..." << taxon_name;
if( taxon_id<0)
{
cout << "falla al agregar taxon..." << taxon_name;
throw ExceptionManager(11);
//assert( taxon_id >=0 );
}
//cout << "...sucess" << endl;
//cout << "eixo interno..." << e.id() << endl;
//cout << "longitude de ramo:" << blen << endl;
node n = new_taxon( taxon_id);
tottaxons++;
//cout << "taxon..." << n.id() << endl;
new_branch( father, n, blen);
//edge e = TREE.new_edge( father, n);
//MAPIDEDGE[e.id()] = e;
//set_branch_length( e, blen);
//MAPNODETAXON[n] = taxon_id;
//MAPTAXONNODE[taxon_id] = n;
break;
}
}
pos++;
}while (token!=';');
}
catch(ExceptionManager e)
{
e.Report();
}
cout << "total... --> nos:" << TREE.number_of_nodes() << " eixos:" << TREE.number_of_edges() << endl;
cout << "contados..--> taxons:" << tottaxons << " internals:" << totinternals << endl;
}
void phylotreeIND::read_newick2(string newickstring)
{
//cout << "leyendo arvore\n";
TREE.clear();
edge invalid_edge;
list<node> pilha;
int pos = 0;
node father;
edge internal_edge;
string taxon_name;
double blen;
char token;
int tottaxons = 0;
int totinternals = 0;
int taxon_id;
do
{
token = newickstring[pos];
switch(token)
{
case '(':
{
//father = TREE.new_node();
father = new_internal_node();
//cout << "no interno..." << father.id() << endl;
totinternals++;
pilha.push_back(father);
break;
}
// close internal node
case ')':
{
pos++;
if( read_taxonname_bl(newickstring, taxon_name, blen, pos) == -1)
{
cout << "sintax error in position " << pos << endl;
throw ExceptionManager(11);
exit(1);
}
node tmp = father;
assert(pilha.size() >0);
pilha.pop_back();
if(pilha.size()>0)
{
//cout << "warning... ignorin internal taxon_name" << taxon_name
// << " with blen:" << blen << endl;
if(blen>1.e+05){
cout << "warning ... blen >1";
blen=1.e+05;
}
if(blen == 0){
//cout << "warning.... blen = 0 , posicao " << pos << endl;
//blen = BL_MIN;
}
if(blen <0){
cout << "warning.... blen negativo " << blen << endl;
cout << "posicao " << pos << endl;
cout << "taxonname" << taxon_name << endl;
blen=0;
}
father = pilha.back();
internal_edge = new_branch( father, tmp);
set_branch_length( internal_edge, blen);
}
break;
}
case ',':
case ';':
break;
default:
{
if(read_taxonname_bl(newickstring, taxon_name, blen, pos) == -1)
{
cout << "sintax error at position " << pos << endl;
exit(1);
}
// create the taxon
// check if a taxon is a number (no long name!)
istringstream convert(taxon_name);
if( convert >> taxon_id)
{
if(taxon_id < 0 || taxon_id > ( number_of_taxons() -1) )
taxon_id=-1;
}
// long name
else
{
string short_taxonname = taxon_name.substr(0,10);
taxon_id = seqpatterns->search_taxon_name(short_taxonname);
// cout << "tentando agregar taxon ..." << taxon_name << "con blen" << blen
// << "posicon " << pos << endl;
}
if( taxon_id<0)
{
cout << "falla al agregar taxon..." << taxon_name;
throw ExceptionManager(11);
assert( taxon_id >=0 );
}
//cout << "leyendo\t" << taxon_id << endl;
node n = new_taxon( taxon_id);
tottaxons++;
if(blen<0)
{
cout << "negative set to 0";
blen=0;
}
new_branch( father, n, blen);
break;
}
}
pos++;
}while (token!=';');
//cout << "total... --> nos:" << TREE.number_of_nodes() << " eixos:" << TREE.number_of_edges() << endl;
//cout << "contados..--> taxons:" << tottaxons << " internals:" << totinternals << endl;
}
int phylotreeIND::read_taxonname_bl( string &s, string &taxonname, double &blen, int &pos)
{
int first_pos = s.size();
int first_par = s.find( ')', pos); //first parentesis
first_pos = (first_par !=string::npos && first_par < first_pos)
? first_par : first_pos;
int first_comma = s.find( ',', pos);
first_pos = (first_comma !=string::npos && first_comma < first_pos)
? first_comma : first_pos;
int first_points = s.find( ':', pos);
first_pos = (first_points !=string::npos && first_points < first_pos)
? first_points : first_pos;
int first_semicolon = s.find( ';', pos);
first_pos = (first_semicolon !=string::npos && first_semicolon < first_pos)
? first_semicolon : first_pos;
// assert
if(first_pos == string::npos) return -1; // error !
// get_taxon_name
taxonname = s.substr( pos, first_pos - pos );
if( first_pos == first_points)
{
pos = first_pos+1;
return read_bl( s, blen, pos );
}
//else cout << "no brlens..." << endl;
pos = first_pos-1;
return 0;
}
int phylotreeIND::read_bl( string &s, double &blen, int &pos)
{
int first_pos = s.find( ';', pos); // final semicolor ex. (a,b):xx.yy;
int first_par = s.find( ')', pos); //first parentesis
first_pos = (first_par !=string::npos && first_par < first_pos)
? first_par : first_pos;
int first_comma = s.find( ',', pos);
first_pos = (first_comma !=string::npos && first_comma < first_pos)
? first_comma : first_pos;
if(first_pos == string::npos) return -1;
//assert(first_pos != string::npos);
string bl = s.substr( pos, first_pos - pos );
istringstream convert(bl);
if (convert >> blen)
{
pos = first_pos-1;
return 0;
}
else return -1;
//blen = atof(bl.c_str());
//return first_pos - 1;
}
string phylotreeIND::get_split_key( edge edgeaux) const
{
int n= TREE.number_of_nodes();
int idx_begin = edgeaux.id()*n;
string s;
graph::node_iterator it = TREE.nodes_begin();
graph::node_iterator it_e = TREE.nodes_end();
s.resize(nnode);
while(it!=it_e)
{
if(istaxon(*it))
s[ MAPNODETAXON[*it] ] =
split_bits[ idx_begin + it->id() ] == 1 ? '*':'.';
++it;
}
return s;
}
string phylotreeIND::get_invert_split_key( edge edgeaux) const
{
int n= TREE.number_of_nodes();
int idx_begin = edgeaux.id()*n;
string s;
s.resize(nnode);
graph::node_iterator it = TREE.nodes_begin();
graph::node_iterator it_e = TREE.nodes_end();
while(it!=it_e)
{
if(istaxon(*it))
s[ MAPNODETAXON[*it] ] =
split_bits[ idx_begin + it->id() ] == 1 ? '.':'*';
++it;
}
return s;
}
double phylotreeIND::compare_topology(phylotreeIND &other)
{
map <string, edge> split_repository;
map<string,edge>::iterator it_aux;
graph &tree2 = other.TREE;
int num_different_edges=0;
// create repository for splits in tree1
graph::edge_iterator it= TREE.edges_begin();
graph::edge_iterator it_e= TREE.edges_end();
while(it!=it_e)
{
if(!istaxon(it->target()))
{
split_repository[get_split_key(*it)] = *it;
}
++it;
}
// count for different splits
it = tree2.edges_begin();
it_e = tree2.edges_end();
while(it!=it_e)
{
if(!istaxon(it->target()))
{
if( (it_aux = split_repository.find( other.get_split_key(*it))) != split_repository.end()
|| (it_aux = split_repository.find( other.get_invert_split_key(*it)) ) !=split_repository.end() )
split_repository.erase( it_aux );
else
num_different_edges++;
}
//}
++it;
}
// different tree1->tree2 + different tree2->tree1 (remaining)
return num_different_edges + split_repository.size();
}
// day algorithms
double phylotreeIND::compare_topology_2(phylotreeIND &other)
{
// hash table
int *hash_table;
// node mapes
int *map_nodes;
int node_count = 0;
int n = number_of_taxons();
int good_edges = 0;
node_map<struct temp_info> interior_node(TREE, temp_info(n));
// allocate memory
hash_table = new int[n*2];
for(int i=0; i<n*2; i++)hash_table[i] = -1;
map_nodes = new int[n];
// step 1
// select last taxon as root
node invalid_node;
node root1 = taxon_number( n-1);
// step 2 and 3
postorder_Iterator it = postorder_begin( root1);
/*
while( *it != root1 )
{
struct temp_info &father_info = interior_node [ it.ancestor() ] ;
struct temp_info &current_info = interior_node [ *it ] ;
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
if( istaxon(*it) )
{
// update the map
map_nodes[ taxon_id( *it) ] = node_count;
// check if is the leftmost
if( father_info.left == n ) //left_most == invalid_node
{
//father_info.left_most = *it;
father_info.left = node_count;
}
//else father_info.right = node_count;
node_count++;
}
else
{
int idx;
current_info.right = node_count-1;
if( father_info.left == n ) //left_most == invalid_node
{
idx = current_info.right*2;
// step 3 copy to the hash table
//father_info.left_most = *it;
father_info.left = current_info.left;
//father_info.right = current_info.right;
}
else
{
idx = current_info.left*2;
father_info.right = current_info.right;
}
// fill hash table
hash_table[ idx ] = current_info.left;
hash_table[ idx+1 ] = current_info.right;
}
++it;
}
*/
int l, r;
while( *it != root1 )
{
struct temp_info &father_info = interior_node [ it.ancestor() ] ;
struct temp_info &current_info = interior_node [ *it ] ;
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
if( istaxon(*it) )
{
// update the map
map_nodes[ taxon_id( *it) ] = r = node_count;
// check if is the leftmost
if( father_info.left == n /*left_most == invalid_node*/ )
{
//father_info.left_most = *it;
father_info.left = node_count;
}
//else father_info.right = node_count;
node_count++;
++it;
}
else
{
int idx;
l = current_info.left;
++it;
if (istaxon(*it) || *it==root1) idx = (r)*2;
else idx = l*2;
current_info.right = r;
if( father_info.left == n /*left_most == invalid_node*/ )
{
father_info.left = current_info.left;
}
//else
//{
// father_info.right = current_info.right;
//}
// fill hash table
hash_table[ idx ] = l;
hash_table[ idx+1 ] = r;
}
}
// step 4
node root2 = other.taxon_number( n-1);
// father of root2
node no_root2 = root2.in_edges_begin()->source();
interior_node.init(other.TREE, temp_info(n));
it = postorder_begin( no_root2, root2);
while( *it != no_root2)
{
struct temp_info &father_info = interior_node [ it.ancestor() ] ;
struct temp_info &current_info = interior_node [ *it ] ;
if( istaxon(*it) )
{
if( father_info.left_most == invalid_node ) father_info.left_most = *it;
if( map_nodes[ other.taxon_id( *it) ] < father_info.left) father_info.left = map_nodes[ other.taxon_id(*it) ];
if( map_nodes[ other.taxon_id( *it) ] > father_info.right) father_info.right = map_nodes[ other.taxon_id(*it) ];
father_info.num_nodes++;
}
else
{
// check hash tables
if ( current_info.right - current_info.left + 1 ==
current_info.num_nodes)
{
if( (hash_table[ current_info.left*2 ] == current_info.left &&
hash_table[ current_info.left*2+1 ] == current_info.right)
||
(hash_table[ current_info.right*2 ] == current_info.left &&
hash_table[ current_info.right*2+1 ] == current_info.right) )
good_edges++;
}
if( father_info.left_most == invalid_node )father_info.left_most = *it;
if( current_info.left < father_info.left) father_info.left = current_info.left;
if( current_info.right > father_info.right) father_info.right = current_info.right;
father_info.num_nodes += current_info.num_nodes;
}
++it;
}
delete [] map_nodes;
delete [] hash_table;
return TREE.number_of_edges() - number_of_taxons() - good_edges +
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
}
double phylotreeIND::robinson_foulds_distance(phylotreeIND &other, int debug)
{
map <string, edge> split_repository;
graph &tree2 = other.TREE;
double distance = 0;
// create repository for splits in tree1
graph::edge_iterator it= TREE.edges_begin();
graph::edge_iterator it_e= TREE.edges_end();
map<string,edge>::iterator it_aux;
while(it != it_e)
{
split_repository[get_split_key(*it)] = *it; ++it;
}
// count for different splits
it = tree2.edges_begin();
it_e = tree2.edges_end();
while(it!=it_e)
{
if( (it_aux = split_repository.find( other.get_split_key(*it))) != split_repository.end()
|| (it_aux = split_repository.find( other.get_invert_split_key(*it)) ) != split_repository.end() )
{
distance += fabs( other.get_branch_length(*it)
- get_branch_length( (*it_aux).second ));
split_repository.erase( it_aux );
}
// edge in tree2 not found in tree1
else
{
distance += pow(other.get_branch_length(*it),2);
}
++it;
}
// edges in tree1 not found in tree2
it_aux = split_repository.begin();
while( it_aux != split_repository.end() )
{
distance += get_branch_length( (*it_aux).second );
++it_aux;
}
return distance;
}
void phylotreeIND::printtreeinfo() const
{
cout << this->TREE;
graph::node_iterator it=TREE.nodes_begin();
graph::node_iterator it_e=TREE.nodes_end();
cout << "taxa information" << endl;
while(it!=it_e)
{
if(istaxon(*it))cout << " * " << MAPNODETAXON[*it];
cout << endl;
++it;
}
}
double phylotreeIND::compare_topology_3(phylotreeIND &other)
{
// hash table
int n = number_of_taxons();
struct temp_info **hash_table; // hash that points struct info
struct temp_info *interior_node_info = new struct temp_info[n-1];
struct temp_info *interior_node_info_2 = new struct temp_info[n-1];
int idx_interior = 0;
node_map<struct temp_info*> interior_node(TREE, NULL);
node_map<struct temp_info*> interior_node_2(other.TREE, NULL);
edge_map<struct temp_info*> interior_edge(TREE, NULL);
// node mapes
int *map_nodes;
int node_count = 0;
int good_edges = 0;
// allocate memory
hash_table = new struct temp_info*[n];
for(int i=0; i<n; i++)hash_table[i] = NULL;
map_nodes = new int[n];
// step 1
// select last taxon as root
node invalid_node;
node root1 = taxon_number( n-1);
// step 2 and 3
postorder_Iterator it = postorder_begin( root1);
int l, r;
while( *it != root1 )
{
struct temp_info *father_info = interior_node [ it.ancestor() ] ;
struct temp_info *current_info = interior_node [ *it ] ;
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
if( istaxon(*it) )
{
// update the map
map_nodes[ taxon_id( *it) ] = r = node_count;
// check if is the leftmost
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
idx_interior++;
//father_info.left_most = *it;
father_info->left = node_count;
}
//else father_info.right = node_count;
node_count++;
++it;
}
else
{
int idx;
l = current_info->left;
interior_edge[ it.branch() ] = current_info;
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
idx_interior++;
father_info->left = current_info->left;
}
++it;
if (istaxon(*it) || *it==root1) idx = r;
else idx = l;
current_info->right = r;
// fill hash table
hash_table[ idx ] = current_info;
}
}
// step 4
node root2 = other.taxon_number( n-1);
// father of root2
node no_root2 = root2.in_edges_begin()->source();
interior_node_2.init(other.TREE, NULL);
it = postorder_begin( no_root2, root2);
idx_interior = 0;
while( *it != no_root2)
{
struct temp_info *father_info = interior_node_2 [ it.ancestor() ] ;
struct temp_info *current_info = interior_node_2 [ *it ] ;
if( istaxon(*it) )
{
if( father_info == NULL)
{
interior_node_2 [ it.ancestor() ] = father_info = &interior_node_info_2[idx_interior];
father_info->left = n;
father_info->right= -1;
father_info->num_nodes = 0;
idx_interior++;
//.left_most == invalid_node ) father_info.left_most = *it;
}
if( map_nodes[ other.taxon_id( *it) ] < father_info->left) father_info->left = map_nodes[ other.taxon_id(*it) ];
if( map_nodes[ other.taxon_id( *it) ] > father_info->right) father_info->right = map_nodes[ other.taxon_id(*it) ];
father_info->num_nodes++;
}
else
{
// check hash tables
if ( current_info->right - current_info->left + 1 ==
current_info->num_nodes)
{
//cout << "inicio checkeando hash" << current_info->left << " " << current_info->right << endl;
//cout << "hash table adress" << hash_table[current_info->left] << " " << hash_table[current_info->right] << endl;
if( hash_table[current_info->left]!=NULL)
if( hash_table[current_info->left]->left == current_info->left &&
hash_table[ current_info->left]->right == current_info->right) good_edges++;
if(hash_table[current_info->right]!=NULL)
if(hash_table[ current_info->right]->left == current_info->left &&
hash_table[ current_info->right]->right == current_info->right) good_edges++;
//cout << "fin checkeando hash " << good_edges << endl;
}
if( father_info == NULL)
{
interior_node_2 [ it.ancestor() ] = father_info = &interior_node_info_2[idx_interior];
father_info->left = n;
father_info->right= -1;
father_info->num_nodes = 0;
idx_interior++;
//.left_most == invalid_node ) father_info.left_most = *it;
}
if( current_info->left < father_info->left) father_info->left = current_info->left;
if( current_info->right > father_info->right) father_info->right = current_info->right;
father_info->num_nodes += current_info->num_nodes;
}
++it;
}
interior_edge.clear();
interior_node.clear();
interior_node_2.clear();
delete [] interior_node_info;
delete [] interior_node_info_2;
delete [] map_nodes;
delete [] hash_table;
return TREE.number_of_edges() - number_of_taxons() - good_edges +
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
}