New split calculation

git-svn-id: svn://scm.gforge.inria.fr/svnroot/paradiseo@1592 331e1502-861f-0410-8da2-ba01fb791d7f
This commit is contained in:
wcancino 2009-06-25 15:32:07 +00:00
commit 5312c3321e
6 changed files with 322 additions and 15 deletions

View file

@ -63,13 +63,23 @@ SET( UTILITARY_SOURCES eigensolver.cpp
utility.cpp
)
SET( SPLIT_TEST_SOURCES RandomNr.cpp
Sequences.cpp
phylotreeIND.cpp
treeIterator.cpp
split_test.cpp
)
ADD_EXECUTABLE( PhyloMOEA ${PHYLOMOEA_SOURCES} )
ADD_EXECUTABLE( testomp ${TESTOMP_SOURCES} )
ADD_EXECUTABLE( utility ${UTILITARY_SOURCES} )
ADD_EXECUTABLE( split_test ${SPLIT_TEST_SOURCES} )
TARGET_LINK_LIBRARIES(PhyloMOEA gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2)
TARGET_LINK_LIBRARIES(testomp gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp)
TARGET_LINK_LIBRARIES(utility gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp)
TARGET_LINK_LIBRARIES(split_test gsl gslcblas GTL eo eoutils)
INSTALL( TARGETS PhyloMOEA RUNTIME DESTINATION bin)

View file

@ -3,11 +3,15 @@
#include <phylotreeIND.h>
struct split_info
struct split_table
{
int left, right, num_nodes;
split_info() {};
split_info(int n) : left(n), right(-1), num_nodes(0) {};
phylotreeIND &tree;
vector<struct split_info> splitstable;
node_map<struct split_info*> interior_node;
edge_map<struct split_info*> interior_edge;
split_table( phylotreeIND &t ) : tree(t) {};
};
@ -24,12 +28,15 @@ public :
virtual ~SplitCalculator() {}
/// The pure virtual function that needs to be implemented by the subclass
struct split_info operator()(phylotreeIND &tree)
{
// hash table
int n = tree.number_of_taxons();
struct split_info **hash_table; // hash that points struct info
struct split_info *interior_node_info = new struct split_info[n-1];
vector<struct split_info> splitstable;
splitstable.resize(n);
int idx_interior = 0;
node_map<struct split_info*> interior_node(tree.TREE, NULL);
@ -37,8 +44,9 @@ public :
// node mapes
int *map_nodes;
int node_count = 0;
int good_edges = 0;
int temp2 = 2;
// allocate memory
hash_table = new struct split_info*[n];
@ -47,7 +55,6 @@ public :
// step 1
// select last taxon as root
node invalid_node;
node root1 = tree.taxon_number( n-1);
// step 2 and 3
@ -58,17 +65,20 @@ public :
{
struct split_info *father_info = interior_node [ it.ancestor() ] ;
struct split_info *current_info = interior_node [ *it ] ;
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
if( tree.istaxon(*it) )
{
// update the map
map_nodes[ tree.taxon_id( *it) ] = r = node_count;
splitstable[ tree.taxon_id( *it) ].map_to_node = node_count;
splitstable[ node_count ] = tree.taxon_id( *it);
// check if is the leftmost
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
idx_interior++;
//father_info.left_most = *it;
father_info->left = node_count;
@ -86,6 +96,7 @@ public :
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
idx_interior++;
father_info->left = current_info->left;
}
@ -97,13 +108,12 @@ public :
current_info->right = r;
// fill hash table
hash_table[ idx ] = current_info;
splitstable[ idx ].hash = current_info;
}
}
delete [] interior_node_info;
delete [] map_nodes;
delete [] hash_table;
}
};
#endif

View file

@ -39,6 +39,7 @@ struct temp_info
};
// return the element at position pos of the list
template <typename T> const T& select_edge_at_pos( const list <T> &l, int pos)
{
@ -1804,7 +1805,7 @@ double phylotreeIND::compare_topology_2(phylotreeIND &other)
node invalid_node;
node root1 = taxon_number( n-1);
// step 2 and 3
// step 2 and 3+
postorder_Iterator it = postorder_begin( root1);
/*
while( *it != root1 )
@ -2161,3 +2162,198 @@ double phylotreeIND::compare_topology_3(phylotreeIND &other)
return TREE.number_of_edges() - number_of_taxons() - good_edges +
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
}
void phylotreeIND::calculate_splits4()
{
// hash table
int n = number_of_taxons();
//struct split_info **hash_table; // hash that points struct info
//struct split_info *interior_node_info = new struct split_info[n-1];
splitstable.resize(n);
int idx_interior = 0;
interior_node.init(TREE, NULL);
interior_edge.init(TREE, NULL);
// node mapes
//int *map_nodes;
int node_count = 0;
int temp2 = 2;
// step 1
// select last taxon as root
node root1 = taxon_number( n-1);
// step 2 and 3
postorder_Iterator it = postorder_begin( root1);
int l, r;
while( *it != root1 )
{
struct split_info *father_info = interior_node [ it.ancestor() ] ;
struct split_info *current_info = interior_node [ *it ] ;
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
if( istaxon(*it) )
{
// update the map
splitstable[ taxon_id( *it) ].map_to_node = r = node_count;
splitstable[ node_count ].node_to_map = taxon_id( *it);
// check if is the leftmost
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
idx_interior++;
//father_info.left_most = *it;
father_info->left = node_count;
}
//else father_info.right = node_count;
node_count++;
++it;
}
else
{
int idx;
l = current_info->left;
interior_edge[ it.branch() ] = current_info;
if( father_info == NULL )
{
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
idx_interior++;
father_info->left = current_info->left;
}
++it;
if (istaxon(*it) || *it==root1) idx = r;
else idx = l;
current_info->right = r;
// fill hash table
splitstable[ idx ].hash = current_info;
}
}
}
double phylotreeIND::compare_topology_4(phylotreeIND &other)
{
// hash table
int n = number_of_taxons();
node_map<struct split_info*> interior_node_2(other.TREE, NULL);
vector<struct split_info> split_table2;
split_table2.resize(n);
int node_count = 0;
int good_edges = 0;
// allocate memory
// step 4
node root2 = other.taxon_number( n-1);
// father of root2
node no_root2 = root2.in_edges_begin()->source();
postorder_Iterator it = postorder_begin( no_root2, root2);
int idx_interior = 0;
while( *it != no_root2)
{
struct split_info *father_info = interior_node_2 [ it.ancestor() ] ;
struct split_info *current_info = interior_node_2 [ *it ] ;
if( istaxon(*it) )
{
if( father_info == NULL)
{
interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]);
father_info->left = n;
father_info->right= -1;
father_info->num_nodes = 0;
idx_interior++;
//.left_most == invalid_node ) father_info.left_most = *it;
}
if( splitstable[ other.taxon_id( *it) ].map_to_node < father_info->left) father_info->left = splitstable[ other.taxon_id( *it) ].map_to_node;
if( splitstable[ other.taxon_id( *it) ].map_to_node > father_info->right) father_info->right = splitstable[ other.taxon_id( *it) ].map_to_node;
father_info->num_nodes++;
}
else
{
// check hash tables
if ( current_info->right - current_info->left + 1 ==
current_info->num_nodes)
{
//cout << "inicio checkeando hash" << current_info->left << " " << current_info->right << endl;
//cout << "hash table adress" << hash_table[current_info->left] << " " << hash_table[current_info->right] << endl;
if( splitstable[current_info->left].hash !=NULL)
if( (splitstable[current_info->left].hash)->left == current_info->left &&
(splitstable[current_info->left].hash)->right == current_info->right) good_edges++;
if( splitstable[current_info->right].hash!=NULL)
if((splitstable[ current_info->right].hash)->left == current_info->left &&
(splitstable[ current_info->right].hash)->right == current_info->right) good_edges++;
//cout << "fin checkeando hash " << good_edges << endl;
}
if( father_info == NULL)
{
interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]);
father_info->left = n;
father_info->right= -1;
father_info->num_nodes = 0;
idx_interior++;
//.left_most == invalid_node ) father_info.left_most = *it;
}
if( current_info->left < father_info->left) father_info->left = current_info->left;
if( current_info->right > father_info->right) father_info->right = current_info->right;
father_info->num_nodes += current_info->num_nodes;
}
++it;
}
interior_node_2.clear();
return TREE.number_of_edges() - number_of_taxons() - good_edges +
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
}
void phylotreeIND::print_splits_2() const
{
graph::edge_iterator it = TREE.edges_begin();
graph::edge_iterator it_e = TREE.edges_end();
while( it!= it_e)
{
cout << get_split_key_2( *it) << endl;
//print_split(*it);
++it;
}
}
string phylotreeIND::get_split_key_2( edge edgeaux) const
{
int n= number_of_taxons();
int idx_begin = edgeaux.id()*n;
string s(n, '.');
if(is_internal(edgeaux))
{
struct split_info *ref = interior_edge[edgeaux];
//cout << ref->left << " " << ref->right << " ";
for(int i= ref->left; i <= ref->right; i++)
{
s[ splitstable[i].node_to_map ] = '*';
//cout << splitstable[i].node_to_map << " " ;
}
}
else
s[ MAPNODETAXON[edgeaux.target()] ] = '*';
return s;
}

View file

@ -36,6 +36,19 @@
#include <treeIterator.h>
#include <stack>
#include <tree_limits.h>
#include <functors.h>
struct split_info
{
int left, right, num_nodes, map_to_node, node_to_map;
split_info *hash;
split_info() {};
split_info(int n) : left(n), right(-1), num_nodes(0), hash(NULL), map_to_node(-1), node_to_map(-1) {};
bool invalid() { return right == -1; };
};
class phylotreeIND
@ -47,6 +60,11 @@ class phylotreeIND
vector<node> MAPTAXONNODE; // each taxon number points to a node
vector<edge> MAPIDEDGE;
valarray<double> NJDISTANCES; // distances stored in the edges
vector<struct split_info> splitstable;
node_map<struct split_info*> interior_node;
edge_map<struct split_info*> interior_edge;
//edge_map< node_map<int> > SPLITS; // the splits that each edge separate the graph
bool *split_bits;
Sequences *seqpatterns;
@ -59,9 +77,6 @@ class phylotreeIND
void init();
void SPR(); //SPR operator
void NNI(); // NNI operator
void TBR(); // TBR operator
void taxa_swap(); // taxa swap operator
void collapse_node ( node );
@ -75,6 +90,7 @@ class phylotreeIND
void calculate_splits_from_edge2 ( edge, node );
int split_set_edge ( edge , edge );
void print_split ( edge ) const;
void obtain_subtree ( node , node *, list <edge> *, list<node> * );
void construct_graph ( const phylotreeIND &, list <node>& , list <edge>& );
void change_subtrees();
@ -113,6 +129,9 @@ class phylotreeIND
virtual phylotreeIND* clone() const;
virtual phylotreeIND* randomClone() const;
void SPR(); //SPR operator
void NNI(); // NNI operator
void TBR(); // TBR operator
GTL::graph TREE; // final tree calculated by NJ
// constructors
@ -135,6 +154,7 @@ class phylotreeIND
void convert_graph_to_tree ( node, node * );
void calculate_splits();
void calculate_splits4();
void calculate_splits_exp();
inline void invalidate_splits() { valid_splits = false; }
inline void remove_split_memory()
@ -150,6 +170,7 @@ class phylotreeIND
}
string get_split_key ( edge edgeaux ) const;
string get_split_key_2 ( edge edgeaux ) const;
string get_invert_split_key ( edge edgeaux ) const;
bool is_internal ( edge ) const;
@ -192,9 +213,10 @@ class phylotreeIND
double compare_topology ( phylotreeIND &other );
double compare_topology_2 ( phylotreeIND &other );
double compare_topology_3 ( phylotreeIND &other );
double compare_topology_4(phylotreeIND &other);
double robinson_foulds_distance ( phylotreeIND &other, int debug=0 );
void print_splits_2 ( ) const;
// iterator
postorder_Iterator postorder_begin ( node root, node father ) const
{
@ -286,5 +308,21 @@ class phylotreeIND
};
template<typename T> const T& select_edge_at_pos ( const list <T> &, int );
template <class R>
class TreeCalculator : public FunctorUnary<phylotreeIND &, R>
{
public :
/// virtual dtor here so there is no need to define it in derived classes
virtual ~TreeCalculator() {}
/// The pure virtual function that needs to be implemented by the subclass
virtual R operator()(phylotreeIND &) = 0;
};
#endif

View file

@ -0,0 +1,53 @@
#include <eo>
#include <phylotreeIND.h>
using namespace std;
gsl_rng *rn2;
RandomNr *rn;
//Sequences *seq;
long seed;
//vector<phylotreeIND> arbores;
string datafile, path;
phylotreeIND *templatetree_ptr;
int main(int argc, char *argv[])
{
// measures execution time
eoParser parser(argc, argv);
datafile = parser.createParam(string(), "data", "Datafile", 'd',"Param").value();
path = parser.createParam(string(), "path", "Treefile", 'p',"Param").value();
cout << "\n\nReading Sequence Datafile..." << path+datafile;
datafile = path+datafile;
Sequences seq6(datafile.c_str());
// Sequences seq7("/home/wcancino/experimentos/PhyloMOEA_0.2/omp_tests/datasets/TEST.500_5000");
cout << " done.\n";
// calculate datafile
cout << "calculating pattersn..." << endl;
seq6.calculate_patterns();
seq6.calculate_frequences();
gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default);
RandomNr *rn = new RandomNr(time(NULL));
phylotreeIND templatetree6( rn, seq6, rn2);
phylotreeIND *test = templatetree6.randomClone();
phylotreeIND test2(*test);
test2.TBR();
test->calculate_splits4();
test->print_splits_2();
test->printNewick(cout);
test2.calculate_splits4();
cout << "distance " << test->compare_topology_4(test2) << endl;
cout << "distance " << test->compare_topology_2(test2) << endl;
// of.close();
gsl_rng_free(rn2);
// delete probmatrixs;
delete rn;
return 0;
}

View file

@ -65,7 +65,7 @@ int main(int argc, char *argv[])
seq6.calculate_frequences();
ostringstream os;
os << datafile << "_results_" << nthreads << ".txt";
os << datafile << "_results_serial_" << nthreads << ".txt";
ofstream of(os.str().c_str());
gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default);