New split calculation
git-svn-id: svn://scm.gforge.inria.fr/svnroot/paradiseo@1592 331e1502-861f-0410-8da2-ba01fb791d7f
This commit is contained in:
parent
ab01ec6e79
commit
5312c3321e
6 changed files with 322 additions and 15 deletions
|
|
@ -63,13 +63,23 @@ SET( UTILITARY_SOURCES eigensolver.cpp
|
|||
utility.cpp
|
||||
)
|
||||
|
||||
SET( SPLIT_TEST_SOURCES RandomNr.cpp
|
||||
Sequences.cpp
|
||||
phylotreeIND.cpp
|
||||
treeIterator.cpp
|
||||
split_test.cpp
|
||||
)
|
||||
|
||||
|
||||
|
||||
ADD_EXECUTABLE( PhyloMOEA ${PHYLOMOEA_SOURCES} )
|
||||
ADD_EXECUTABLE( testomp ${TESTOMP_SOURCES} )
|
||||
ADD_EXECUTABLE( utility ${UTILITARY_SOURCES} )
|
||||
ADD_EXECUTABLE( split_test ${SPLIT_TEST_SOURCES} )
|
||||
|
||||
TARGET_LINK_LIBRARIES(PhyloMOEA gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2)
|
||||
TARGET_LINK_LIBRARIES(testomp gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp)
|
||||
TARGET_LINK_LIBRARIES(utility gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp)
|
||||
TARGET_LINK_LIBRARIES(split_test gsl gslcblas GTL eo eoutils)
|
||||
|
||||
INSTALL( TARGETS PhyloMOEA RUNTIME DESTINATION bin)
|
||||
|
|
@ -3,11 +3,15 @@
|
|||
|
||||
#include <phylotreeIND.h>
|
||||
|
||||
struct split_info
|
||||
struct split_table
|
||||
{
|
||||
int left, right, num_nodes;
|
||||
split_info() {};
|
||||
split_info(int n) : left(n), right(-1), num_nodes(0) {};
|
||||
phylotreeIND &tree;
|
||||
vector<struct split_info> splitstable;
|
||||
node_map<struct split_info*> interior_node;
|
||||
edge_map<struct split_info*> interior_edge;
|
||||
|
||||
split_table( phylotreeIND &t ) : tree(t) {};
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -24,12 +28,15 @@ public :
|
|||
virtual ~SplitCalculator() {}
|
||||
|
||||
/// The pure virtual function that needs to be implemented by the subclass
|
||||
|
||||
struct split_info operator()(phylotreeIND &tree)
|
||||
{
|
||||
// hash table
|
||||
int n = tree.number_of_taxons();
|
||||
struct split_info **hash_table; // hash that points struct info
|
||||
struct split_info *interior_node_info = new struct split_info[n-1];
|
||||
vector<struct split_info> splitstable;
|
||||
splitstable.resize(n);
|
||||
|
||||
int idx_interior = 0;
|
||||
node_map<struct split_info*> interior_node(tree.TREE, NULL);
|
||||
|
|
@ -37,8 +44,9 @@ public :
|
|||
// node mapes
|
||||
int *map_nodes;
|
||||
int node_count = 0;
|
||||
int good_edges = 0;
|
||||
|
||||
int temp2 = 2;
|
||||
|
||||
|
||||
// allocate memory
|
||||
hash_table = new struct split_info*[n];
|
||||
|
|
@ -47,7 +55,6 @@ public :
|
|||
|
||||
// step 1
|
||||
// select last taxon as root
|
||||
node invalid_node;
|
||||
node root1 = tree.taxon_number( n-1);
|
||||
|
||||
// step 2 and 3
|
||||
|
|
@ -58,17 +65,20 @@ public :
|
|||
{
|
||||
struct split_info *father_info = interior_node [ it.ancestor() ] ;
|
||||
struct split_info *current_info = interior_node [ *it ] ;
|
||||
|
||||
|
||||
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
|
||||
if( tree.istaxon(*it) )
|
||||
{
|
||||
// update the map
|
||||
map_nodes[ tree.taxon_id( *it) ] = r = node_count;
|
||||
|
||||
splitstable[ tree.taxon_id( *it) ].map_to_node = node_count;
|
||||
splitstable[ node_count ] = tree.taxon_id( *it);
|
||||
// check if is the leftmost
|
||||
if( father_info == NULL )
|
||||
{
|
||||
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
|
||||
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
|
||||
idx_interior++;
|
||||
//father_info.left_most = *it;
|
||||
father_info->left = node_count;
|
||||
|
|
@ -86,6 +96,7 @@ public :
|
|||
if( father_info == NULL )
|
||||
{
|
||||
interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior];
|
||||
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
|
||||
idx_interior++;
|
||||
father_info->left = current_info->left;
|
||||
}
|
||||
|
|
@ -97,13 +108,12 @@ public :
|
|||
current_info->right = r;
|
||||
// fill hash table
|
||||
hash_table[ idx ] = current_info;
|
||||
splitstable[ idx ].hash = current_info;
|
||||
}
|
||||
}
|
||||
delete [] interior_node_info;
|
||||
delete [] map_nodes;
|
||||
delete [] hash_table;
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -39,6 +39,7 @@ struct temp_info
|
|||
};
|
||||
|
||||
|
||||
|
||||
// return the element at position pos of the list
|
||||
template <typename T> const T& select_edge_at_pos( const list <T> &l, int pos)
|
||||
{
|
||||
|
|
@ -1804,7 +1805,7 @@ double phylotreeIND::compare_topology_2(phylotreeIND &other)
|
|||
node invalid_node;
|
||||
node root1 = taxon_number( n-1);
|
||||
|
||||
// step 2 and 3
|
||||
// step 2 and 3+
|
||||
postorder_Iterator it = postorder_begin( root1);
|
||||
/*
|
||||
while( *it != root1 )
|
||||
|
|
@ -2161,3 +2162,198 @@ double phylotreeIND::compare_topology_3(phylotreeIND &other)
|
|||
return TREE.number_of_edges() - number_of_taxons() - good_edges +
|
||||
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
|
||||
}
|
||||
|
||||
|
||||
void phylotreeIND::calculate_splits4()
|
||||
{
|
||||
// hash table
|
||||
int n = number_of_taxons();
|
||||
//struct split_info **hash_table; // hash that points struct info
|
||||
//struct split_info *interior_node_info = new struct split_info[n-1];
|
||||
splitstable.resize(n);
|
||||
|
||||
int idx_interior = 0;
|
||||
interior_node.init(TREE, NULL);
|
||||
interior_edge.init(TREE, NULL);
|
||||
// node mapes
|
||||
//int *map_nodes;
|
||||
int node_count = 0;
|
||||
|
||||
int temp2 = 2;
|
||||
|
||||
|
||||
// step 1
|
||||
// select last taxon as root
|
||||
node root1 = taxon_number( n-1);
|
||||
|
||||
// step 2 and 3
|
||||
postorder_Iterator it = postorder_begin( root1);
|
||||
|
||||
int l, r;
|
||||
while( *it != root1 )
|
||||
{
|
||||
struct split_info *father_info = interior_node [ it.ancestor() ] ;
|
||||
struct split_info *current_info = interior_node [ *it ] ;
|
||||
|
||||
|
||||
//cout << " node " << *it << " ancestral" << it.ancestor() << endl;
|
||||
if( istaxon(*it) )
|
||||
{
|
||||
// update the map
|
||||
splitstable[ taxon_id( *it) ].map_to_node = r = node_count;
|
||||
splitstable[ node_count ].node_to_map = taxon_id( *it);
|
||||
// check if is the leftmost
|
||||
if( father_info == NULL )
|
||||
{
|
||||
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
|
||||
idx_interior++;
|
||||
//father_info.left_most = *it;
|
||||
father_info->left = node_count;
|
||||
}
|
||||
//else father_info.right = node_count;
|
||||
node_count++;
|
||||
++it;
|
||||
}
|
||||
else
|
||||
{
|
||||
int idx;
|
||||
l = current_info->left;
|
||||
interior_edge[ it.branch() ] = current_info;
|
||||
|
||||
if( father_info == NULL )
|
||||
{
|
||||
interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]);
|
||||
idx_interior++;
|
||||
father_info->left = current_info->left;
|
||||
}
|
||||
|
||||
++it;
|
||||
if (istaxon(*it) || *it==root1) idx = r;
|
||||
else idx = l;
|
||||
|
||||
current_info->right = r;
|
||||
// fill hash table
|
||||
splitstable[ idx ].hash = current_info;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
double phylotreeIND::compare_topology_4(phylotreeIND &other)
|
||||
{
|
||||
// hash table
|
||||
int n = number_of_taxons();
|
||||
|
||||
node_map<struct split_info*> interior_node_2(other.TREE, NULL);
|
||||
vector<struct split_info> split_table2;
|
||||
split_table2.resize(n);
|
||||
|
||||
int node_count = 0;
|
||||
int good_edges = 0;
|
||||
|
||||
|
||||
// allocate memory
|
||||
// step 4
|
||||
node root2 = other.taxon_number( n-1);
|
||||
// father of root2
|
||||
node no_root2 = root2.in_edges_begin()->source();
|
||||
|
||||
postorder_Iterator it = postorder_begin( no_root2, root2);
|
||||
|
||||
|
||||
int idx_interior = 0;
|
||||
|
||||
while( *it != no_root2)
|
||||
{
|
||||
struct split_info *father_info = interior_node_2 [ it.ancestor() ] ;
|
||||
struct split_info *current_info = interior_node_2 [ *it ] ;
|
||||
|
||||
if( istaxon(*it) )
|
||||
{
|
||||
|
||||
if( father_info == NULL)
|
||||
{
|
||||
interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]);
|
||||
father_info->left = n;
|
||||
father_info->right= -1;
|
||||
father_info->num_nodes = 0;
|
||||
idx_interior++;
|
||||
//.left_most == invalid_node ) father_info.left_most = *it;
|
||||
}
|
||||
if( splitstable[ other.taxon_id( *it) ].map_to_node < father_info->left) father_info->left = splitstable[ other.taxon_id( *it) ].map_to_node;
|
||||
if( splitstable[ other.taxon_id( *it) ].map_to_node > father_info->right) father_info->right = splitstable[ other.taxon_id( *it) ].map_to_node;
|
||||
father_info->num_nodes++;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
|
||||
// check hash tables
|
||||
if ( current_info->right - current_info->left + 1 ==
|
||||
current_info->num_nodes)
|
||||
{
|
||||
//cout << "inicio checkeando hash" << current_info->left << " " << current_info->right << endl;
|
||||
//cout << "hash table adress" << hash_table[current_info->left] << " " << hash_table[current_info->right] << endl;
|
||||
if( splitstable[current_info->left].hash !=NULL)
|
||||
if( (splitstable[current_info->left].hash)->left == current_info->left &&
|
||||
(splitstable[current_info->left].hash)->right == current_info->right) good_edges++;
|
||||
if( splitstable[current_info->right].hash!=NULL)
|
||||
if((splitstable[ current_info->right].hash)->left == current_info->left &&
|
||||
(splitstable[ current_info->right].hash)->right == current_info->right) good_edges++;
|
||||
//cout << "fin checkeando hash " << good_edges << endl;
|
||||
}
|
||||
|
||||
if( father_info == NULL)
|
||||
{
|
||||
interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]);
|
||||
father_info->left = n;
|
||||
father_info->right= -1;
|
||||
father_info->num_nodes = 0;
|
||||
idx_interior++;
|
||||
//.left_most == invalid_node ) father_info.left_most = *it;
|
||||
}
|
||||
if( current_info->left < father_info->left) father_info->left = current_info->left;
|
||||
if( current_info->right > father_info->right) father_info->right = current_info->right;
|
||||
father_info->num_nodes += current_info->num_nodes;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
interior_node_2.clear();
|
||||
return TREE.number_of_edges() - number_of_taxons() - good_edges +
|
||||
other.TREE.number_of_edges() - number_of_taxons() - good_edges;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void phylotreeIND::print_splits_2() const
|
||||
{
|
||||
graph::edge_iterator it = TREE.edges_begin();
|
||||
graph::edge_iterator it_e = TREE.edges_end();
|
||||
while( it!= it_e)
|
||||
{
|
||||
cout << get_split_key_2( *it) << endl;
|
||||
//print_split(*it);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
string phylotreeIND::get_split_key_2( edge edgeaux) const
|
||||
{
|
||||
int n= number_of_taxons();
|
||||
int idx_begin = edgeaux.id()*n;
|
||||
string s(n, '.');
|
||||
if(is_internal(edgeaux))
|
||||
{
|
||||
struct split_info *ref = interior_edge[edgeaux];
|
||||
//cout << ref->left << " " << ref->right << " ";
|
||||
for(int i= ref->left; i <= ref->right; i++)
|
||||
{
|
||||
s[ splitstable[i].node_to_map ] = '*';
|
||||
//cout << splitstable[i].node_to_map << " " ;
|
||||
}
|
||||
}
|
||||
else
|
||||
s[ MAPNODETAXON[edgeaux.target()] ] = '*';
|
||||
return s;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,19 @@
|
|||
#include <treeIterator.h>
|
||||
#include <stack>
|
||||
#include <tree_limits.h>
|
||||
#include <functors.h>
|
||||
|
||||
struct split_info
|
||||
{
|
||||
int left, right, num_nodes, map_to_node, node_to_map;
|
||||
split_info *hash;
|
||||
|
||||
split_info() {};
|
||||
split_info(int n) : left(n), right(-1), num_nodes(0), hash(NULL), map_to_node(-1), node_to_map(-1) {};
|
||||
bool invalid() { return right == -1; };
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
class phylotreeIND
|
||||
|
|
@ -47,6 +60,11 @@ class phylotreeIND
|
|||
vector<node> MAPTAXONNODE; // each taxon number points to a node
|
||||
vector<edge> MAPIDEDGE;
|
||||
valarray<double> NJDISTANCES; // distances stored in the edges
|
||||
|
||||
vector<struct split_info> splitstable;
|
||||
node_map<struct split_info*> interior_node;
|
||||
edge_map<struct split_info*> interior_edge;
|
||||
|
||||
//edge_map< node_map<int> > SPLITS; // the splits that each edge separate the graph
|
||||
bool *split_bits;
|
||||
Sequences *seqpatterns;
|
||||
|
|
@ -59,9 +77,6 @@ class phylotreeIND
|
|||
|
||||
|
||||
void init();
|
||||
void SPR(); //SPR operator
|
||||
void NNI(); // NNI operator
|
||||
void TBR(); // TBR operator
|
||||
void taxa_swap(); // taxa swap operator
|
||||
void collapse_node ( node );
|
||||
|
||||
|
|
@ -75,6 +90,7 @@ class phylotreeIND
|
|||
void calculate_splits_from_edge2 ( edge, node );
|
||||
int split_set_edge ( edge , edge );
|
||||
void print_split ( edge ) const;
|
||||
|
||||
void obtain_subtree ( node , node *, list <edge> *, list<node> * );
|
||||
void construct_graph ( const phylotreeIND &, list <node>& , list <edge>& );
|
||||
void change_subtrees();
|
||||
|
|
@ -113,6 +129,9 @@ class phylotreeIND
|
|||
virtual phylotreeIND* clone() const;
|
||||
virtual phylotreeIND* randomClone() const;
|
||||
|
||||
void SPR(); //SPR operator
|
||||
void NNI(); // NNI operator
|
||||
void TBR(); // TBR operator
|
||||
|
||||
GTL::graph TREE; // final tree calculated by NJ
|
||||
// constructors
|
||||
|
|
@ -135,6 +154,7 @@ class phylotreeIND
|
|||
void convert_graph_to_tree ( node, node * );
|
||||
|
||||
void calculate_splits();
|
||||
void calculate_splits4();
|
||||
void calculate_splits_exp();
|
||||
inline void invalidate_splits() { valid_splits = false; }
|
||||
inline void remove_split_memory()
|
||||
|
|
@ -150,6 +170,7 @@ class phylotreeIND
|
|||
}
|
||||
|
||||
string get_split_key ( edge edgeaux ) const;
|
||||
string get_split_key_2 ( edge edgeaux ) const;
|
||||
string get_invert_split_key ( edge edgeaux ) const;
|
||||
|
||||
bool is_internal ( edge ) const;
|
||||
|
|
@ -192,9 +213,10 @@ class phylotreeIND
|
|||
double compare_topology ( phylotreeIND &other );
|
||||
double compare_topology_2 ( phylotreeIND &other );
|
||||
double compare_topology_3 ( phylotreeIND &other );
|
||||
double compare_topology_4(phylotreeIND &other);
|
||||
double robinson_foulds_distance ( phylotreeIND &other, int debug=0 );
|
||||
|
||||
|
||||
void print_splits_2 ( ) const;
|
||||
// iterator
|
||||
postorder_Iterator postorder_begin ( node root, node father ) const
|
||||
{
|
||||
|
|
@ -286,5 +308,21 @@ class phylotreeIND
|
|||
};
|
||||
|
||||
template<typename T> const T& select_edge_at_pos ( const list <T> &, int );
|
||||
|
||||
|
||||
template <class R>
|
||||
class TreeCalculator : public FunctorUnary<phylotreeIND &, R>
|
||||
{
|
||||
public :
|
||||
|
||||
/// virtual dtor here so there is no need to define it in derived classes
|
||||
virtual ~TreeCalculator() {}
|
||||
|
||||
/// The pure virtual function that needs to be implemented by the subclass
|
||||
virtual R operator()(phylotreeIND &) = 0;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
53
contribution/branches/PhyloMOEA/PhyloMOEA/split_test.cpp
Normal file
53
contribution/branches/PhyloMOEA/PhyloMOEA/split_test.cpp
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
#include <eo>
|
||||
#include <phylotreeIND.h>
|
||||
using namespace std;
|
||||
gsl_rng *rn2;
|
||||
RandomNr *rn;
|
||||
//Sequences *seq;
|
||||
long seed;
|
||||
//vector<phylotreeIND> arbores;
|
||||
string datafile, path;
|
||||
phylotreeIND *templatetree_ptr;
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// measures execution time
|
||||
eoParser parser(argc, argv);
|
||||
datafile = parser.createParam(string(), "data", "Datafile", 'd',"Param").value();
|
||||
path = parser.createParam(string(), "path", "Treefile", 'p',"Param").value();
|
||||
|
||||
cout << "\n\nReading Sequence Datafile..." << path+datafile;
|
||||
|
||||
datafile = path+datafile;
|
||||
Sequences seq6(datafile.c_str());
|
||||
// Sequences seq7("/home/wcancino/experimentos/PhyloMOEA_0.2/omp_tests/datasets/TEST.500_5000");
|
||||
cout << " done.\n";
|
||||
// calculate datafile
|
||||
cout << "calculating pattersn..." << endl;
|
||||
|
||||
seq6.calculate_patterns();
|
||||
seq6.calculate_frequences();
|
||||
|
||||
|
||||
gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default);
|
||||
RandomNr *rn = new RandomNr(time(NULL));
|
||||
phylotreeIND templatetree6( rn, seq6, rn2);
|
||||
phylotreeIND *test = templatetree6.randomClone();
|
||||
phylotreeIND test2(*test);
|
||||
test2.TBR();
|
||||
|
||||
test->calculate_splits4();
|
||||
test->print_splits_2();
|
||||
test->printNewick(cout);
|
||||
test2.calculate_splits4();
|
||||
cout << "distance " << test->compare_topology_4(test2) << endl;
|
||||
cout << "distance " << test->compare_topology_2(test2) << endl;
|
||||
|
||||
|
||||
// of.close();
|
||||
gsl_rng_free(rn2);
|
||||
// delete probmatrixs;
|
||||
delete rn;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -65,7 +65,7 @@ int main(int argc, char *argv[])
|
|||
seq6.calculate_frequences();
|
||||
|
||||
ostringstream os;
|
||||
os << datafile << "_results_" << nthreads << ".txt";
|
||||
os << datafile << "_results_serial_" << nthreads << ".txt";
|
||||
ofstream of(os.str().c_str());
|
||||
|
||||
gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue