diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/CMakeLists.txt b/contribution/branches/PhyloMOEA/PhyloMOEA/CMakeLists.txt index e43eb5f3e..32aa04b7f 100644 --- a/contribution/branches/PhyloMOEA/PhyloMOEA/CMakeLists.txt +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/CMakeLists.txt @@ -63,13 +63,23 @@ SET( UTILITARY_SOURCES eigensolver.cpp utility.cpp ) +SET( SPLIT_TEST_SOURCES RandomNr.cpp + Sequences.cpp + phylotreeIND.cpp + treeIterator.cpp + split_test.cpp +) + + ADD_EXECUTABLE( PhyloMOEA ${PHYLOMOEA_SOURCES} ) ADD_EXECUTABLE( testomp ${TESTOMP_SOURCES} ) ADD_EXECUTABLE( utility ${UTILITARY_SOURCES} ) +ADD_EXECUTABLE( split_test ${SPLIT_TEST_SOURCES} ) TARGET_LINK_LIBRARIES(PhyloMOEA gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2) TARGET_LINK_LIBRARIES(testomp gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp) TARGET_LINK_LIBRARIES(utility gsl gslcblas GTL eo eoutils ga moeo cma peo rmc_mpi xml2 gomp) +TARGET_LINK_LIBRARIES(split_test gsl gslcblas GTL eo eoutils) INSTALL( TARGETS PhyloMOEA RUNTIME DESTINATION bin) \ No newline at end of file diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/SplitCalculator.h b/contribution/branches/PhyloMOEA/PhyloMOEA/SplitCalculator.h index 5fd8d0e1e..20b5cadf7 100644 --- a/contribution/branches/PhyloMOEA/PhyloMOEA/SplitCalculator.h +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/SplitCalculator.h @@ -3,11 +3,15 @@ #include -struct split_info + struct split_table { - int left, right, num_nodes; - split_info() {}; - split_info(int n) : left(n), right(-1), num_nodes(0) {}; + phylotreeIND &tree; + vector splitstable; + node_map interior_node; + edge_map interior_edge; + + split_table( phylotreeIND &t ) : tree(t) {}; + }; @@ -24,12 +28,15 @@ public : virtual ~SplitCalculator() {} /// The pure virtual function that needs to be implemented by the subclass + struct split_info operator()(phylotreeIND &tree) { // hash table int n = tree.number_of_taxons(); struct split_info **hash_table; // hash that points struct info struct split_info *interior_node_info = new struct split_info[n-1]; + vector splitstable; + splitstable.resize(n); int idx_interior = 0; node_map interior_node(tree.TREE, NULL); @@ -37,8 +44,9 @@ public : // node mapes int *map_nodes; int node_count = 0; - int good_edges = 0; + int temp2 = 2; + // allocate memory hash_table = new struct split_info*[n]; @@ -47,7 +55,6 @@ public : // step 1 // select last taxon as root - node invalid_node; node root1 = tree.taxon_number( n-1); // step 2 and 3 @@ -58,17 +65,20 @@ public : { struct split_info *father_info = interior_node [ it.ancestor() ] ; struct split_info *current_info = interior_node [ *it ] ; + //cout << " node " << *it << " ancestral" << it.ancestor() << endl; if( tree.istaxon(*it) ) { // update the map map_nodes[ tree.taxon_id( *it) ] = r = node_count; - + splitstable[ tree.taxon_id( *it) ].map_to_node = node_count; + splitstable[ node_count ] = tree.taxon_id( *it); // check if is the leftmost if( father_info == NULL ) { interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior]; + interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]); idx_interior++; //father_info.left_most = *it; father_info->left = node_count; @@ -86,6 +96,7 @@ public : if( father_info == NULL ) { interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior]; + interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]); idx_interior++; father_info->left = current_info->left; } @@ -97,13 +108,12 @@ public : current_info->right = r; // fill hash table hash_table[ idx ] = current_info; + splitstable[ idx ].hash = current_info; } } delete [] interior_node_info; delete [] map_nodes; delete [] hash_table; - } - }; #endif \ No newline at end of file diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp index 6f9353931..fdf723c0b 100644 --- a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp @@ -39,6 +39,7 @@ struct temp_info }; + // return the element at position pos of the list template const T& select_edge_at_pos( const list &l, int pos) { @@ -1804,7 +1805,7 @@ double phylotreeIND::compare_topology_2(phylotreeIND &other) node invalid_node; node root1 = taxon_number( n-1); - // step 2 and 3 + // step 2 and 3+ postorder_Iterator it = postorder_begin( root1); /* while( *it != root1 ) @@ -2161,3 +2162,198 @@ double phylotreeIND::compare_topology_3(phylotreeIND &other) return TREE.number_of_edges() - number_of_taxons() - good_edges + other.TREE.number_of_edges() - number_of_taxons() - good_edges; } + + +void phylotreeIND::calculate_splits4() +{ + // hash table + int n = number_of_taxons(); + //struct split_info **hash_table; // hash that points struct info + //struct split_info *interior_node_info = new struct split_info[n-1]; + splitstable.resize(n); + + int idx_interior = 0; + interior_node.init(TREE, NULL); + interior_edge.init(TREE, NULL); + // node mapes + //int *map_nodes; + int node_count = 0; + + int temp2 = 2; + + + // step 1 + // select last taxon as root + node root1 = taxon_number( n-1); + + // step 2 and 3 + postorder_Iterator it = postorder_begin( root1); + + int l, r; + while( *it != root1 ) + { + struct split_info *father_info = interior_node [ it.ancestor() ] ; + struct split_info *current_info = interior_node [ *it ] ; + + + //cout << " node " << *it << " ancestral" << it.ancestor() << endl; + if( istaxon(*it) ) + { + // update the map + splitstable[ taxon_id( *it) ].map_to_node = r = node_count; + splitstable[ node_count ].node_to_map = taxon_id( *it); + // check if is the leftmost + if( father_info == NULL ) + { + interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]); + idx_interior++; + //father_info.left_most = *it; + father_info->left = node_count; + } + //else father_info.right = node_count; + node_count++; + ++it; + } + else + { + int idx; + l = current_info->left; + interior_edge[ it.branch() ] = current_info; + + if( father_info == NULL ) + { + interior_node [ it.ancestor() ] = father_info = &(splitstable[idx_interior]); + idx_interior++; + father_info->left = current_info->left; + } + + ++it; + if (istaxon(*it) || *it==root1) idx = r; + else idx = l; + + current_info->right = r; + // fill hash table + splitstable[ idx ].hash = current_info; + } + } +} + + + +double phylotreeIND::compare_topology_4(phylotreeIND &other) +{ + // hash table + int n = number_of_taxons(); + + node_map interior_node_2(other.TREE, NULL); + vector split_table2; + split_table2.resize(n); + + int node_count = 0; + int good_edges = 0; + + + // allocate memory + // step 4 + node root2 = other.taxon_number( n-1); + // father of root2 + node no_root2 = root2.in_edges_begin()->source(); + + postorder_Iterator it = postorder_begin( no_root2, root2); + + + int idx_interior = 0; + + while( *it != no_root2) + { + struct split_info *father_info = interior_node_2 [ it.ancestor() ] ; + struct split_info *current_info = interior_node_2 [ *it ] ; + + if( istaxon(*it) ) + { + + if( father_info == NULL) + { + interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]); + father_info->left = n; + father_info->right= -1; + father_info->num_nodes = 0; + idx_interior++; + //.left_most == invalid_node ) father_info.left_most = *it; + } + if( splitstable[ other.taxon_id( *it) ].map_to_node < father_info->left) father_info->left = splitstable[ other.taxon_id( *it) ].map_to_node; + if( splitstable[ other.taxon_id( *it) ].map_to_node > father_info->right) father_info->right = splitstable[ other.taxon_id( *it) ].map_to_node; + father_info->num_nodes++; + } + else + { + + + // check hash tables + if ( current_info->right - current_info->left + 1 == + current_info->num_nodes) + { + //cout << "inicio checkeando hash" << current_info->left << " " << current_info->right << endl; + //cout << "hash table adress" << hash_table[current_info->left] << " " << hash_table[current_info->right] << endl; + if( splitstable[current_info->left].hash !=NULL) + if( (splitstable[current_info->left].hash)->left == current_info->left && + (splitstable[current_info->left].hash)->right == current_info->right) good_edges++; + if( splitstable[current_info->right].hash!=NULL) + if((splitstable[ current_info->right].hash)->left == current_info->left && + (splitstable[ current_info->right].hash)->right == current_info->right) good_edges++; + //cout << "fin checkeando hash " << good_edges << endl; + } + + if( father_info == NULL) + { + interior_node_2 [ it.ancestor() ] = father_info = &(split_table2[idx_interior]); + father_info->left = n; + father_info->right= -1; + father_info->num_nodes = 0; + idx_interior++; + //.left_most == invalid_node ) father_info.left_most = *it; + } + if( current_info->left < father_info->left) father_info->left = current_info->left; + if( current_info->right > father_info->right) father_info->right = current_info->right; + father_info->num_nodes += current_info->num_nodes; + } + ++it; + } + interior_node_2.clear(); + return TREE.number_of_edges() - number_of_taxons() - good_edges + + other.TREE.number_of_edges() - number_of_taxons() - good_edges; +} + + + +void phylotreeIND::print_splits_2() const +{ + graph::edge_iterator it = TREE.edges_begin(); + graph::edge_iterator it_e = TREE.edges_end(); + while( it!= it_e) + { + cout << get_split_key_2( *it) << endl; + //print_split(*it); + ++it; + } +} + +string phylotreeIND::get_split_key_2( edge edgeaux) const +{ + int n= number_of_taxons(); + int idx_begin = edgeaux.id()*n; + string s(n, '.'); + if(is_internal(edgeaux)) + { + struct split_info *ref = interior_edge[edgeaux]; + //cout << ref->left << " " << ref->right << " "; + for(int i= ref->left; i <= ref->right; i++) + { + s[ splitstable[i].node_to_map ] = '*'; + //cout << splitstable[i].node_to_map << " " ; + } + } + else + s[ MAPNODETAXON[edgeaux.target()] ] = '*'; + return s; +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h index d02fbde66..6dfc87490 100644 --- a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h @@ -36,6 +36,19 @@ #include #include #include +#include + +struct split_info +{ + int left, right, num_nodes, map_to_node, node_to_map; + split_info *hash; + + split_info() {}; + split_info(int n) : left(n), right(-1), num_nodes(0), hash(NULL), map_to_node(-1), node_to_map(-1) {}; + bool invalid() { return right == -1; }; +}; + + class phylotreeIND @@ -47,6 +60,11 @@ class phylotreeIND vector MAPTAXONNODE; // each taxon number points to a node vector MAPIDEDGE; valarray NJDISTANCES; // distances stored in the edges + + vector splitstable; + node_map interior_node; + edge_map interior_edge; + //edge_map< node_map > SPLITS; // the splits that each edge separate the graph bool *split_bits; Sequences *seqpatterns; @@ -59,9 +77,6 @@ class phylotreeIND void init(); - void SPR(); //SPR operator - void NNI(); // NNI operator - void TBR(); // TBR operator void taxa_swap(); // taxa swap operator void collapse_node ( node ); @@ -75,6 +90,7 @@ class phylotreeIND void calculate_splits_from_edge2 ( edge, node ); int split_set_edge ( edge , edge ); void print_split ( edge ) const; + void obtain_subtree ( node , node *, list *, list * ); void construct_graph ( const phylotreeIND &, list & , list & ); void change_subtrees(); @@ -113,6 +129,9 @@ class phylotreeIND virtual phylotreeIND* clone() const; virtual phylotreeIND* randomClone() const; + void SPR(); //SPR operator + void NNI(); // NNI operator + void TBR(); // TBR operator GTL::graph TREE; // final tree calculated by NJ // constructors @@ -135,6 +154,7 @@ class phylotreeIND void convert_graph_to_tree ( node, node * ); void calculate_splits(); + void calculate_splits4(); void calculate_splits_exp(); inline void invalidate_splits() { valid_splits = false; } inline void remove_split_memory() @@ -150,6 +170,7 @@ class phylotreeIND } string get_split_key ( edge edgeaux ) const; + string get_split_key_2 ( edge edgeaux ) const; string get_invert_split_key ( edge edgeaux ) const; bool is_internal ( edge ) const; @@ -192,9 +213,10 @@ class phylotreeIND double compare_topology ( phylotreeIND &other ); double compare_topology_2 ( phylotreeIND &other ); double compare_topology_3 ( phylotreeIND &other ); + double compare_topology_4(phylotreeIND &other); double robinson_foulds_distance ( phylotreeIND &other, int debug=0 ); - + void print_splits_2 ( ) const; // iterator postorder_Iterator postorder_begin ( node root, node father ) const { @@ -286,5 +308,21 @@ class phylotreeIND }; template const T& select_edge_at_pos ( const list &, int ); + + +template +class TreeCalculator : public FunctorUnary +{ +public : + + /// virtual dtor here so there is no need to define it in derived classes + virtual ~TreeCalculator() {} + + /// The pure virtual function that needs to be implemented by the subclass + virtual R operator()(phylotreeIND &) = 0; + +}; + + #endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/split_test.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/split_test.cpp new file mode 100644 index 000000000..8cef97a4c --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/split_test.cpp @@ -0,0 +1,53 @@ +#include +#include +using namespace std; +gsl_rng *rn2; +RandomNr *rn; +//Sequences *seq; +long seed; +//vector arbores; +string datafile, path; +phylotreeIND *templatetree_ptr; + + +int main(int argc, char *argv[]) +{ + // measures execution time + eoParser parser(argc, argv); + datafile = parser.createParam(string(), "data", "Datafile", 'd',"Param").value(); + path = parser.createParam(string(), "path", "Treefile", 'p',"Param").value(); + + cout << "\n\nReading Sequence Datafile..." << path+datafile; + + datafile = path+datafile; + Sequences seq6(datafile.c_str()); +// Sequences seq7("/home/wcancino/experimentos/PhyloMOEA_0.2/omp_tests/datasets/TEST.500_5000"); + cout << " done.\n"; + // calculate datafile + cout << "calculating pattersn..." << endl; + + seq6.calculate_patterns(); + seq6.calculate_frequences(); + + + gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default); + RandomNr *rn = new RandomNr(time(NULL)); + phylotreeIND templatetree6( rn, seq6, rn2); + phylotreeIND *test = templatetree6.randomClone(); + phylotreeIND test2(*test); + test2.TBR(); + + test->calculate_splits4(); + test->print_splits_2(); + test->printNewick(cout); + test2.calculate_splits4(); + cout << "distance " << test->compare_topology_4(test2) << endl; + cout << "distance " << test->compare_topology_2(test2) << endl; + + +// of.close(); + gsl_rng_free(rn2); + // delete probmatrixs; + delete rn; + return 0; +} \ No newline at end of file diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/testomp.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/testomp.cpp index d532bed51..78266000d 100644 --- a/contribution/branches/PhyloMOEA/PhyloMOEA/testomp.cpp +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/testomp.cpp @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) seq6.calculate_frequences(); ostringstream os; - os << datafile << "_results_" << nthreads << ".txt"; + os << datafile << "_results_serial_" << nthreads << ".txt"; ofstream of(os.str().c_str()); gsl_rng *rn2 = gsl_rng_alloc(gsl_rng_default);