From aa33715685149fd20892364504e5cc6b523e7c9c Mon Sep 17 00:00:00 2001 From: wcancino Date: Thu, 15 Jan 2009 15:27:23 +0000 Subject: [PATCH] First commit of PhyloMOEO files with some additions to Paradiseo-MOEO git-svn-id: svn://scm.gforge.inria.fr/svnroot/paradiseo@1335 331e1502-861f-0410-8da2-ba01fb791d7f --- .../PhyloMOEA/PhyloMOEA/ExceptionManager.h | 57 + .../branches/PhyloMOEA/PhyloMOEA/Makefile.am | 27 + .../branches/PhyloMOEA/PhyloMOEA/Makefile.in | 509 ++++ .../branches/PhyloMOEA/PhyloMOEA/Patterns.h | 54 + .../PhyloMOEA/PhyloMOEA/PhyloMOEA.cpp | 236 ++ .../branches/PhyloMOEA/PhyloMOEA/PhyloMOEO.h | 83 + .../PhyloMOEA/PhyloMOEOPartitionStat.h | 187 ++ .../PhyloMOEOProbMatrixContainerUpdater.h | 35 + .../PhyloMOEA/PhyloMOEA/PhyloMOEO_archive.h | 108 + .../PhyloMOEA/PhyloMOEA/PhyloMOEO_eval.h | 55 + .../PhyloMOEA/PhyloMOEA/PhyloMOEO_init.h | 51 + .../PhyloMOEA/PhyloMOEA/PhyloMOEO_operators.h | 61 + .../PhyloMOEA/PhyloMOEA/ProbMatrix.cpp | 178 ++ .../branches/PhyloMOEA/PhyloMOEA/ProbMatrix.h | 64 + .../branches/PhyloMOEA/PhyloMOEA/RandomNr.cpp | 238 ++ .../branches/PhyloMOEA/PhyloMOEA/RandomNr.h | 116 + .../PhyloMOEA/PhyloMOEA/Sequences.cpp | 276 +++ .../branches/PhyloMOEA/PhyloMOEA/Sequences.h | 89 + .../PhyloMOEA/PhyloMOEA/SubsModel.cpp | 214 ++ .../branches/PhyloMOEA/PhyloMOEA/SubsModel.h | 74 + .../PhyloMOEA/PhyloMOEA/eigensolver.cpp | 77 + .../PhyloMOEA/PhyloMOEA/eigensolver.h | 45 + .../PhyloMOEA/eoCountedFileMonitor.h | 66 + .../PhyloMOEA/eoSingleFileCountedStateSaver.h | 79 + .../PhyloMOEA/likelihoodcalculator.cpp | 544 +++++ .../PhyloMOEA/likelihoodcalculator.h | 142 ++ .../PhyloMOEA/PhyloMOEA/likoptimizer.cpp | 234 ++ .../PhyloMOEA/PhyloMOEA/likoptimizer.h | 121 + .../PhyloMOEA/PhyloMOEA/matrixutils.cpp | 907 +++++++ .../PhyloMOEA/PhyloMOEA/matrixutils.h | 44 + .../PhyloMOEA/PhyloMOEA/moeoObjVecStat.h | 148 ++ .../PhyloMOEA/PhyloMOEA/moeoPtrComparator.h | 41 + .../PhyloMOEA/parsimonycalculator.cpp | 332 +++ .../PhyloMOEA/PhyloMOEA/parsimonycalculator.h | 61 + .../PhyloMOEA/PhyloMOEA/phylotreeIND.cpp | 2161 +++++++++++++++++ .../PhyloMOEA/PhyloMOEA/phylotreeIND.h | 290 +++ .../PhyloMOEA/probmatrixcontainer.cpp | 23 + .../PhyloMOEA/PhyloMOEA/probmatrixcontainer.h | 93 + .../PhyloMOEA/PhyloMOEA/treeIterator.cpp | 179 ++ .../PhyloMOEA/PhyloMOEA/treeIterator.h | 104 + .../PhyloMOEA/PhyloMOEA/tree_limits.h | 25 + .../branches/PhyloMOEA/PhyloMOEA/utils.cpp | 111 + .../branches/PhyloMOEA/PhyloMOEA/utils.h | 34 + .../PhyloMOEA/PhyloMOEA/vectorSortIndex.h | 50 + 44 files changed, 8623 insertions(+) create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/ExceptionManager.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.am create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.in create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/Patterns.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEA.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOPartitionStat.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOProbMatrixContainerUpdater.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_archive.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_eval.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_init.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_operators.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/eigensolver.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/eigensolver.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/eoCountedFileMonitor.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/eoSingleFileCountedStateSaver.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/matrixutils.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/matrixutils.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/moeoObjVecStat.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/moeoPtrComparator.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/tree_limits.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/utils.cpp create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/utils.h create mode 100644 contribution/branches/PhyloMOEA/PhyloMOEA/vectorSortIndex.h diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/ExceptionManager.h b/contribution/branches/PhyloMOEA/PhyloMOEA/ExceptionManager.h new file mode 100644 index 000000000..e91133239 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/ExceptionManager.h @@ -0,0 +1,57 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include +#include +#include +#ifndef _ExceptionManager_H_ +#define _ExceptionManager_H_ + +using namespace std; + +class ExceptionManager +{ + string msg; + short number; + public: + ExceptionManager(int n) { + number = n; + switch(number){ + case 10: + msg = "Could not open file\n"; + break; + case 11: + msg ="Incorrect file format\n"; + break; + case 12: + msg = "Could not write to file\n"; + break; + } + }; + void Report() + { + cout << endl << msg << endl; + cout << "PhyloMOEA was finished because errors." << endl; + exit(1); + } +}; + +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.am b/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.am new file mode 100644 index 000000000..3e11ec5b3 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.am @@ -0,0 +1,27 @@ +INCLUDES = -I$(top_srcdir)/. -I$(top_srcdir)/eo -I$(top_srcdir)/mo \ + -I$(top_srcdir)/moeo -I$(top_srcdir)/moeo/core +METASOURCES = AUTO +bin_PROGRAMS = PhyloMOEA +PhyloMOEA_SOURCES = ExceptionManager.h Patterns.h PhyloMOEA.cpp ProbMatrix.cpp \ + ProbMatrix.h RandomNr.cpp RandomNr.h Sequences.cpp Sequences.h SubsModel.cpp \ + SubsModel.h eigensolver.cpp eigensolver.h likelihoodcalculator.cpp \ + likelihoodcalculator.h likoptimizer.cpp likoptimizer.h matrixutils.cpp matrixutils.h \ + parsimonycalculator.cpp parsimonycalculator.h phylotreeIND.cpp phylotreeIND.h \ + probmatrixcontainer.cpp probmatrixcontainer.h treeIterator.cpp treeIterator.h tree_limits.h \ + utils.cpp +PhyloMOEA_LDADD = $(top_builddir)/moeo/core/libmoeo.la \ + $(top_builddir)/eo/utils/libutils.la $(top_builddir)/eo/ga/libga.la $(top_builddir)/eo/libeo.la -lGTL -lgsl \ + -lgslcblas +noinst_HEADERS = PhyloMOEOProbMatrixContainerUpdater.h moeoObjVecStat.h \ + vectorSortIndex.h +_SOURCES = PhyloMOEO.h +_SOURCES = PhyloMOEO.h PhyloMOEO_operators.h +_SOURCES = PhyloMOEO.h PhyloMOEO_init.h PhyloMOEO_operators.h +_SOURCES = PhyloMOEO.h PhyloMOEO_eval.h PhyloMOEO_init.h PhyloMOEO_operators.h +_SOURCES = PhyloMOEO.h PhyloMOEO_eval.h PhyloMOEO_init.h PhyloMOEO_operators.h \ + utils.h +_SOURCES = eoCountedFileMonitor.h +_SOURCES = eoCountedFileMonitor.h eoSingleFileCountedStateSaver.h +_SOURCES = moeoPtrComparator.h +_SOURCES = PhyloMOEO_archive.h +_SOURCES = PhyloMOEOPartitionStat.h PhyloMOEO_archive.h diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.in b/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.in new file mode 100644 index 000000000..30b046a24 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/Makefile.in @@ -0,0 +1,509 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = PhyloMOEA$(EXEEXT) +subdir = moeo/PhyloMOEA +DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +PROGRAMS = $(bin_PROGRAMS) +am_PhyloMOEA_OBJECTS = PhyloMOEA.$(OBJEXT) ProbMatrix.$(OBJEXT) \ + RandomNr.$(OBJEXT) Sequences.$(OBJEXT) SubsModel.$(OBJEXT) \ + eigensolver.$(OBJEXT) likelihoodcalculator.$(OBJEXT) \ + likoptimizer.$(OBJEXT) matrixutils.$(OBJEXT) \ + parsimonycalculator.$(OBJEXT) phylotreeIND.$(OBJEXT) \ + probmatrixcontainer.$(OBJEXT) treeIterator.$(OBJEXT) \ + utils.$(OBJEXT) +PhyloMOEA_OBJECTS = $(am_PhyloMOEA_OBJECTS) +PhyloMOEA_DEPENDENCIES = $(top_builddir)/moeo/core/libmoeo.la \ + $(top_builddir)/eo/utils/libutils.la \ + $(top_builddir)/eo/ga/libga.la $(top_builddir)/eo/libeo.la +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(PhyloMOEA_SOURCES) +DIST_SOURCES = $(PhyloMOEA_SOURCES) +HEADERS = $(noinst_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NMEDIT = @NMEDIT@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +INCLUDES = -I$(top_srcdir)/. -I$(top_srcdir)/eo -I$(top_srcdir)/mo \ + -I$(top_srcdir)/moeo -I$(top_srcdir)/moeo/core + +METASOURCES = AUTO +PhyloMOEA_SOURCES = ExceptionManager.h Patterns.h PhyloMOEA.cpp ProbMatrix.cpp \ + ProbMatrix.h RandomNr.cpp RandomNr.h Sequences.cpp Sequences.h SubsModel.cpp \ + SubsModel.h eigensolver.cpp eigensolver.h likelihoodcalculator.cpp \ + likelihoodcalculator.h likoptimizer.cpp likoptimizer.h matrixutils.cpp matrixutils.h \ + parsimonycalculator.cpp parsimonycalculator.h phylotreeIND.cpp phylotreeIND.h \ + probmatrixcontainer.cpp probmatrixcontainer.h treeIterator.cpp treeIterator.h tree_limits.h \ + utils.cpp + +PhyloMOEA_LDADD = $(top_builddir)/moeo/core/libmoeo.la \ + $(top_builddir)/eo/utils/libutils.la $(top_builddir)/eo/ga/libga.la $(top_builddir)/eo/libeo.la -lGTL -lgsl \ + -lgslcblas + +noinst_HEADERS = PhyloMOEOProbMatrixContainerUpdater.h moeoObjVecStat.h \ + vectorSortIndex.h + +_SOURCES = PhyloMOEOPartitionStat.h PhyloMOEO_archive.h +all: all-am + +.SUFFIXES: +.SUFFIXES: .cpp .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu moeo/PhyloMOEA/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu moeo/PhyloMOEA/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + || test -f $$p1 \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +PhyloMOEA$(EXEEXT): $(PhyloMOEA_OBJECTS) $(PhyloMOEA_DEPENDENCIES) + @rm -f PhyloMOEA$(EXEEXT) + $(CXXLINK) $(PhyloMOEA_OBJECTS) $(PhyloMOEA_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/PhyloMOEA.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ProbMatrix.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/RandomNr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Sequences.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/SubsModel.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eigensolver.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/likelihoodcalculator.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/likoptimizer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrixutils.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parsimonycalculator.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phylotreeIND.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/probmatrixcontainer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/treeIterator.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Po@am__quote@ + +.cpp.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am \ + uninstall-binPROGRAMS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/Patterns.h b/contribution/branches/PhyloMOEA/PhyloMOEA/Patterns.h new file mode 100644 index 000000000..0914ec56d --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/Patterns.h @@ -0,0 +1,54 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PATTERNS_H +#define PATTERNS_H +#include +#include +#include + +class Patterns +{ + private: + int num_patterns; + + double freqs[4]; + std::vector pattern; + std::vector pattern_count; + Sequences &seqs; + public: + // return a pattern content + Patterns(Sequences &S); + ~Patterns(); + unsigned char* operator[](int n) const { return pattern[n]; } + inline int count(int n) const { return pattern_count[n]; } + inline int count() const { return num_patterns; } + inline double *frequences() { return freqs; } + inline double frequence(int i) const { return freqs[i]; } + inline std::string pattern_name(int i) const { return seqs.seqname(i); } + inline int search_taxon_name(std::string &s) const { return seqs.search_taxon_name(s); } + inline bool is_ambiguous(short int l) const { return seqs.is_ambiguous(l); } + inline bool is_gap(short int l) const { return seqs.is_gap(l); } + inline bool is_undefined(short int l) const { return seqs.is_undefined(l); } + inline unsigned char* ambiguos_meaning(short int l) const { return seqs.ambiguos_meaning(l); } + void calculate_frequences(); + int num_sequences(); +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEA.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEA.cpp new file mode 100644 index 000000000..945d96b9f --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEA.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +gsl_rng *rn2; +RandomNr *rn; +//Sequences *seq; +long seed; +//vector arbores; +string datafile,usertree, expid, path; +double pcrossover, pmutation, kappa, alpha; +unsigned int ngenerations, popsize, ncats; +ofstream exp_data,evolution_data, best_media_scores, final_trees, final_pareto_trees, clades_pareto, clades_final,final_scores,pareto_scores; + + +int main(int argc, char *argv[]) +{ + welcome_message(); + + eoParser parser(argc, argv); + + + seed = parser.createParam((unsigned int)(time(NULL)), "seed", "Random Seed", 's',"Param").value(); + + popsize = parser.createParam((unsigned int)(50), "popSize", "Population size", 'n',"Param").value(); + pcrossover = parser.createParam((double)(0.8), "pcross", "Crossover Rate", 'c',"Param").value(); + pmutation = parser.createParam((double)(0.1), "pmut", "Mutation Rate", 'm',"Param").value(); + ngenerations = parser.createParam((unsigned int)(500), "nGen", "Number of Generations", 'g',"Param").value(); + kappa = parser.createParam((double)(4.0), "kappa", "Kappa value", 'k',"Param").value(); + alpha = parser.createParam((double)(2.0), "alpha", "Alpha value", 'a',"Param").value(); + ncats = parser.createParam((unsigned int)(4), "nCat", "Number of Categories", 'r',"Param").value(); + datafile = parser.createParam(string(), "data", "Datafile", 'd',"Param").value(); + usertree = parser.createParam(string(), "treef", "Treefile", 't',"Param").value(); + path = parser.createParam(string(), "path", "Treefile", 'p',"Param").value(); + ostringstream convert; + convert << seed; + expid = parser.createParam(convert.str(), "expid", "Experiment ID", 'e',"Param").value(); + + + if( datafile.size()==0 ) + { + parser.printHelp( cout ); + return(-1); + } + + string filename = path + datafile; + cout << "\n\nReading Sequence Datafile..."; + Sequences seq(filename.c_str()); + cout << " done.\n"; + // calculate datafile + seq.calculate_patterns(); + seq.calculate_frequences(); + + rn2 = gsl_rng_alloc(gsl_rng_default); + rn = new RandomNr(seed); + phylotreeIND templatetree( rn, seq, rn2); + ParsimonyCalculator parsi_calc(templatetree); + SubstModel modelHKY( seq, SubstModel::HKY85); + modelHKY.init(); + modelHKY.set_kappa(kappa); // banco_grande + ProbMatrixContainer probmatrixs(modelHKY); + LikelihoodCalculator lik_calc(templatetree, modelHKY, probmatrixs,ncats); + lik_calc.set_alpha(alpha); + modelHKY.init(); + PhyloEval byobj( parsi_calc, lik_calc ); + + Phyloraninit initializer(templatetree); + + eoState state; + + //eoPop &population = state.takeOwnership(eoPop(popsize, initializer)); + + eoPop population(popsize, initializer); + //state.registerObject( population ); + + cout << "\n\nReading Initial Trees..."; + if( usertree.size() >0) + { + filename = path + usertree; + readtrees(filename.c_str(), population); + } + cout << " done.\n"; + + + cout << "\n\nCreating output files..."; + + try{ + filename = path + datafile + "_exp_param_" + expid + ".txt"; + exp_data.open(filename.c_str()); + exp_data.precision(15); + exp_data.setf(ios::fixed); + + + + if( !exp_data.is_open() ) + { + throw( ExceptionManager(12) ); + } + cout << " done.\n"; + } + catch ( ExceptionManager e ) + { + e.Report(); + } + // create the moea + save_exp_params(exp_data); + seq.save_seq_data(exp_data); + + moeoAverageObjVecStat bestfit; + moeoBestObjVecStat avgfit; + eoPopStat popstats; + + eoCountedFileMonitor media_scores( 2, path + datafile + "_media_scores_" + expid + ".txt", "\t", true,true ); + media_scores.add( bestfit); + media_scores.add( avgfit) ; + + eoCountedFileMonitor evolution_scores( 2, path + datafile + "_evolution_data_" + expid + ".txt", "\n", true,true ); + evolution_scores.add( popstats); + + + cout << "\n\nRunning NSGA-II ..." << endl; + + eoGenContinue continuator(ngenerations); + eoCheckPoint cp(continuator); + eoValueParam generationCounter(0, "Gen."); + eoIncrementor increment(generationCounter.value()); + cp.add(increment); + eoStdoutMonitor monitor(false); + monitor.add(generationCounter); + cp.add(monitor); + Phylomutate mutator; + Phylocross crossover; + eoSequentialOp operadores; + operadores.add(crossover,pcrossover); + operadores.add(mutator,pmutation); + PhyloMOEOProbMatrixContainerUpdater probmatrixupdater(probmatrixs); + cp.add( bestfit); + cp.add( avgfit); + cp.add( media_scores); + cp.add( evolution_scores ); + cp.add( popstats); + cp.add( probmatrixupdater ); + +// apply ( byobj, population ); +// population.printOn(cout); + moeoNSGAII < PhyloMOEO > nsgaII (cp, byobj, operadores); + + nsgaII(population); + + cout << "\nCalculating Final Solutions..."; + cout << " done\n"; + + PhyloMOEOFinalSolutionsArchive finalsolutions; + finalsolutions.update(population); + + //remove_final_solutions( population ); + // optimize remaining solutions + cout << "\nOptimizing tree branch lenghts...\n"; + + optimize_solutions( finalsolutions, lik_calc ); + cout << "\nReevaluating individuals \n"; + apply ( byobj, finalsolutions ); + + finalsolutions.save_scores(path + datafile + "_final_scores_" + expid + ".txt","#Final Solutions Scores"); + finalsolutions.save_trees(path + datafile + "_final_trees_" + expid + ".txt"); + cout << "\ndone \n"; + + // print the optimized solutions + //print_scores_pop( -2, population, evolution_data); + + //print_scores_pop( -2, population, final_scores); + + //save_trees(finalsolutions, final_trees); + cout << "\n\nCalculating Final Solutions clade support..."; + + PhyloMOEOPartitionStat splitstats; + splitstats(finalsolutions); + eoFileMonitor finalsplitstatsaver(path+datafile+"_clades_final_"+expid+".txt"); + finalsplitstatsaver.add(splitstats); + finalsplitstatsaver(); + //cout << splitstats.value() << endl; + //partition_map split_frequences; + //calculate_frequence_splits(finalsolutions,split_frequences); + cout << " done\n"; + //save_partitions(splitstats.value(), clades_final); + //split_frequences.clear(); + // remove dominate solutions + cout << "\nCalculating Pareto-optimal Solutions..."; + + PhyloMOEOParetoSolutionsArchive paretosolutions; + paretosolutions.update(finalsolutions); + paretosolutions.save_scores(path + datafile + "_pareto_scores_" + expid + ".txt","#Pareto Solutions Scores"); + paretosolutions.save_trees(path + datafile + "_pareto_trees_" + expid + ".txt"); + cout << " done\n"; + // print final pareto trees + //save_trees( paretosolutions, final_pareto_trees); + cout << "\nCalculating Pareto-optimal Solutions clade support..."; + splitstats(paretosolutions); + //calculate_frequence_splits(paretosolutions,split_frequences); + eoFileMonitor paretosplitstatsaver(path+datafile+"_clades_pareto_"+expid+".txt"); + paretosplitstatsaver.add(splitstats); + paretosplitstatsaver(); + +// save_partitions(splitstats.value(), clades_pareto); + //split_frequences.clear(); + cout << " done\n"; + exp_data.close(); + evolution_data.close(); + pareto_scores.close(); + final_scores.close(); + best_media_scores.close(); + final_trees.close(); + final_pareto_trees.close(); + clades_pareto.close(); + clades_final.close(); + gsl_rng_free(rn2); +// delete probmatrixs; + delete rn; + cout << "\nPhyloMOEA execution finishes !\n"; + return 0; +} + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO.h new file mode 100644 index 000000000..f77036786 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO.h @@ -0,0 +1,83 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PHYLOMOEO_H_ +#define PHYLOMOEO_H_ + +#include "phylotreeIND.h" +#include + +class ObjectiveVectorTraits : public moeoObjectiveVectorTraits +{ +public: + static bool minimizing (int i) + { + return true; + } + static bool maximizing (int i) + { + return false; + } + static unsigned int nObjectives () + { + return 2; + } +}; + +typedef moeoRealObjectiveVector < ObjectiveVectorTraits > ObjectiveVector; + +class PhyloMOEO : public MOEO< ObjectiveVector, double, double > +{ +private: + phylotreeIND *tree; + void copy(const PhyloMOEO &other) + { + if(tree!=NULL)delete tree; + tree=NULL; + if(other.tree!=NULL)tree = new phylotreeIND(other.get_tree()); + if(!other.invalidObjectiveVector())this->objectiveVector( other.objectiveVector() ); + if(!other.invalidFitness())this->fitness( other.fitness() ); + if(!other.invalidDiversity())this->diversity( other.diversity() ); + } + +public: + PhyloMOEO() : MOEO < ObjectiveVector, double, double >() { tree = NULL; } + PhyloMOEO(const PhyloMOEO &other) : MOEO < ObjectiveVector, double, double >(other) { + tree=NULL; + if(other.tree!=NULL)tree = new phylotreeIND(other.get_tree()); + } + void set_tree_template(phylotreeIND &other) { if(tree!=NULL)delete tree; tree = other.clone(); }; + void set_random_tree(phylotreeIND &other) { if(tree!=NULL)delete tree; tree = other.randomClone(); }; + PhyloMOEO& operator= (const PhyloMOEO& other) { + copy(other); + return *this; + } + void readFrom (std::istream &_is) + { + string s; + _is >> s; + tree->read_newick2( s ); + } + phylotreeIND & get_tree() const { return *tree; } + ~PhyloMOEO() { if(tree!=NULL)delete tree; } +}; +#endif + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOPartitionStat.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOPartitionStat.h new file mode 100644 index 000000000..0066181fd --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOPartitionStat.h @@ -0,0 +1,187 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef PHYLOMOEOPARTITONSTAT_H_ +#define PHYLOMOEOPARTITONSTAT_H_ + +#include +#include +//#include + +struct part_info +{ + int freq; double prob; + double dp, dl, tdp, tdl; + short int type; // 001=intermediate 1, 010=mp 2, 100=ml 4, + // 110= mp & ml 6, 101 = ml & int 5 011 mp & int 3 , 111 tree 7 + bool inverse; + part_info(): freq(0), prob(0.0), dp(0), dl(0), tdp(0), tdl(0), type(0), inverse(false) {}; +}; + +typedef std::map partition_map; + + + class PhyloMOEOPartitionStat : public eoStat + { + public : + + using eoStat::value; + + PhyloMOEOPartitionStat(std::string _description = "Partition Stats") + : eoStat(partition_map(), _description) {} + + virtual void operator()(const eoPop& _pop){ + doit(_pop); // specializations for scalar and std::vector + } + + virtual std::string className(void) const { return "PhyloMOEOPartitionStat"; } + + + private : + + // Specialization for pareto fitness + void doit(const eoPop& _pop) + { + value().clear(); + calculate_frequence_splits( _pop, value() ); + } + + void calculate_frequence_splits( const eoPop &pop, partition_map &split_frequences) + { + bool ismp, isml; + isml = isml = false; + double dtp, dtl, maxdl, maxdp, dp, dl; + int n = pop.size(); + double best_ml_score, best_mp_score; + string split, split_i; + graph::edge_iterator it, it_e; + //print_media_scores(pop, -1, best_l_idx, best_p_idx); + moeoBestObjVecStat best_inds; + best_inds(pop); + best_mp_score = (best_inds.value())[0]; + best_ml_score = (best_inds.value())[1]; + const PhyloMOEO &best_l = best_inds.bestindividuals(1); + const PhyloMOEO &best_p = best_inds.bestindividuals(0); + maxdl = maxdp = 0; + for(int i=0; icompare_topology_3( *best_l); + dtp = sol.get_tree().compare_topology_2( best_p.get_tree()) / (2.0*(sol.get_tree().TREE.number_of_edges() - sol.get_tree().number_of_taxons())); + //sol->compare_topology_3( *best_p); + while(it!=it_e) + { + if( sol.get_tree().is_internal( *it)) + { + split = sol.get_tree().get_split_key( *it); + split_i = sol.get_tree().get_invert_split_key( *it); + if( split_frequences.find(split) != split_frequences.end() ) + { + split_frequences[split].freq++; + split_frequences[split].prob += 1.0/n; + } + else if( split_frequences.find(split_i) != split_frequences.end() ) + { + split_frequences[split_i].freq++; + split_frequences[split_i].prob += 1.0/n; + split = split_i; + } + else + { + split_frequences[split].freq = 1; + split_frequences[split].prob = 1.0/n; + } + if( isml ) + split_frequences[split].type |= 4; // split belongs ml tree + if( ismp ) + split_frequences[split].type |= 2; // split belongs mp tree + if( !(isml || ismp)) split_frequences[split].type |= 1; // split belong intermediate + dp = sol.objectiveVector().operator[](0) - best_mp_score; + dl = sol.objectiveVector().operator[](1) - best_ml_score; + split_frequences[split].dp += dp; + split_frequences[split].dl += dl; + maxdp = (dp > maxdp ? dp : maxdp); + maxdl = (dl > maxdl ? dl : maxdl); + split_frequences[split].tdp += dtp; + split_frequences[split].tdl += dtl; + } + ++it; + } + //sol->invalidate_splits(); + sol.get_tree().remove_split_memory(); + } + + partition_map::iterator it1 = split_frequences.begin(); + partition_map::iterator it2 = split_frequences.end(); + + while(it1!=it2) + { + if(maxdp!=0) (*it1).second.dp /= (1.0*(*it1).second.freq*maxdp); + if(maxdl!=0)(*it1).second.dl /= (1.0*(*it1).second.freq*maxdl); + (*it1).second.tdp /= (1.0*(*it1).second.freq); + (*it1).second.tdl /= (1.0*(*it1).second.freq); + ++it1; + } + } + + + }; + +std::string inverse_split( string split ) + { + string s; + string::iterator it1 = split.begin(); + string::iterator it2 = split.end(); + while(it1!= it2) + { + s += *it1 == '*' ? '.' : '*'; + ++it1; + } + return s; + } + + +std::ostream & operator<<(std::ostream & _os, const partition_map & _map) { + partition_map::const_iterator it1 = _map.begin(); + partition_map::const_iterator it2 = _map.end(); + _os << _map.size() << endl; + _os.setf(ios::fixed); + while(it1!=it2) + { + _os << + ((*it1).second.inverse ? inverse_split((*it1).first) : (*it1).first) + << '\t' << (*it1).second.freq << '\t' << (*it1).second.prob << '\t' + << (*it1).second.dp << '\t' << (*it1).second.dl << '\t' + << (*it1).second.tdp << '\t' << (*it1).second.tdl << '\t' << (*it1).second.type << endl; + ++it1; + } + return _os; +} + +std::istream & operator>>(std::istream & _is, const partition_map & _map) { std::cout << "not implemented\n"; return _is; } +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOProbMatrixContainerUpdater.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOProbMatrixContainerUpdater.h new file mode 100644 index 000000000..bbca02dd2 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEOProbMatrixContainerUpdater.h @@ -0,0 +1,35 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PHYLOMOEOPROBMATRIXCONTAINERUPDATER_H_ +#define PHYLOMOEOPROBMATRIXCONTAINERUPDATER_H_ + +#include + +class PhyloMOEOProbMatrixContainerUpdater: public eoUpdater +{ + public: + PhyloMOEOProbMatrixContainerUpdater(ProbMatrixContainer &_pm): pm(_pm) {} + void operator() () { pm.clear(); } + void lastCall() { operator()(); }; + private: + ProbMatrixContainer ± +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_archive.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_archive.h new file mode 100644 index 000000000..96c652b09 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_archive.h @@ -0,0 +1,108 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef PHYLOMOEO_ARCHIVE_H +#define PHYLOMOEO_ARCHIVE_H + +#include + +typedef moeoArchive PhyloMOEOArchive; +typedef PhyloMOEOArchive PhyloMOEOPFArchive; + + + + +class PhyloMOEOParetoSolutionsArchive:public moeoArchive +{ + public: + void save_trees( std::string filename, string title="" ) + { + create_file( filename ); + if(title.size()>0) os << title << endl; + os << size() << std::endl; + for(int i=0; i0) os << title << endl; + for(int i=0; i & _pop) + { + std::copy(_pop.begin(), _pop.end(), back_inserter(*this) ); + } +}; + +class PhyloMOEOFinalSolutionsArchive: public PhyloMOEOParetoSolutionsArchive +{ + public: + void update(const eoPop < PhyloMOEO > & _pop) + { + std::copy(_pop.begin(), _pop.end(), back_inserter(*this) ); + + moeoParetoObjectiveVectorComparator < ObjectiveVector > paretoComparator; + for(int i=0; i< size()-1; i++) + { + //cout << (i+1)/(p.currentSize()*1.0) << "%" << endl; + for(int j=i+1; j< size(); j++) + { + phylotreeIND &sol = operator[](i).get_tree(); + phylotreeIND &sol_2 = operator[](j).get_tree(); + if( sol.compare_topology_2( sol_2) == 0) + { + if( paretoComparator( operator[](i).objectiveVector(), operator[](j).objectiveVector() ) ) + { + operator[](i) = back(); + pop_back(); + i--; j = size(); + } + else + { + operator[](j) = back(); + pop_back(); + j--; + } + } + } + } + } +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_eval.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_eval.h new file mode 100644 index 000000000..b80fe30be --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_eval.h @@ -0,0 +1,55 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PHYLOMOEO_EVAL_H_ +#define PHYLOMOEO_EVAL_H_ +#include +#include +#include + +class PhyloEval : public moeoEvalFunc < PhyloMOEO > +{ +public: + + PhyloEval( ParsimonyCalculator &x, LikelihoodCalculator &y): parseval(x), likeval(y) { } + + void operator () (PhyloMOEO & _sol) + { + if (_sol.invalidObjectiveVector()) + { + + ObjectiveVector objVec; + //if(! (_sol.get_tree().splits_valid() ) ) _sol.get_tree().calculate_splits(); + parseval.set_tree(_sol.get_tree()); + likeval.set_tree(_sol.get_tree()); + //objVec[0] = 1; + //objVec[1] = 1; + objVec[0] = parseval.fitch(); + objVec[1] = -likeval.calculate_likelihood(); + _sol.objectiveVector(objVec); + } + } + +private: + LikelihoodCalculator &likeval; + ParsimonyCalculator &parseval; +}; + +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_init.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_init.h new file mode 100644 index 000000000..c404c7cc8 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_init.h @@ -0,0 +1,51 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PHYLOMOEO_INIT_H_ +#define PHYLOMOEO_INIT_H_ +#include +class Phyloraninit : public eoInit +{ + public: + Phyloraninit(phylotreeIND &templatetree): tree(templatetree) { } + + void operator()(PhyloMOEO &ind) + { + ind.set_random_tree(tree); + } + private: + phylotreeIND &tree; +}; + +class Phylonewickinit : public eoInit +{ + public: + Phylonewickinit(std::string newick): tree_string(newick) { } + + void operator()(PhyloMOEO &ind) + { + phylotreeIND &tree = ind.get_tree(); + tree.read_newick2( tree_string ); + } + private: + std::string tree_string; +}; +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_operators.h b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_operators.h new file mode 100644 index 000000000..04cdf3e1c --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/PhyloMOEO_operators.h @@ -0,0 +1,61 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef _PHYLOMOEO_OPERATORS_H_ +#define _PHYLOMOEO_OPERATORS_H_ +#include + + +class Phylocross : public eoQuadOp < PhyloMOEO > + { + public: + + /** + * the class name (used to display statistics) + */ + std::string className() const { return "Phylocross"; } ; + + + bool operator()(PhyloMOEO & _tree1, PhyloMOEO & _tree2) + { + phylotreeIND parent_1(_tree1.get_tree()); + phylotreeIND parent_2(_tree2.get_tree()); + phylotreeIND &son1 = _tree1.get_tree(); + phylotreeIND &son2 = _tree2.get_tree(); + parent_1.export_subtree( son2 ); + parent_2.export_subtree( son1 ); + parent_1.invalidate_splits(); + parent_2.invalidate_splits(); + return true; + } + }; + +class Phylomutate : public eoMonOp +{ + public: + std::string className() const { return "Phylomut"; } + + bool operator() (PhyloMOEO & _tree) + { + _tree.get_tree().mutate(1); + return true; + } + +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.cpp new file mode 100644 index 000000000..13435d6fd --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.cpp @@ -0,0 +1,178 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include "ProbMatrix.h" +#include +#include + +gsl_cheb_series ** ProbMatrix::cs = NULL; + +double f_pij(double x, void *p) +{ + struct params *ij = (struct params *)p; + ij->pmatrix->set_branch_length(x); + return ij->pmatrix->p_ij_t(ij->i,ij->j); +} + + + +ProbMatrix::ProbMatrix(SubstModel *m, double bl) +{ + model = m; + branchlenght = bl; + calculate_derivate=false; + p = new double[4*4]; + p1d = new double[4*4]; + p2d = new double[4*4]; + tmpexp = new double[4*4]; +} + +/* +void ProbMatrix::init() +{ + struct params par; + double tmpbl = branchlenght; + int k=0; + // first call + gsl_function F; + F.function = f_pij; + F.params = ∥ + if(cs == NULL) + { + std::cout << "initializaing polynomials...\n"; + cs = new gsl_cheb_series*[16]; + calculatepmatrix(); + // calculate chevchevy polynomials + for(int i=0; i<4; i++) + for(int j=0;j<4;j++) + { + cs[k] = gsl_cheb_alloc(8); + par.pmatrix = this; + par.i = i; par.j = j; + gsl_cheb_init (cs[k], &F, 0.0, 1.0); + k++; + } + } + //restore the original branch lenght + branchlenght = tmpbl; + calculatepmatrixchev(); +}*/ + + + +void ProbMatrix::init_derivate() +{ + if(calculate_derivate)return; + calculate_p1d_matrix(); + calculate_p2d_matrix(); + calculate_derivate=true; +} + +// set a new branch lenght and recalculate the probability matrix +void ProbMatrix::set_branch_length(double t) +{ + // save time if t = older branch lenght + if ( branchlenght != t) + { + branchlenght = t; + calculatepmatrix(); + calculate_p1d_matrix(); + calculate_p2d_matrix(); + } + calculate_derivate=false; +} + +// calculate the matrix P = evec*diag( e^eigenvalues )*ievec +void ProbMatrix::calculatepmatrix() +{ + double temp; + + double **xevec = model->eigensystem->eigenvectors(); + double *xeval = model->eigensystem->eigenvalues(); + double **xievec = (double **)model->ievec; + + //diag( e^eigenvalues )*ievec + for (int i = 0; i < 4; i++) + { + temp = exp( branchlenght * xeval[i] ); + for (int j = 0; j < 4; j++) + tmpexp[i*4+j] = xievec[i][j]*temp; + } + + // P matrix + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + temp = 0.0; + for (int k = 0; k < 4; k++) + temp += xevec[i][k] * tmpexp[k*4+j]; + p[i*4+j] = fabs(temp); + } + + } +} + +void ProbMatrix::calculatepmatrixchev() +{ + int k=0; + for(int i=0; i<4; i++) + for(int j=0; j<4; j++) + { + p[i*4+j] = gsl_cheb_eval(cs[i*4+j], branchlenght); + k++; + } +} + +// matrix Q*P +void ProbMatrix::calculate_p1d_matrix() +{ + double temp; + double *q=model->rate; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + temp = 0.0; + for (int k = 0; k < 4; k++) + temp += q[i*4+k] * p[k*4+j]; + p1d[i*4+j] = temp; + } + } +} + +// calculate Q^2*P +void ProbMatrix::calculate_p2d_matrix() +{ + double temp; + double *q=model->rate; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + temp = 0.0; + for (int k = 0; k < 4; k++) + temp += q[i*4+k]*p1d[i*4+k]; + p2d[i*4+j] = temp; + } + } +} + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.h b/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.h new file mode 100644 index 000000000..b39488281 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/ProbMatrix.h @@ -0,0 +1,64 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef PROBMATRIX_H +#define PROBMATRIX_H +#include "SubsModel.h" +#include +#include +#include +#include +// store a set of the probability matric according some brach-lenght + +class ProbMatrix +{ + private: + + SubstModel *model; + + double branchlenght; + + double *tmpexp; + + static gsl_cheb_series **cs; + + bool calculate_derivate; + void calculatepmatrix(); + void calculatepmatrixchev(); + void calculate_p1d_matrix(); + void calculate_p2d_matrix(); + + public: + double *p, *p1d, *p2d; + ProbMatrix( SubstModel *m, double bl ); + ~ProbMatrix() { delete [] tmpexp; delete [] p; delete [] p1d; delete [] p2d; } + inline void init() { calculatepmatrix(); } + //void init(); + void init_derivate(); + inline double p_ij_t(int i, int j) { return p[i*4+j]; } + inline double p1d_ij_t(int i, int j) { return p1d[i*4+j]; } + inline double p2d_ij_t(int i, int j) { return p2d[i*4+j]; } + void set_branch_length(double l); +}; + +struct params { ProbMatrix *pmatrix; int i; int j; }; + +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.cpp new file mode 100644 index 000000000..d55c29dd3 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.cpp @@ -0,0 +1,238 @@ +/******************************************************************************* + RandomNr.cpp + + last change: 01/20/1999 + + version: 0.0.0 + + design: Eckart Zitzler + Paul E. Sevinc + + implementation: Paul E. Sevinc + + (c) 1998-1999: Computer Engineering and Networks Laboratory + Swiss Federal Institute of Technology Zurich + + description: + See RandomNr.h +*******************************************************************************/ + +#include "RandomNr.h" + +#include +#include +#include +#include +#include + + +using namespace std; + +static long idum2=123456789; +static long iy=0; +static long iv[NTAB]; +static long idum=0; + + +RandomNr::RandomNr() +{ + z = static_cast< long >( time( 0 ) ); // see + iy = idum = 0; + sran1(z); +} + + +RandomNr::RandomNr( long seed ) + : z( seed ) +{ + sran2(seed); +} + + +void +RandomNr::setSeed( long seed ) +{ + z = seed; + sran2(seed); +} + + +double +RandomNr::uniform01() +{ +// see Reiser, Martin / Wirth, Niklaus. +// - Programming in Oberon: Steps beyond Pascal and Modula. + + const int a = 16807, + m = LONG_MAX, // see - replace by + // numeric_limits< long >::max() + // when is available + q = m / a, + r = m % a; + + long gamma; + + gamma = a * ( z % q ) - r * ( z / q ); + z = ( gamma > 0 ) ? gamma : ( gamma + m ); + return z * ( 1.0 / m ); + +// NOTE: we're in trouble if at some point z becomes -LONG_MAX, 0, or LONG_MAX... +} + + +void +RandomNr::sran1(unsigned int seed) { + int j; + long k; + + idum = seed; + if (idum == 0) idum=1; + if (idum < 0) idum = -idum; + for (j=NTAB+7;j>=0;j--) { + k=(idum)/IQ; + idum=IA*(idum-k*IQ)-IR*k; + if (idum < 0) idum += IM; + if (j < NTAB) iv[j] = idum; + } + iy=iv[0]; +} + +double +RandomNr::ran1() { + int j; + long k; + double temp; + k=(idum)/IQ; + idum=IA*(idum-k*IQ)-IR*k; + if (idum < 0) idum += IM; + j=iy/NDIV; + iy=iv[j]; + iv[j] = idum; + if ((temp=AM*iy) > RNMX) return RNMX; + else return temp; +} + +void +RandomNr::sran2(unsigned int seed) { + int j; + long k; + idum = static_cast(seed); + if (idum == 0) idum=1; + if (idum < 0) idum = -idum; + idum2=(idum); + for (j=NTAB+7;j>=0;j--) { + k=(idum)/IQ1; + idum=IA1*(idum-k*IQ1)-k*IR1; + if (idum < 0) idum += IM1; + if (j < NTAB) iv[j] = idum; + } + iy=iv[0]; +} + +double +RandomNr::ran2() { + int j; + long k; + float temp; + k=(idum)/IQ1; + idum=IA1*(idum-k*IQ1)-k*IR1; + if (idum < 0) idum += IM1; + k=idum2/IQ2; + idum2=IA2*(idum2-k*IQ2)-k*IR2; + if (idum2 < 0) idum2 += IM2; + j=iy/NDIV; + iy=iv[j]-idum2; + iv[j] = idum; + if (iy < 1) iy += IMM1; + if ((temp=AM*iy) > RNMX) { + return RNMX; + } + else{ + return temp; + } +} + + + +bool +RandomNr::flipP( double p ) +{ + return ran2() < p ? true : false; +} + + +unsigned int +RandomNr::uniform0Max( unsigned int max ) // exclusive +{ + return static_cast< unsigned short >( max * ran2() ); +} + + +int +RandomNr::uniformMinMax( int min, // inclusive + int max ) // exclusive +{ + return min >= 0 + ? static_cast< int >( min + ( max - min ) * ran2() ) + : static_cast< int >( min - 1 + ( max - min ) * ran2() ); + +} + +int +RandomNr::gaussMinMax( int min, // inclusive + int max ) // exclusive +{ + double tmp = UnitGaussian(); + if ((max*tmp > max) || ( max*tmp < min)) + { + return max; + } + return min >= 0 + ? static_cast< int >( max * tmp ) + : static_cast< int >( max * tmp ); + +} + + + + +double +RandomNr::doubleuniformMinMax( double min, // inclusive + double max ) // exclusive +{ + return static_cast< double >( min + ( max - min ) * ran2() ); + +} + +double RandomNr::UnitGaussian(){ + static int cached=0; + static double cachevalue; + if(cached == 1){ + cached = 0; + return cachevalue; + } + + double rsquare, factor, var1, var2; + do{ + var1 = 2.0 * ran2() - 1.0; + var2 = 2.0 * ran2() - 1.0; + + rsquare = var1*var1 + var2*var2; + } while(rsquare >= 1.0 || rsquare == 0.0); + + double val = -2.0 * log(rsquare) / rsquare; + if(val > 0.0) factor = sqrt(val); + else factor = 0.0; // should not happen, but might due to roundoff + + cachevalue = var1 * factor; + cached = 1; + + return (var2 * factor); +} + +void RandomNr::resumo(){ + cout << "semilla =" << z << endl; +} + + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.h b/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.h new file mode 100644 index 000000000..0d6406b0a --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/RandomNr.h @@ -0,0 +1,116 @@ +/******************************************************************************* + RandomNr.h + + last change: 01/20/1999 + + version: 0.0.0 + + design: Eckart Zitzler + Paul E. Sevinc + + implementation: Paul E. Sevinc + + (c) 1998-1999: Computer Engineering and Networks Laboratory + Swiss Federal Institute of Technology Zurich + + description: + RandomNr is a helper class for pseudo-random number + generation that doesn't need to be subclassed unless + one wants to replace the algorithm used or add + another method. + + Usually, one only instance per optimization problem + is necessary, and classes or functions that make use + of random numbers keep a reference to that instance. +*******************************************************************************/ + +#ifndef RANDOM_NR_H +#define RANDOM_NR_H + +#include +//#include "TIKEAFExceptions.h" + + +#define IM1 2147483563L +#define IM2 2147483399L +#define AM (1.0/IM1) +#define IMM1 (IM1-1) +#define IA1 40014L +#define IA2 40692L +#define IQ1 53668L +#define IQ2 52774L +#define IR1 12211L +#define IR2 3791 +#define NTAB 32 +#define NDIV (1+IMM1/NTAB) +#define EPS 1.2e-7 +#define RNMX (1.0-EPS) + +#define IA 16807L +#define IM 2147483647L +//#define AM (1.0/IM) +#define IQ 127773L +#define IR 2836L +//#define NTAB 32 +//#define NDIV (1+(IM-1)/NTAB) +//#define EPS 1.2e-7 +//#define RNMX (1.0-EPS) + + +//typedef unsigned int size_t; + +/*static long iy = 0; +static long iv[NTAB]; +static long idum = 0;*/ + + +class RandomNr +{ + protected: + long z; + + public: + // use the current time as seed + RandomNr(); + + // use the argument as seed + RandomNr( long ); + + void setSeed( long ); + virtual ~RandomNr() {}; + // return the next uniformly distributed + // random real in ]0; 1[ + virtual double uniform01(); + + // return the next uniformly distributed + // random integer in [Min; Max[ + int uniformMinMax( int, int ); +// throw ( LimitsException ); + + double doubleuniformMinMax(double, double); + + int gaussMinMax(int, int ); // exclusive + + + double UnitGaussian(); + + // return the next uniformly distributed + // random integer (size_t) in [0; Max[ + unsigned int uniform0Max( unsigned int ); +// throw ( LimitsException ); + + // flip an unfair coin (head is up with + // probability P), return true if head + // is up and false otherwise + bool flipP( double ); + //throw ( ProbabilityException ); + + void sran1(unsigned int seed); + double ran1(); + void sran2(unsigned int seed); + double ran2(); + void resumo(); + long get_seed() { return z; } +}; + +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.cpp new file mode 100644 index 000000000..b0ad5150a --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.cpp @@ -0,0 +1,276 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include +#include +#include +#include +#include "assert.h" +#include + + + + +void Sequences::list_taxons() const { + std::cout << "lista de taxons" << std::endl; + for(int i=0; i< seqnames.size(); i++) + std::cout << seqnames[i] << std::endl; +} + +using namespace std; + +Sequences::Sequences(const char *fn) +{ + numseqs = 0; + seqlen = 0; + seqs = NULL; + + fstream fin; + string tmpseq; + + // ambiguos characters + meaning[0][0] = meaning[0][1] = 1; meaning[0][2] = meaning[0][3] = meaning[0][4] = 0; // M + meaning[1][0] = meaning[1][2] = 1; meaning[1][1] = meaning[1][3] = meaning[1][4] = 0; // R + meaning[2][0] = meaning[2][3] = 1; meaning[2][1] = meaning[2][2] = meaning[2][4] = 0; // W + meaning[3][1] = meaning[3][2] = 1; meaning[3][0] = meaning[3][3] = meaning[3][4] = 0; // S + meaning[4][1] = meaning[4][3] = 1; meaning[4][0] = meaning[4][2] = meaning[4][4] = 0; // Y + meaning[5][2] = meaning[5][3] = 1; meaning[5][0] = meaning[5][1] = meaning[5][4] = 0; // K + + meaning[6][0] = meaning[6][1] = meaning[6][2] = 1 ; meaning[6][3] = meaning[6][4] = 0; // V + meaning[7][0] = meaning[7][1] = meaning[7][3] = 1 ; meaning[7][2] = meaning[7][4] = 0; // H + meaning[8][0] = meaning[8][2] = meaning[8][3] = 1 ; meaning[8][1] = meaning[8][4] = 0; // D + meaning[9][1] = meaning[9][2] = meaning[9][3] = 1 ; meaning[9][0] = meaning[9][4] = 0; // B + meaning[10][1] = meaning[10][2] = meaning[10][3] = meaning[10][0] = 0; meaning[10][4] = 1; // gap + meaning[11][1] = meaning[11][2] = meaning[11][3] = meaning[11][0] = 1; meaning[11][4] = 0; // undefined + // open the file for reading + + try{ + fin.open(fn, ios::in); + if(!fin.is_open()) + { + cout << "\n" << fn << endl; + throw ExceptionManager(10); + } + + // read numseq and seqlen + if(!(fin >> numseqs >> seqlen)) + { + throw ExceptionManager(11); + } + seqs = new unsigned char[numseqs*seqlen]; + seqnames.resize(numseqs); + + for(int i=0; i < numseqs; i++) + { + fin >> seqnames[i]; + fin >> tmpseq; + for(int j=0; j=0 && seqnames[i].substr(0,10)!=s; i--); + return i; +} + + +// return true is the site is informative +bool Sequences::is_informative(unsigned int col) const +{ + unsigned char temp[16]; + int sum = 0; + memset( temp, 0, 16*sizeof(unsigned char)); + for(int j=0; j< numseqs; j++) + { + // check if the site is repeated + unsigned char l = (*this)[j][col]; //SeqData->pattern_pos(i,j); + if( temp[l] == 0) + { + temp[l] = 1; sum++; + } + } + // informative sites have at least two differents chars + return(sum > 1); +} + + +void Sequences::calculate_patterns() +{ + // map pattern and index in the vector pattern + num_patterns=num_inf_sites=0; + + map pattern_map; + struct PatternInfo p; + string s; + for(int i=0; i< seqlen; i++) + { + + s = position(i); + //if(s.find('?',0)== string::npos)cout << i << endl; + // new pattern + if( pattern_map.find(s) == pattern_map.end() ) + { + pattern_map[s] = num_patterns; + p.idx = i; + p.count = 1; + patterns.push_back(p); + // calculate informative site + if( is_informative(i) ) + { + inf_sites.push_back(num_patterns); + num_inf_sites++; + } + num_patterns++; + } + // old pattern + else + patterns[ pattern_map[s] ].count++; + } + //cout << "Padrones reconhecidos:" << num_patterns << endl; + //cout << "Sites informativos :" << num_inf_sites << endl; +} + +void Sequences::calculate_frequences() +{ + long num_undefined = 0; // number of undefined positions + freqs[0] = freqs[1] = freqs[2] = freqs[3] = 0.25; + double tmp[4]; + double divisor; + // i don't know why most program do this + for(int m=0; m<8; m++) + { + tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0; + for(int i=0; i< num_patterns; i++) + for(int j=0; j< numseqs; j++) + { + + unsigned char l = pattern_pos(i,j); + + if ( is_ambiguous(l) ) + { + divisor = 0; + for(int k=0; k<4; k++) + divisor += ambiguos_meaning(l)[k] * freqs[k]; + for(int k=0; k<4; k++) + tmp[k] += + (ambiguos_meaning(l)[k]*freqs[k]/divisor)* + patterns[i].count; + } + else if( is_undefined(l) || is_gap(l) ) + for(int k=0; k<4; k++) tmp[k] += freqs[k]*patterns[i].count; + else + tmp[l] += patterns[i].count; + } + for(int k=0; k<4; k++) freqs[k] = tmp[k] / (tmp[0]+tmp[1]+tmp[2]+tmp[3]); + } + // final frequences + //for(int i=0; i<4; i++){ + // cout << "frequencia de " << i << " -->:" << freqs[i] << endl; + //} + // cout << "suma de frequencias:" << freqs[0] + freqs[1] + freqs[2] + freqs[3] << endl; +} + + +// get the +string Sequences::position(int pos) +{ + string column; + unsigned char c; + char nucleotide; + for(int i=0; i < numseqs; i++) + { + c = seq_pos(i, pos); + switch(c) + { + case 0: nucleotide = 'A'; break; + case 1: nucleotide = 'C'; break; + case 2: nucleotide = 'G'; break; + case 3: nucleotide = 'T'; break; + case 4: nucleotide = 'M'; break; + case 5: nucleotide = 'R'; break; + case 6: nucleotide = 'W'; break; + case 7: nucleotide = 'S'; break; + case 8: nucleotide = 'Y'; break; + case 9: nucleotide = 'K'; break; + case 10: nucleotide = 'V'; break; + case 11: nucleotide = 'H'; break; + case 12: nucleotide = 'D'; break; + case 13: nucleotide = 'B'; break; + case 14: nucleotide = '-'; break; + case 15: nucleotide = '?'; break; + default: assert(1==0); break; + } + column+=nucleotide; + } + return column; +} + +void Sequences::save_seq_data(ostream &of=cout) +{ + of << "\nSequence Datafile Statistics\n"; + of << "----------------------------------------------\n"; + of << "Number of Sequences : " << this->numseqs << endl; + of << "Sequence Length : " << this->seqlen << endl; + of << "Number of Patterns : " << this->num_patterns << endl; + of << "Frequency A : " << this->freqs[0] << endl; + of << "Frequency C : " << this->freqs[1] << endl; + of << "Frequency G : " << this->freqs[2] << endl; + of << "Frequency T : " << this->freqs[3] << endl; +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.h b/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.h new file mode 100644 index 000000000..c975c832a --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/Sequences.h @@ -0,0 +1,89 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef SEQUENCES_H +#define SEQUENCES_H +#include +#include +#include +#include +#include + +// class to store the sequences in a file + +struct PatternInfo +{ + + int idx; + int count; +}; + +class Sequences +{ + private: + //enum nucleotides = { A, C, G, T}; + unsigned int num_patterns; + unsigned int num_inf_sites; + unsigned int numseqs; + unsigned int seqlen; + unsigned char *seqs; + std::vector seqnames; + std::vector patterns; + std::vector inf_sites; + unsigned char meaning[12][5]; + double freqs[4]; + bool is_informative(unsigned int col) const; + public: + inline unsigned char* operator[](int n) const { return &(seqs[n*seqlen]); } + Sequences(const char *filename); + ~Sequences() { + delete [] seqs; + } + inline unsigned char seq_pos( int i, int j) const { return (*this)[i][j]; } + inline unsigned char pattern_pos(int i, int j) const + { return seq_pos( j, patterns[i].idx); } + inline unsigned char infsite_pos(int i, int j) const + { return pattern_pos( inf_sites[i], j); } + std::string position(int i); + inline int num_seqs() const { return numseqs; } + inline int seq_len() const { return seqlen; } + inline int pattern_count() const { return num_patterns; } + inline int pattern_count(int i) const { return patterns[i].count; } + inline int infsite_count() const { return num_inf_sites; } + inline int infsite_count(int i) const { return patterns[inf_sites[i]].count; } + inline double *frequences() { return freqs; } + inline double frequence(int i) const { return freqs[i]; } + inline std::string seqname(int i) const { return seqnames.at(i); } + inline bool is_defined(short int l) const { return l<4; } + inline bool is_ambiguous(short int l) const { return (l>3 && l<14); } + inline bool is_gap(short int l) const { return (l == 14); } + inline bool is_undefined(short int l) const { return (l ==15); } + inline std::vector& get_patterns() { return patterns; } + inline unsigned char* ambiguos_meaning(short int l) const { return + (unsigned char*) meaning[l-4]; } + void list_taxons() const; + int search_taxon_name(std::string &s) const; + void calculate_patterns(); + void calculate_frequences(); + void save_seq_data(std::ostream &of); + +}; +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.cpp new file mode 100644 index 000000000..ac66efc27 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.cpp @@ -0,0 +1,214 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include "SubsModel.h" +//#include + +using namespace std; + +SubstModel::SubstModel(Sequences &p, int m) +{ + model = m; + patterns = &p; + eigensystem = NULL; + patterns->calculate_frequences(); + frequences = patterns->frequences(); + kappa = 4; + //kappa = 32.451; + a=b=c=d=e=1; + ievec = new double*[4]; + for(int i=0; i<4; i++)ievec[i] = new double[4]; +} + +void SubstModel::init() +{ + init_rate_matrix(); + switch(model) + { + case JC69: + jc69();break; + case F81: + f81();break; + case K2P: + k2p();break; + case HKY85: + hky85();break; + case GTR: + gtr();break; + default: + { + model = 1; + f81(); break; + } + } + // calculate eigenvalues and eigenvectors + if(eigensystem!=NULL)delete eigensystem; + eigensystem = new EigenSolver(rate, 4); + eigensystem->solve(); + // calculat inverse eigenvectors + luinverse(eigensystem->eigenvectors(), ievec, NUM_AA); +} + +void SubstModel::init_rate_matrix() +{ + rate[0] = rate[1] = rate[2] = rate[3] = + rate[4] = rate[5] = rate[6] = rate[7] = + rate[8] = rate[9] = rate[10] = rate[11] = + rate[12] = rate[13] = rate[14] = rate[15] = 1.0; +} + + +void SubstModel::construct_rate_matrix() +{ + // multiply the off-diagonal elements by the frequences + mult_frequences(); + // calculate digonal elements + set_diagonal(); + // normalize the rate matrix + normalize(); +} + + +// multiple the off-diagonal elementes by the frequences + +void SubstModel::mult_frequences() +{ + for (int i = 0; i < 4; i++) { + for (int j = i+1; j < 4; j++) { + set_rate(i,j, get_rate(i,j) * frequences[j]); + set_rate(j,i, get_rate(j,i) * frequences[i]); + } + } +} + + +void SubstModel::set_diagonal() +{ +// set the diagonal elements + for(int i=0; i<4; i++) + { + double sum = 0; + for(int j=0; j<4; j++) if(i!=j)sum+=get_rate(i,j); + set_rate(i,i, -sum); + } +} + +// normaliza the rate matrix +void SubstModel::normalize() +{ + double tmp = 0.0; + + for (int i = 0; i < 4; i++) + { + tmp += -get_rate(i,i)*frequences[i]; + } + //std::cout << "Rate matrix:" << std::endl; + //std::cout << "----------------------------------" << std::endl; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + set_rate(i,j, get_rate(i,j)/tmp); + //std::cout << get_rate(i,j) << " | "; + } + //std::cout << std::endl; + } +} + + +void SubstModel::print_rate_matrix() +{ + std::cout << "Rate matrix:" << std::endl; + std::cout << "----------------------------------" << std::endl; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + std::cout << get_rate(i,j) << " | "; + } + std::cout << std::endl; + } +} + + +// useful for models that don't complain about nucleotide frequences +void SubstModel::set_equal_frequences() +{ + frequences[0] = frequences[1] = frequences[2] = frequences[3] = 0.25; +} + +// initialize the variables and matrizes + + +void SubstModel::f81() +{ + // set the matrix + // parameter mu = 1.0 + // nothing to do, only construct the matrix + construct_rate_matrix(); +} + + +void SubstModel::jc69() +{ + // set equal frequences + set_equal_frequences(); + construct_rate_matrix(); +} + +void SubstModel::hky85() +{ + //cout << "criando hky85 com kappa:" << kappa << endl; + // transition/traversion ratio + set_rate(0,2, kappa); + set_rate(1,3, kappa); + set_rate(2,0, kappa); + set_rate(3,1, kappa); + // multfrequences + construct_rate_matrix(); + +} + +void SubstModel::k2p() +{ + // set equal frequences + set_equal_frequences(); + // the same of HKY85 except by the equal frequences + hky85(); +} + +void SubstModel::gtr() +{ + + a=1.23767538; b=3.58902963; + c=2.16811705; d=0.73102339; e=6.91039679; + set_rate(0,1, a); + set_rate(1,0, a); + set_rate(0,2, b); + set_rate(2,0, b); + set_rate(0,3, c); + set_rate(3,0, c); + set_rate(1,2, d); + set_rate(2,1, d); + set_rate(1,3, e); + set_rate(3,1, e); + // multfrequences + construct_rate_matrix(); +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.h b/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.h new file mode 100644 index 000000000..cf5897a35 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/SubsModel.h @@ -0,0 +1,74 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef SUBSMODEL_H +#define SUBSMODEL_H +// substitution model class +// only nucleotides +#include "eigensolver.h" +#include "Patterns.h" +#include + +class SubstModel +{ + private: + + // instantaneus rate matrix 4x4 + double rate[16]; // rate matrix + double **ievec; // inverse eigenvectors + double *frequences; + double a,b,c,d,e; // model parameterss + double kappa; + int model; + Sequences *patterns; + EigenSolver *eigensystem; + // define the substituion model + void construct_rate_matrix(); + void setsubstmodel( int ); + void calculatepmatrix(double); + void mult_frequences(); + void set_diagonal(); + void normalize(); + void set_equal_frequences(); + void init_rate_matrix(); + void f81(); + void k2p(); + void jc69(); + void hky85(); + void gtr(); + inline double get_rate(int i, int j) { return rate[4*i+j]; } + inline void set_rate( int i, int j, double val) { rate[4*i +j] = val; } + public: + enum models { JC69, F81, K2P, HKY85, GTR }; + friend class ProbMatrix; + ~SubstModel() { delete eigensystem; for(int i=0; i<4; i++) delete[] ievec[i]; delete [] ievec; } + SubstModel(Sequences &p, int m = 0); + void init(); + void set_kappa(double t) { kappa = t; init(); } + double get_kappa() { return kappa; } + void print_rate_matrix(); +}; +#endif + + + + + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/eigensolver.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/eigensolver.cpp new file mode 100644 index 000000000..32ed11ea7 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/eigensolver.cpp @@ -0,0 +1,77 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + + +#include "eigensolver.h" + +EigenSolver::EigenSolver(double *matrix, int dim) +{ + // allocate copy matrix + copymatrix = new double*[dim]; + evec = new double*[dim]; + eval = new double[dim]; + dimension = dim; + for(int i=0; i +#include + +// calculate eigenvalues and eigenvectors of a matrix + +class EigenSolver +{ + private: + double **copymatrix; + double *eval; + double **evec; + int dimension; + public: + EigenSolver( double *matrix, int dim); + EigenSolver( double **matrix, int dim); + ~EigenSolver(); + inline double **eigenvectors() { return evec; } + inline double *eigenvectors(int i) { return evec[ (i>dimension ? 0 : i) ]; } + inline double *eigenvalues() { return eval; } + void solve(); +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/eoCountedFileMonitor.h b/contribution/branches/PhyloMOEA/PhyloMOEA/eoCountedFileMonitor.h new file mode 100644 index 000000000..ae04edc44 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/eoCountedFileMonitor.h @@ -0,0 +1,66 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef EOCOUNTEDFILEMONITOR_H_ +#define EOCOUNTEDFILEMONITOR_H_ + +#include + +class eoCountedFileMonitor: public eoFileMonitor +{ + public: + eoCountedFileMonitor(unsigned int _frequency=1, std::string _filename="generation_data", std::string _delim="\t", bool _firstcall=false, bool _lastcall=true): + eoFileMonitor(_filename, _delim, false, true), counter(0), frequency(_frequency), delim(_delim), firstcall(_firstcall), lastcall(_lastcall) {} + + virtual std::string className(void) const { return "eoCountedFileMonitor"; } + eoMonitor& operator()(std::ostream& os) + { + if( ! (++counter % frequency) || firstcall ) + { + firstcall = false; + os << counter << delim.c_str(); + iterator it = vec.begin(); + + os << (*it)->getValue(); + + for(++it; it != vec.end(); ++it) + { + os << delim.c_str() << (*it)->getValue(); + } + os << '\n'; + } + //counter++; + return *this; + } + + void lastCall() { + if(lastcall && (counter-- % frequency)) + { + firstcall = true; + ofstream os(getFileName().c_str(), ios_base::app); + this->operator()(os); + } + } + + private: + unsigned int counter, frequency; + std::string delim; + bool lastcall, firstcall; +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/eoSingleFileCountedStateSaver.h b/contribution/branches/PhyloMOEA/PhyloMOEA/eoSingleFileCountedStateSaver.h new file mode 100644 index 000000000..4b4b933ec --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/eoSingleFileCountedStateSaver.h @@ -0,0 +1,79 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef EOSINGLEFILECOUNTEDSTATESAVER_H_ +#define EOSINGLEFILECOUNTEDSTATESAVER_H_ +#include +#include + +class eoSingleFileCountedStateSaver : public eoUpdater +{ + public : + eoSingleFileCountedStateSaver(unsigned _interval, const eoState& _state, std::string _filename, bool _saveOnFirstCall=false, bool _saveOnLastCall=true ) + : interval(_interval), state(_state), filename(_filename), saveOnFirstCall(_saveOnFirstCall), saveOnLastCall(_saveOnLastCall), counter(0) + { + os.open(filename.c_str()); + if (!os){ + std::string str = "eoFileMonitor: Could not open " + filename; + throw std::runtime_error(str); + } + } + + virtual std::string className(void) const { return "eoSingleFileCountedStateSaver"; } + + + void operator()(void) + { + if ( !(++counter % interval) || saveOnFirstCall) + { + doItNow(); + saveOnFirstCall = false; + } + } + + void lastCall(void) + { + if (saveOnLastCall && (counter-- % interval)) + { + saveOnFirstCall = true; + this->operator()(); + } + } + + + private: + + const eoState& state; + unsigned interval; + unsigned counter; + bool saveOnLastCall; + bool saveOnFirstCall; + std::ofstream os; + std::string filename; + + + void doItNow(void) + { + os << "Generation Number :" << counter << std::endl; + state.save(os); + } + + }; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.cpp new file mode 100644 index 000000000..5cb035476 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.cpp @@ -0,0 +1,544 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#include "likelihoodcalculator.h" +#include "treeIterator.h" +#include +#include + + + + +#define BL_MIN 1.e-10 + +//pthread_mutex_t mdone_mutex; + +//extern "C" void *lik_slave(void *); + +void *lik_slave_class(void *ind) +{ + LikelihoodCalculator *t = (LikelihoodCalculator *)ind; + if(t->worker == 0) + { +// pthread_mutex_lock(&mdone_mutex); + t->worker = 1; +// pthread_mutex_unlock(&mdone_mutex); + t->calculate_partials( t->a, &(t->b)); + } + else + { +// pthread_mutex_lock(&mdone_mutex); + t->worker = 0; +// pthread_mutex_unlock(&mdone_mutex); + t->calculate_partials( t->b, &(t->a)); + } + return (NULL); +} + +void *lik_slave(void *ind) +{ + LikelihoodCalculator *t = (LikelihoodCalculator *)ind; + lik_slave_class(t); + return NULL; +} + + +LikelihoodCalculator::LikelihoodCalculator( phylotreeIND &ind, SubstModel &m, ProbMatrixContainer &p, unsigned int nr) +{ + if(nr<1)nrates=1; + else nrates=nr; + alpha = 2.0; + part_memory_internal = part_memory_taxons = NULL; + SeqData = NULL; rates_prob = rates_freq = NULL; prob = NULL; + model = &m; + probmatrixs = &p; + invalid_partials_taxons = true; + set_data( ind.get_patterns() ); + set_tree(ind); + build_rates_sites(); +} + +LikelihoodCalculator::LikelihoodCalculator( phylotreeIND &ind, Sequences &Seqs, SubstModel &m, ProbMatrixContainer &p, unsigned int nr) +{ + part_memory_internal = part_memory_taxons = NULL; + rates_prob = rates_freq = NULL; prob = NULL; + if(nr<1)nrates=1; + else nrates=nr; + alpha = 2.0; + SeqData = NULL; + model = &m; + probmatrixs = &p; + invalid_partials_taxons = true; + set_data( Seqs); + set_tree(ind); + build_rates_sites(); +} + +void LikelihoodCalculator::build_rates_sites() +{ + if(rates_prob!=NULL) delete [] rates_prob; + if(rates_freq!=NULL) delete [] rates_freq; + if(prob!=NULL) delete [] prob; + + rates_prob = new double[nrates]; + prob = new double*[nrates]; // points rate matrixs + rates_freq= new double[nrates]; + + if(nrates==1) + { + rates_prob[0] = rates_freq[0] = 1.0; + return; + + } + +/* + double mean = 0.0; + + for (int i = 0; i < nrates; i++) + { + rates_prob[i] = gsl_cdf_gamma_Pinv((2.0*i+1.0)/(2.0*nrates), alpha, (1.0)/alpha); + cout << "quantile : " << (2.0*i+1.0)/(2.0*nrates) << " " << rates_prob[i] << endl; + mean += rates_prob[i]; + } + + mean = mean/(double) nrates; + + cout << "mean : " << mean << endl; + + for (int i = 0; i < nrates; i++) + { + rates_prob[i] /= mean; + rates_freq[i] = 1.0/(double) nrates; + } +*/ + DiscreteGamma(rates_freq, rates_prob, alpha, alpha, nrates, 0); // 0 = phyml 2.4 + +/* for (int i=0; i> k; + +} + +// change the default data +void LikelihoodCalculator::set_data( const Sequences &seqs) +{ + // first assignment + if( SeqData == NULL ) + { + SeqData = (Sequences *)&seqs; + allocate_partials(); // first assignment, allocate memoryAlleleString + } + else + // if the data changes, invalidate partial taxons + { + if( SeqData != &seqs) + { + SeqData = (Sequences *)&seqs; + deallocate_partials(); + // allocate again + //cout << "allocating partials" << endl; + allocate_partials(); + // invalidate partial taxons + //cout << "warning.... changing patterns..." << endl; + invalid_partials_taxons = true; + } + } +} + + +void LikelihoodCalculator::set_tree( phylotreeIND &ind ) +{ + + tree_ptr = &ind; + long total_pos = SeqData->pattern_count(); + + set_data( tree_ptr->get_patterns() ); + + Partials.init(tree_ptr->TREE); + graph::node_iterator it = tree_ptr->TREE.nodes_begin(); + graph::node_iterator it2 = tree_ptr->TREE.nodes_end(); + + // allocate partial matrix for each pattern and internal node (taxon don't have partials) + + for(int i=0, j=0; it!=it2; it++, j++) + { + Factors[*it] = part_memory_factors + j * total_pos; + if(!tree_ptr->istaxon(*it)) + { + Partials[*it] = part_memory_internal + i * total_pos * nrates* 4; + i++; + } + } +} + +double LikelihoodCalculator::calculate_likelihood_exp(edge focus) +{ + + double lik; + +// pthread_t threads[2]; /* holds thread info */ +// int ids[2]; + + worker = 0; + + + // select an internal node as root + bfocus = focus; + + + a = tree_ptr->istaxon(bfocus.target()) ? bfocus.source() : bfocus.target(); + b = bfocus.opposite(a); + + //cout << "iniciando partials" << endl; + tree_ptr->convert_graph_to_tree(a, NULL); + init_partials(); + + + // traverse the tree and calculate the partials + + /*for(int i=0; i<2; i++) + pthread_create(&threads[i], NULL, lik_slave_class, (void*)this); + for(int i=0; i<2; i++) + pthread_join(threads[i],NULL);*/ + //cout << "calculando partials..." << endl; + calculate_partials( a, &b); + calculate_partials( b, &a); + //cout << "somando..." << endl; + // sum all partials + lik = sum_site_liks(); + return lik; +} + + +// double LikelihoodCalculator::calculate_all_likelihoods() +// { +// oldroot = invalid_node; +// graph::edge_iterator it = tree_ptr->TREE.edges_begin(); +// graph::edge_iterator it_end = tree_ptr->TREE.edges_end(); +// while(it != it_end) +// { +// +// // select a new root +// bfocus = *it; +// a = tree_ptr->istaxon(bfocus.target()) ? bfocus.source() : bfocus.target(); +// b = bfocus.opposite(a); +// +// for(int i=0; i< nrates; i++) +// { +// double len = tree_ptr->get_branch_length(bfocus)*rates_prob[i]; +// len = len < BL_MIN ? BL_MIN : len; +// ProbMatrix &p = (*probmatrixs)[ len]; +// prob[i] = p.p; +// } +// +// +// // make the new tree +// tree_ptr->convert_graph_to_tree(a, NULL); +// +// if( oldroot!=invalid_node) +// { +// update_partials(); //cj( oldroot, a, *it); +// cout << "likelihood ..." << sum_site_liks() <get_branch_length(bfocus); +// len = len < BL_MIN ? BL_MIN : len; +// probmatrixs->change_matrix( len*factor, tree_ptr->get_branch_length(bfocus)*factor); +// } +// +// oldroot = a; +// ++it; +// } +// } + +void LikelihoodCalculator::update_partials() //node *oldroot, node newroot, edge newedge) +{ + + // father of oldroot + node tmp = oldroot; + node father = bfocus.opposite(a); + do + { + recalculate_cj(tmp, father); + if(tmp == a) break; + tmp = tmp.in_edges_begin()->source(); + }while(1); +} + +// recalculate cj when t changes in one of his sons +// only work for trees +void LikelihoodCalculator::recalculate_cj(node n, node father) +{ + int seqlen = tree_ptr->number_of_positions(); + node::out_edges_iterator it; + node::out_edges_iterator it_end; + it = n.out_edges_begin(); + it_end = n.out_edges_end(); + node nodeaux; + + for(int i=0; itarget(); + if(nodeaux !=father)calculate_node_partial( n, nodeaux, *it); + it++; + } +} + + +double LikelihoodCalculator::calculate_likelihood() +{ + + double lik; + +// pthread_t threads[2]; /* holds thread info */ +// int ids[2]; + + worker = 0; + + + // select an internal node as root + bfocus = *(tree_ptr->TREE.edges_begin()); + + + a = tree_ptr->istaxon(bfocus.target()) ? bfocus.source() : bfocus.target(); + b = bfocus.opposite(a); + + + init_partials(); + + // traverse the tree and calculate the partials + + /*for(int i=0; i<2; i++) + pthread_create(&threads[i], NULL, lik_slave_class, (void*)this); + for(int i=0; i<2; i++) + pthread_join(threads[i],NULL);*/ + //cout << "calculando partials..." << endl; + calculate_partials( a, &b); + calculate_partials( b, &a); + //cout << "somando..." << endl; + // sum all partials + lik = sum_site_liks(); + return lik; +} + + +double LikelihoodCalculator::sum_site_liks( ) +{ + int seqlen = tree_ptr->number_of_positions(); + register double lik = 0; + register double factor_correct; + double len; + + for(int i=0; i< nrates; i++) + { + len = tree_ptr->get_branch_length(bfocus)*rates_prob[i]; + len = len < BL_MIN ? BL_MIN : len; + ProbMatrix &p = (*probmatrixs)[ len]; + prob[i] = p.p; + } + + for(int i=0; i < seqlen; i++) + { + factor_correct = Factors[a][i] + Factors[b][i] ; + site_liks[i] = sum_partials(i); + lik += ( log(site_liks[i]) + factor_correct)* SeqData->pattern_count(i); + } + return lik; +} + + + +double LikelihoodCalculator::sum_partials( int pos ) +{ + if(tree_ptr->istaxon( b)) + { + return sum_partials_a_to_taxon( pos ); + } + else return sum_partials_a_to_b_notaxon( pos); +} + + +double LikelihoodCalculator::sum_partials_a_to_taxon( int pos) +{ + double sum = 0; + unsigned char *meaning; + char char_state_b = SeqData->pattern_pos( pos, tree_ptr->taxon_id( b) ); + for(int i=0; iis_defined(char_state_b) ) + + sum += rates_freq[i]*SeqData->frequence(k)* prob[i][k*4+char_state_b]* Partials[a][index+k]; + // ambigous nucleotide + else if (SeqData->is_ambiguous( char_state_b) ) + { + meaning = SeqData->ambiguos_meaning( char_state_b ); + for(int l=0; l < 4; l++) + sum += rates_freq[i]*SeqData->frequence(k)* prob[i][k*4+l]* Partials[a][index+k] * meaning[l]; + //Partials[b][pos*4+l]; + } + // gap or undefined + else + { + for(int l=0; l < 4; l++) + sum += rates_freq[i]*SeqData->frequence(k)* prob[i][k*4+l]* Partials[a][index+k]; + } + } + } + return sum; +} + + +double LikelihoodCalculator::sum_partials_a_to_b_notaxon( int pos) +{ + double sum = 0; + + for(int j=0; jfrequence(k)* prob[j][k*4+l] + * Partials[a][index+k] * Partials[b][index+l]; + } + } + } + return sum; +} + + +// calculate conditional likelihoods + +void LikelihoodCalculator::calculate_node_partial( node father, node son, edge edgeaux) +{ + register double sum; + double corr_factor; + long index; + double len; + + register int seqlen = tree_ptr->number_of_positions(); + for(int k=0; kget_branch_length(edgeaux) * rates_prob[r]; + len = (len < BL_MIN ? BL_MIN : len); + ProbMatrix &p = (*probmatrixs)[ len ]; + + for(int i=0; i < 4; i++) + { + sum = 0; + if(tree_ptr->istaxon( son)) + { + unsigned char l=SeqData->pattern_pos( k, tree_ptr->taxon_id( son)); + if(SeqData->is_defined( l) ) sum = p.p_ij_t( i, l ); + else if(SeqData->is_ambiguous( l)) + { + unsigned char *meaning = SeqData->ambiguos_meaning( l); + for(int j=0; j < 4; j++) + sum +=meaning[j]* p.p_ij_t( i, j ); + //sum +=Partials[son][k*4+j]* p.p_ij_t( i, j ); + } + else sum = 1; + } + else{ + for(int j=0; j < 4; j++) + { + sum +=Partials[son][index+ r*4 +j]* p.p_ij_t( i, j ); + + } + } + Partials[father][index + r*4 +i] *= sum; + corr_factor = ( sum > corr_factor ? sum : corr_factor); + } + } + + if( corr_factor < UMBRAL || corr_factor > (1./LIM_SCALE_VAL)) + { + //cout << "escalado ..." << endl; + for(int r=0; r< nrates; r++) + for(int i=0; i<4; i++) + Partials[father][index + r*4+ i] /= corr_factor; + Factors[father][k] += log(corr_factor); + } + } +} + + +void LikelihoodCalculator::init_partials() +{ + // initialize the values of C for each node + long seqlen = tree_ptr->number_of_positions(); + + int num_taxons = tree_ptr->number_of_taxons(); + int num_internal_nodes = tree_ptr->TREE.number_of_nodes() - num_taxons; + + long size_part_factors = seqlen*(num_taxons +num_internal_nodes); + long size_part_memory_internal = nrates*num_internal_nodes*seqlen*4; + // correction factors + //for(long i=0; ipostorder_begin( n, *antecessor); + + while(*it!=n) + { + calculate_node_partial( it.ancestor(), *it, it.branch()); + ++it; + } +} + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.h b/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.h new file mode 100644 index 000000000..429672004 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/likelihoodcalculator.h @@ -0,0 +1,142 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef LIKELIHOODCALCULATOR_H +#define LIKELIHOODCALCULATOR_H +#include +#include +#include +//#include +#include +/** +@author Waldo Cancino +*/ + + +class LikelihoodCalculator{ +private: + edge bfocus; // branch focus + node a,b; // bfocus vertices (b can be a taxon, a never can be) + double **prob; // point to the probability matrix associated to bfocus + int worker; // for threading, not used really + int nrates; + double *rates_prob; // rate probabilities + double *rates_freq; // rate frequences + double alpha; + node root; // root of the likelihood calculator + node oldroot, invalid_node; // old root and first root when change the focus of the likelihood calculations + + // continous allocated memory to store partial results + double *part_memory_internal, // conditional likelihood of internal nodes + *part_memory_taxons, // conditional likelihood of internal nodes, no longer used + *part_memory_factors; // correction factors for nodes + + double *site_liks; // site likelihoods + + // maps nodes to the continous memory + node_map< double *> Partials; // maps nodes to the corresponding conditional likelihood + node_map Factors; // maps node to the corresponding correct factors + + // external data + phylotreeIND *tree_ptr; // point to the tree + SubstModel *model; // substitution model + ProbMatrixContainer *probmatrixs; // container of probmatrixs + Sequences *SeqData; // sequence data + + + bool invalid_partials_taxons; // when received a new tree, the partials are invalidated + + // init partials memory + void init_partials(); + + // prepare the post-order tree iterator + void calculate_partials(node n, node *); + // calculate conditional likelihood for the node father, from the son conditionals + void calculate_node_partial( node father, node son, edge edgeaux); + + // likelihood sum of partial for the focus branch + double sum_partials( int pos); + double sum_partials_a_to_taxon( int pos ); + double sum_partials_a_to_b_notaxon( int pos); + + // sum the site likelihoods + double sum_site_liks(); + + // allocate partial memory + void allocate_partials() + { + long total_pos = SeqData->pattern_count(); + int ntaxons = SeqData->num_seqs(); + part_memory_internal = new double[ (ntaxons-2) * nrates * total_pos * 4 ]; + part_memory_factors = new double [ (2*ntaxons-2) * total_pos]; + site_liks = new double[total_pos]; + } + + // destroy partial memory + void deallocate_partials() + { + delete [] part_memory_internal; + delete [] part_memory_factors; + delete [] site_liks; + } + + + + // deprecated + double sum_partials2( node a, node b, edge edgeaux, int pos, int which=0); + void build_rates_sites(); + +public: + bool corrected; + LikelihoodCalculator( phylotreeIND &ind, SubstModel &m, ProbMatrixContainer &p, unsigned int nr=1); + LikelihoodCalculator( phylotreeIND &ind, Sequences &Seqs, SubstModel &m, ProbMatrixContainer &p, unsigned int nr=1); + + // main functions, prepare the object to calculate likelihood + double calculate_likelihood(); + double get_site_lik(int i) { return site_liks[i]; } + double calculate_likelihood_exp(edge focus); + double calculate_all_likelihoods(); + void update_partials(); + void recalculate_cj(node n, node father); + void set_alpha(double a) { alpha = a; build_rates_sites(); }; + // change the tree ' + void set_tree( phylotreeIND &ind ); + double rate_probabilities(int i) { return rates_prob[i]; } + int number_rates() { return nrates; } + + + ~LikelihoodCalculator() + { + //cout << "deallocando partialas" << endl; + deallocate_partials(); + //cout << "deallocando o restante" << endl; + if(prob!=NULL) delete[] prob; + if(rates_prob!=NULL) delete [] rates_prob; + if(rates_freq!=NULL) delete [] rates_freq; + } + + // change the dataset + void set_data( const Sequences &seqs); + + friend void* lik_slave_class(void *); + + // frind class to do optimizations + friend class Step_LikOptimizer; +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.cpp new file mode 100644 index 000000000..25e8b0c79 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.cpp @@ -0,0 +1,234 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +// implement several optimizers + +#include "likoptimizer.h" + + +// traverse the tree edges and optimize it +void Step_LikOptimizer::optimize() +{ + graph::edge_iterator it, tmp; + + oldroot = invalid_node; + + // calculate conditional likelihood + //cout << "likelihood inicial" << Lik_calc->calculate_likelihood() << endl; + + graph::edge_iterator it_end = tree_ptr->TREE.edges_end(); + + double sum,aux; + do{ + it = tree_ptr->TREE.edges_begin(); + sum=0; + while(it != it_end) + { + + // select a new root + current_edge = *it; + a = tree_ptr->istaxon(current_edge.target()) ? current_edge.source() : current_edge.target(); + b = current_edge.opposite(a); + + Lik_calc->a = a; Lik_calc->b = b; Lik_calc->bfocus = current_edge; + + for(int i=0; iget_branch_length(current_edge) * Lik_calc->rate_probabilities(i); + len = len < BL_MIN ? BL_MIN : len; + p_current[i] = & ((*Lik_calc->probmatrixs)[len]); + } + // make the new tree + tree_ptr->convert_graph_to_tree(a, NULL); + + if( oldroot!=invalid_node) + update_partials(); //cj( oldroot, a, *it); + + // optimize the branch it + aux = tree_ptr->get_branch_length(current_edge); + optimize_branch(); //optimize_branch( root, *it); + + //tolerance + sum += fabs( tree_ptr->get_branch_length(current_edge) - aux ); + oldroot = a; + ++it; + } + cout << '.'; + cout.flush(); + }while(sum>=0.0001); + //cout << "\nlikelihood final" << Lik_calc->calculate_likelihood() << endl; +} + +// recalculate the value of c_j for nodes on the path from newroot to oldroot +void Step_LikOptimizer::update_partials() //node *oldroot, node newroot, edge newedge) +{ + + // father of oldroot + node tmp = oldroot; + node father = current_edge.opposite(a); + do + { + recalculate_cj(tmp, father); + if(tmp == a) break; + tmp = tmp.in_edges_begin()->source(); + }while(1); + //cout << "partials atualizados...." << endl; + //Lik_calc->sum_site_liks(); +} + +// recalculate cj when t changes in one of his sons +// only work for trees +void Step_LikOptimizer::recalculate_cj(node n, node father) +{ + int seqlen = tree_ptr->number_of_positions(); + node::out_edges_iterator it; + node::out_edges_iterator it_end; + it = n.out_edges_begin(); + it_end = n.out_edges_end(); + node nodeaux; + + for(int i=0; itarget(); + if(nodeaux !=father)Lik_calc->calculate_node_partial( n, nodeaux, *it); + it++; + } +} + +void Newton_LikOptimizer::optimize_branch() +{ + //cout << ".....next branch.........." << endl << endl; + double alpha = 0; + //*b = current_edge.source() == *a ? current_edge.target() : current_edge.source(); + double branch_l = tree_ptr->get_branch_length(current_edge); + + // calculate derivates of the probability matrix + for(int i=0; iinit_derivate(); + + calculate_1d2d_loglikelihood(); // calculate the derivates + + //cout << "likelihood calculada ..." << fnext << endl; + + //gsl_cheb_init (lk_aproximated, &F, 0.0, 1.0); + + // newton direction + //dk = (f1d/f2d>=0 && f1d>=0) || ((f1d/f2d<0 && f1d<0) ) + // ? f1d/f2d : f1d; + dk = -f1d/f2d; + + alpha = linear_decreasing(); + + // update branch lenght +// if( alpha!=0 ) +// { +// cout << fnext << " "; +// cout << "antigo branch " << branch_l << " novo branch " << branch_l + alpha*dk << endl; +// } + tree_ptr->set_branch_length(current_edge, + branch_l + alpha*dk); + + for(int i=0; i< nrates; i++) + { + double factor = Lik_calc->rate_probabilities(i); + double len = tree_ptr->get_branch_length(current_edge)*factor; + len = len < BL_MIN ? BL_MIN : len; + probmatrixs->change_matrix( branch_l*factor, len ); + } +} + +double Newton_LikOptimizer::linear_decreasing() +{ + double alpha = 2; + double fini, pnext; + double pini = tree_ptr->get_branch_length(current_edge); + fini = fnext; + //fini = gsl_cheb_eval(lk_aproximated,pini); + int i=0; + + while(1) + { + alpha*=0.5; + // next point + pnext = pini + alpha*dk; + + + if(pnext >= BL_MIN && pnext <=BL_MAX) + { + // modify the probability matrix with the new branch length + + //cout << fnext << " --> " << f1d << " --> " << f2d << "-->" << dk << endl; + + fnext = recalculate_likelihood(pnext); + //cout << pini << " -->" << pnext << " -->" << fnext << endl; + //cin >> i; + } + if(i==20) + { + alpha=0;break; + } + + if(fnext > fini && fpclassify(fnext)!= FP_NAN && fpclassify(fnext)!=FP_INFINITE) + { + //cout << fini << " " << fnext << " " << alpha << " " << pnext << endl; + break; + } + i++; + } + //if(fnextnumber_of_positions(); + + f1d = f2d = 0; + double f_h, f1d_h, f2d_h; + double fnext2 = 0; + fnext = 0; + double factor; + for(int i=0; i < seqlen; i++) + { + factor = exp( (*Factors)[a][i] + (*Factors)[b][i] ); // correction factor + f_h = sum_partials( i); + //f_h = Lik_calc->get_site_lik( i); + f1d_h = sum_partials1d( i) * factor; + f2d_h = sum_partials2d( i) * factor; + // first derivate + f1d += (f1d_h/f_h) * SeqData->pattern_count(i); + // second derivate + f2d += ( (f_h * f2d_h - f1d_h*f1d_h ) / (f_h*f_h) ) * SeqData->pattern_count(i); + // likelihood + fnext += (log(f_h ) + (*Factors)[a][i] + (*Factors)[b][i] )* SeqData->pattern_count(i); + } + //cout << "experimental ..." << " " << fnext << " " << fnext2 << endl; +} + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.h b/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.h new file mode 100644 index 000000000..ad8b38b50 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/likoptimizer.h @@ -0,0 +1,121 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef LIK_OPTIMIZER_H +#define LIK_OPTIMIZER_H +#include "likelihoodcalculator.h" +#include + + +// base class for all kind of Likelihood Optimizers + +class Lik_Optimizer +{ + protected: + LikelihoodCalculator *Lik_calc; // point to the calculator + + public: + Lik_Optimizer( LikelihoodCalculator &Lc) { Lik_calc = &Lc; } + virtual ~Lik_Optimizer() { }; + // call ot the optimize function + virtual void optimize() = 0; +}; + +// branch by branch optimizer + +class Step_LikOptimizer:public Lik_Optimizer +{ + protected: + edge current_edge; + node a, b, oldroot; + node invalid_node; + void update_partials(); + void recalculate_cj(node root, node father); + + Sequences *SeqData; + phylotreeIND *tree_ptr; + int nrates; + + ProbMatrixContainer *probmatrixs; // container of probmatrixs + node_map< double *> *Partials, *Factors; //points to partials + + ProbMatrix **p_current; + public: + virtual ~Step_LikOptimizer() { if(p_current!=NULL) delete [] p_current; }; + + Step_LikOptimizer( LikelihoodCalculator &Lc) : Lik_Optimizer(Lc) + { + tree_ptr = Lc.tree_ptr; + probmatrixs = Lc.probmatrixs; + + Partials = &Lc.Partials; + Factors = &Lc.Factors; + SeqData = Lc.SeqData; + nrates = Lc.number_rates(); + p_current = new ProbMatrix*[nrates]; // points to the matrixs for each rate + }; + void optimize(); + virtual void optimize_branch() = 0; + inline double recalculate_likelihood(double tnext) + { + for(int i=0; i< nrates; i++) + { + double len = tnext*Lik_calc->rate_probabilities(i); + //len = len < BL_MIN ? BL_MIN : len; + p_current[i]->set_branch_length(len); + } + return Lik_calc->sum_site_liks(); + } + inline double sum_partials(int pos) + { + for(int i=0; i< nrates; i++) + Lik_calc->prob[i] = (p_current[i])->p; + return Lik_calc->sum_partials(pos); + } + inline double sum_partials1d( int pos ) + { + for(int i=0; i< nrates; i++) + Lik_calc->prob[i] = (p_current[i])->p1d; + return Lik_calc->sum_partials( pos); + } + inline double sum_partials2d( int pos ) + { + for(int i=0; i< nrates; i++) + Lik_calc->prob[i] = (p_current[i])->p2d; + return Lik_calc->sum_partials( pos); + } +}; + + +// Newton Step by Step optimizer: +class Newton_LikOptimizer:public Step_LikOptimizer +{ + private: + double fnext; //likelihoods + double f1d, f2d; // derivates + double dk; // search direction + double linear_decreasing(); + public: + virtual ~Newton_LikOptimizer() {}; + Newton_LikOptimizer(LikelihoodCalculator &Lc): Step_LikOptimizer(Lc) {}; + void calculate_1d2d_loglikelihood(); + void optimize_branch(); +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/matrixutils.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/matrixutils.cpp new file mode 100644 index 000000000..8cb851aed --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/matrixutils.cpp @@ -0,0 +1,907 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + + +#include + +using namespace std; + + +void luinverse(double **inmat, double **imtrx, int size) +{ + double eps = 1.0e-20; /* ! */ + int i, j, k, l, maxi=0, idx, ix, jx; + double sum, tmp, maxb, aw; + int index[NUM_AA]; + double *wk; + double omtrx[NUM_AA][NUM_AA]; + + /* copy inmat to omtrx */ + for (i = 0; i < NUM_AA; i++) + for (j = 0; j < NUM_AA; j++) + omtrx[i][j] = inmat[i][j]; + + wk = new double[size]; //(double *) malloc((unsigned)size * sizeof(double)); + aw = 1.0; + for (i = 0; i < size; i++) + { + maxb = 0.0; + for (j = 0; j < size; j++) + { + if (fabs(omtrx[i][j]) > maxb) + maxb = fabs(omtrx[i][j]); + } + if (maxb == 0.0) + { + /* Singular matrix */ + cout << "Error finding LU decomposition. Singular matrix."; + exit(1); + } + wk[i] = 1.0 / maxb; + } + for (j = 0; j < size; j++) + { + for (i = 0; i < j; i++) + { + sum = omtrx[i][j]; + for (k = 0; k < i; k++) + sum -= omtrx[i][k] * omtrx[k][j]; + omtrx[i][j] = sum; + } + maxb = 0.0; + for (i = j; i < size; i++) + { + sum = omtrx[i][j]; + for (k = 0; k < j; k++) + sum -= omtrx[i][k] * omtrx[k][j]; + omtrx[i][j] = sum; + tmp = wk[i] * fabs(sum); + if (tmp >= maxb) + { + maxb = tmp; + maxi = i; + } + } + if (j != maxi) + { + for (k = 0; k < size; k++) + { + tmp = omtrx[maxi][k]; + omtrx[maxi][k] = omtrx[j][k]; + omtrx[j][k] = tmp; + } + aw = -aw; + wk[maxi] = wk[j]; + } + index[j] = maxi; + if (omtrx[j][j] == 0.0) + omtrx[j][j] = eps; + if (j != size - 1) + { + tmp = 1.0 / omtrx[j][j]; + for (i = j + 1; i < size; i++) + omtrx[i][j] *= tmp; + } + } + for (jx = 0; jx < size; jx++) + { + for (ix = 0; ix < size; ix++) + wk[ix] = 0.0; + wk[jx] = 1.0; + l = -1; + for (i = 0; i < size; i++) + { + idx = index[i]; + sum = wk[idx]; + wk[idx] = wk[i]; + if (l != -1) + { + for (j = l; j < i; j++) + sum -= omtrx[i][j] * wk[j]; + } + else if (sum != 0.0) + l = i; + wk[i] = sum; + } + for (i = size - 1; i >= 0; i--) + { + sum = wk[i]; + for (j = i + 1; j < size; j++) + sum -= omtrx[i][j] * wk[j]; + wk[i] = sum / omtrx[i][i]; + } + for (ix = 0; ix < size; ix++) + imtrx[ix][jx] = wk[ix]; + } + delete [] wk; + //free(wk); + +} /*_ luinverse */ + +void elmhes(double **a, int ordr[], int n) +{ + int m, j, i; + double y, x; + + + for (i = 0; i < n; i++) + ordr[i] = 0; + for (m = 2; m < n; m++) + { + x = 0.0; + i = m; + for (j = m; j <= n; j++) + { + if (fabs(a[j - 1][m - 2]) > fabs(x)) + { + x = a[j - 1][m - 2]; + i = j; + } + } + ordr[m - 1] = i; /* vector */ + if (i != m) + { + for (j = m - 2; j < n; j++) + { + y = a[i - 1][j]; + a[i - 1][j] = a[m - 1][j]; + a[m - 1][j] = y; + } + for (j = 0; j < n; j++) + { + y = a[j][i - 1]; + a[j][i - 1] = a[j][m - 1]; + a[j][m - 1] = y; + } + } + if (x != 0.0) + { + for (i = m; i < n; i++) + { + y = a[i][m - 2]; + if (y != 0.0) + { + y /= x; + a[i][m - 2] = y; + for (j = m - 1; j < n; j++) + a[i][j] -= y * a[m - 1][j]; + for (j = 0; j < n; j++) + a[j][m - 1] += y * a[j][i]; + } + } + } + } +} /*_ elmhes */ + +void eltran(double **a, double **zz, int ordr[NUM_AA], int n) +{ + int i, j, m; + + + for (i = 0; i < n; i++) + { + for (j = i + 1; j < n; j++) + { + zz[i][j] = 0.0; + zz[j][i] = 0.0; + } + zz[i][i] = 1.0; + } + if (n <= 2) + return; + for (m = n - 1; m >= 2; m--) + { + for (i = m; i < n; i++) + zz[i][m - 1] = a[i][m - 2]; + i = ordr[m - 1]; + if (i != m) + { + for (j = m - 1; j < n; j++) + { + zz[m - 1][j] = zz[i - 1][j]; + zz[i - 1][j] = 0.0; + } + zz[i - 1][m - 1] = 1.0; + } + } +} /*_ eltran */ + +void hqr2(int n, int low, int hgh, double **h, + double **zz, double wr[NUM_AA], double wi[NUM_AA]) +{ + int i, j, k, l=0, m, en, na, itn, its; + double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2; + int notlas; /* boolean */ + + norm = 0.0; + k = 1; + /* store isolated roots and compute matrix norm */ + for (i = 0; i < n; i++) + { + for (j = k - 1; j < n; j++) + norm += fabs(h[i][j]); + k = i + 1; + if (i + 1 < low || i + 1 > hgh) + { + wr[i] = h[i][i]; + wi[i] = 0.0; + } + } + en = hgh; + t = 0.0; + itn = n * 30; + while (en >= low) + { /* search for next eigenvalues */ + its = 0; + na = en - 1; + while (en >= 1) + { + /* look for single small sub-diagonal element */ + for (l = en; l > low; l--) + { + s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]); + if (s == 0.0) + s = norm; + tst1 = s; + tst2 = tst1 + fabs(h[l - 1][l - 2]); + if (tst2 == tst1) + goto L100; + } + l = low; + L100: + x = h[en - 1][en - 1]; /* form shift */ + if (l == en || l == na) + break; + if (itn == 0) + { + /* all eigenvalues have not converged */ + cout << "Eigenvalues have not converged!\n"; + exit(1); + } + y = h[na - 1][na - 1]; + w = h[en - 1][na - 1] * h[na - 1][en - 1]; + /* form exceptional shift */ + if (its == 10 || its == 20) + { + t += x; + for (i = low - 1; i < en; i++) + h[i][i] -= x; + s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]); + x = 0.75 * s; + y = x; + w = -0.4375 * s * s; + } + its++; + itn--; + /* look for two consecutive small sub-diagonal elements */ + for (m = en - 2; m >= l; m--) + { + z = h[m - 1][m - 1]; + r = x - z; + s = y - z; + p = (r * s - w) / h[m][m - 1] + h[m - 1][m]; + q = h[m][m] - z - r - s; + r = h[m + 1][m]; + s = fabs(p) + fabs(q) + fabs(r); + p /= s; + q /= s; + r /= s; + if (m == l) + break; + tst1 = fabs(p) * + (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m])); + tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r)); + if (tst2 == tst1) + break; + } + for (i = m + 2; i <= en; i++) + { + h[i - 1][i - 3] = 0.0; + if (i != m + 2) + h[i - 1][i - 4] = 0.0; + } + for (k = m; k <= na; k++) + { + notlas = (k != na); + if (k != m) + { + p = h[k - 1][k - 2]; + q = h[k][k - 2]; + r = 0.0; + if (notlas) + r = h[k + 1][k - 2]; + x = fabs(p) + fabs(q) + fabs(r); + if (x != 0.0) + { + p /= x; + q /= x; + r /= x; + } + } + if (x != 0.0) + { + if (p < 0.0) /* sign */ + s = - sqrt(p * p + q * q + r * r); + else + s = sqrt(p * p + q * q + r * r); + if (k != m) + h[k - 1][k - 2] = -s * x; + else + { + if (l != m) + h[k - 1][k - 2] = -h[k - 1][k - 2]; + } + p += s; + x = p / s; + y = q / s; + z = r / s; + q /= p; + r /= p; + if (!notlas) + { + for (j = k - 1; j < n; j++) + { /* row modification */ + p = h[k - 1][j] + q * h[k][j]; + h[k - 1][j] -= p * x; + h[k][j] -= p * y; + } + j = (en < (k + 3)) ? en : (k + 3); /* min */ + for (i = 0; i < j; i++) + { /* column modification */ + p = x * h[i][k - 1] + y * h[i][k]; + h[i][k - 1] -= p; + h[i][k] -= p * q; + } + /* accumulate transformations */ + for (i = low - 1; i < hgh; i++) + { + p = x * zz[i][k - 1] + y * zz[i][k]; + zz[i][k - 1] -= p; + zz[i][k] -= p * q; + } + } + else + { + for (j = k - 1; j < n; j++) + { /* row modification */ + p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j]; + h[k - 1][j] -= p * x; + h[k][j] -= p * y; + h[k + 1][j] -= p * z; + } + j = (en < (k + 3)) ? en : (k + 3); /* min */ + for (i = 0; i < j; i++) + { /* column modification */ + p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1]; + h[i][k - 1] -= p; + h[i][k] -= p * q; + h[i][k + 1] -= p * r; + } + /* accumulate transformations */ + for (i = low - 1; i < hgh; i++) + { + p = x * zz[i][k - 1] + y * zz[i][k] + + z * zz[i][k + 1]; + zz[i][k - 1] -= p; + zz[i][k] -= p * q; + zz[i][k + 1] -= p * r; + } + } + } + } /* for k */ + } /* while infinite loop */ + if (l == en) + { /* one root found */ + h[en - 1][en - 1] = x + t; + wr[en - 1] = h[en - 1][en - 1]; + wi[en - 1] = 0.0; + en = na; + continue; + } + y = h[na - 1][na - 1]; + w = h[en - 1][na - 1] * h[na - 1][en - 1]; + p = (y - x) / 2.0; + q = p * p + w; + z = sqrt(fabs(q)); + h[en - 1][en - 1] = x + t; + x = h[en - 1][en - 1]; + h[na - 1][na - 1] = y + t; + if (q >= 0.0) + { /* real pair */ + if (p < 0.0) /* sign */ + z = p - fabs(z); + else + z = p + fabs(z); + wr[na - 1] = x + z; + wr[en - 1] = wr[na - 1]; + if (z != 0.0) + wr[en - 1] = x - w / z; + wi[na - 1] = 0.0; + wi[en - 1] = 0.0; + x = h[en - 1][na - 1]; + s = fabs(x) + fabs(z); + p = x / s; + q = z / s; + r = sqrt(p * p + q * q); + p /= r; + q /= r; + for (j = na - 1; j < n; j++) + { /* row modification */ + z = h[na - 1][j]; + h[na - 1][j] = q * z + p * h[en - 1][j]; + h[en - 1][j] = q * h[en - 1][j] - p * z; + } + for (i = 0; i < en; i++) + { /* column modification */ + z = h[i][na - 1]; + h[i][na - 1] = q * z + p * h[i][en - 1]; + h[i][en - 1] = q * h[i][en - 1] - p * z; + } + /* accumulate transformations */ + for (i = low - 1; i < hgh; i++) + { + z = zz[i][na - 1]; + zz[i][na - 1] = q * z + p * zz[i][en - 1]; + zz[i][en - 1] = q * zz[i][en - 1] - p * z; + } + } + else + { /* complex pair */ + wr[na - 1] = x + p; + wr[en - 1] = x + p; + wi[na - 1] = z; + wi[en - 1] = -z; + } + en -= 2; + } /* while en >= low */ + /* backsubstitute to find vectors of upper triangular form */ + if (norm != 0.0) + { + for (en = n; en >= 1; en--) + { + p = wr[en - 1]; + q = wi[en - 1]; + na = en - 1; + if (q == 0.0) + {/* real vector */ + m = en; + h[en - 1][en - 1] = 1.0; + if (na != 0) + { + for (i = en - 2; i >= 0; i--) + { + w = h[i][i] - p; + r = 0.0; + for (j = m - 1; j < en; j++) + r += h[i][j] * h[j][en - 1]; + if (wi[i] < 0.0) + { + z = w; + s = r; + } + else + { + m = i + 1; + if (wi[i] == 0.0) + { + t = w; + if (t == 0.0) + { + tst1 = norm; + t = tst1; + do + { + t = 0.01 * t; + tst2 = norm + t; + } + while (tst2 > tst1); + } + h[i][en - 1] = -(r / t); + } + else + { /* solve real equations */ + x = h[i][i + 1]; + y = h[i + 1][i]; + q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i]; + t = (x * s - z * r) / q; + h[i][en - 1] = t; + if (fabs(x) > fabs(z)) + h[i + 1][en - 1] = (-r - w * t) / x; + else + h[i + 1][en - 1] = (-s - y * t) / z; + } + /* overflow control */ + t = fabs(h[i][en - 1]); + if (t != 0.0) + { + tst1 = t; + tst2 = tst1 + 1.0 / tst1; + if (tst2 <= tst1) + { + for (j = i; j < en; j++) + h[j][en - 1] /= t; + } + } + } + } + } + } + else if (q > 0.0) + { + m = na; + if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) + { + h[na - 1][na - 1] = q / h[en - 1][na - 1]; + h[na - 1][en - 1] = (p - h[en - 1][en - 1]) / + h[en - 1][na - 1]; + } + else + mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q, + &h[na - 1][na - 1], &h[na - 1][en - 1]); + h[en - 1][na - 1] = 0.0; + h[en - 1][en - 1] = 1.0; + if (en != 2) + { + for (i = en - 3; i >= 0; i--) + { + w = h[i][i] - p; + ra = 0.0; + sa = 0.0; + for (j = m - 1; j < en; j++) + { + ra += h[i][j] * h[j][na - 1]; + sa += h[i][j] * h[j][en - 1]; + } + if (wi[i] < 0.0) + { + z = w; + r = ra; + s = sa; + } + else + { + m = i + 1; + if (wi[i] == 0.0) + mcdiv(-ra, -sa, w, q, &h[i][na - 1], + &h[i][en - 1]); + else + { /* solve complex equations */ + x = h[i][i + 1]; + y = h[i + 1][i]; + vr = (wr[i] - p) * (wr[i] - p); + vr = vr + wi[i] * wi[i] - q * q; + vi = (wr[i] - p) * 2.0 * q; + if (vr == 0.0 && vi == 0.0) + { + tst1 = norm * (fabs(w) + fabs(q) + fabs(x) + + fabs(y) + fabs(z)); + vr = tst1; + do + { + vr = 0.01 * vr; + tst2 = tst1 + vr; + } + while (tst2 > tst1); + } + mcdiv(x * r - z * ra + q * sa, + x * s - z * sa - q * ra, vr, vi, + &h[i][na - 1], &h[i][en - 1]); + if (fabs(x) > fabs(z) + fabs(q)) + { + h[i + 1] + [na - 1] = (q * h[i][en - 1] - + w * h[i][na - 1] - ra) / x; + h[i + 1][en - 1] = (-sa - w * h[i][en - 1] - + q * h[i][na - 1]) / x; + } + else + mcdiv(-r - y * h[i][na - 1], + -s - y * h[i][en - 1], z, q, + &h[i + 1][na - 1], &h[i + 1][en - 1]); + } + /* overflow control */ + t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ? + fabs(h[i][na - 1]) : fabs(h[i][en - 1]); + if (t != 0.0) + { + tst1 = t; + tst2 = tst1 + 1.0 / tst1; + if (tst2 <= tst1) + { + for (j = i; j < en; j++) + { + h[j][na - 1] /= t; + h[j][en - 1] /= t; + } + } + } + } + } + } + } + } + /* end back substitution. vectors of isolated roots */ + for (i = 0; i < n; i++) + { + if (i + 1 < low || i + 1 > hgh) + { + for (j = i; j < n; j++) + zz[i][j] = h[i][j]; + } + } + /* multiply by transformation matrix to give vectors of + * original full matrix. */ + for (j = n - 1; j >= low - 1; j--) + { + m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */ + for (i = low - 1; i < hgh; i++) + { + z = 0.0; + for (k = low - 1; k < m; k++) + z += zz[i][k] * h[k][j]; + zz[i][j] = z; + } + } + } + return; + +} /*_ hqr2 */ + +void mcdiv(double ar, double ai, double br, double bi, double *cr, double *ci) +{ + double s, ars, ais, brs, bis; + + s = fabs(br) + fabs(bi); + ars = ar / s; + ais = ai / s; + brs = br / s; + bis = bi / s; + s = brs * brs + bis * bis; + *cr = (ars * brs + ais * bis) / s; + *ci = (ais * brs - ars * bis) / s; +} + + +double gammln(double xx) +{ + double x,tmp,ser; + static double cof[6]={76.18009173,-86.50532033,24.01409822, + -1.231739516,0.120858003e-2,-0.536382e-5}; + int j; + + x=xx-1.0; + tmp=x+5.5; + tmp -= (x+0.5)*log(tmp); + ser=1.0; + for (j=0;j<=5;j++) { + x += 1.0; + ser += cof[j]/x; + } + return -tmp+log(2.50662827465*ser); +} + + +double LnGamma (double alpha) +{ +/* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places. + Stirling's formula is used for the central polynomial part of the procedure. + Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. + Communications of the Association for Computing Machinery, 9:684 +*/ + double x=alpha, f=0, z; + + if (x<7) { + f=1; z=x-1; + while (++z<7) f*=z; + x=z; f=-log(f); + } + z = 1/(x*x); + return f + (x-0.5)*log(x) - x + .918938533204673 + + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z + +.083333333333333)/x; +} + +/*********************************************************/ + +double IncompleteGamma (double x, double alpha, double ln_gamma_alpha) +{ +/* returns the incomplete gamma ratio I(x,alpha) where x is the upper + limit of the integration and alpha is the shape parameter. + returns (-1) if in error + ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant. + (1) series expansion if (alpha>x || x<=1) + (2) continued fraction otherwise + RATNEST FORTRAN by + Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, + 19: 285-287 (AS32) +*/ + int i; + double p=alpha, g=ln_gamma_alpha; + double accurate=1e-8, overflow=1e30; + double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6]; + + if (x==0) return (0); + if (x<0 || p<=0) return (-1); + + factor=exp(p*log(x)-x-g); + if (x>1 && x>=p) goto l30; + /* (1) series expansion */ + gin=1; term=1; rn=p; + l20: + rn++; + term*=x/rn; gin+=term; + + if (term > accurate) goto l20; + gin*=factor/p; + goto l50; + l30: + /* (2) continued fraction */ + a=1-p; b=a+x+1; term=0; + pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b; + gin=pn[2]/pn[3]; + l32: + a++; b+=2; term++; an=a*term; + for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i]; + if (pn[5] == 0) goto l35; + rn=pn[4]/pn[5]; dif=fabs(gin-rn); + if (dif>accurate) goto l34; + if (dif<=accurate*rn) goto l42; + l34: + gin=rn; + l35: + for (i=0; i<4; i++) pn[i]=pn[i+2]; + if (fabs(pn[4]) < overflow) goto l32; + for (i=0; i<4; i++) pn[i]/=overflow; + goto l32; + l42: + gin=1-factor*gin; + + l50: + return (gin); +} + + + +double PointNormal (double prob) +{ +/* returns z so that Prob{x.999998 || v<=0) return (-1); + + g = LnGamma (v/2); + xx=v/2; c=xx-1; + if (v >= -1.24*log(p)) goto l1; + + ch=pow((p*xx*exp(g+xx*aa)), 1/xx); + if (ch-e<0) return (ch); + goto l4; +l1: + if (v>.32) goto l3; + ch=0.4; a=log(1-p); +l2: + q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); + t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; + ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; + if (fabs(q/ch-1)-.01 <= 0) goto l4; + else goto l2; + +l3: + x=PointNormal (p); + p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); + if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); +l4: + q=ch; p1=.5*ch; + if ((t=IncompleteGamma (p1, xx, g))<0) { + printf ("\nerr IncompleteGamma"); + return (-1); + } + p2=p-t; + t=p2*exp(xx*aa+g+p1-c*log(ch)); + b=t/ch; a=0.5*t-b*c; + + s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; + s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; + s3=(210+a*(462+a*(707+932*a)))/2520; + s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; + s5=(84+264*a+c*(175+606*a))/2520; + s6=(120+c*(346+127*c))/5040; + ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); + if (fabs(q/ch-1) > e) goto l4; + + return (ch); +} + +/*********************************************************/ + +/*********************************************************/ + +int DiscreteGamma (double *freqK, double *rK, + double alfa, double beta, int K, int median) +{ +/* discretization of gamma distribution with equal proportions in each + category +*/ + int i; + double gap05=1.0/(2.0*K), t, factor=alfa/beta*K, lnga1; + + //printf("Discrete gamma called with median: %d \n", median); + + if(K==1) + { + rK[0] = 1.0; + return 0; + } + + if (median) { + for (i=0; i +#include +#include +#include +#define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta)) + +void mcdiv(double ar, double ai, double br, double bi, double *cr, double *ci); +void luinverse(double **inmat, double **imtrx, int size); +void elmhes(double **a, int ordr[], int n); +void eltran(double **a, double **zz, int ordr[NUM_AA], int n); +void hqr2(int n, int low, int hgh, double **h, double **zz, double wr[NUM_AA], double wi[NUM_AA]); +//void hqr21(int n, int low, int hgh, double **h, double **zz, double wr[NUM_AA], double wi [NUM_AA]); +void mcdiv(double ar, double ai, double br, double bi, double *cr, double *ci); + +double gammln(double xx); +double LnGamma (double alpha); +double IncompleteGamma (double x, double alpha, double ln_gamma_alpha); +double PointNormal (double prob); +double PointChi2 (double prob, double v); +int DiscreteGamma (double *freqK, double *rK, double alfa, double beta, int K, int median); +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/moeoObjVecStat.h b/contribution/branches/PhyloMOEA/PhyloMOEA/moeoObjVecStat.h new file mode 100644 index 000000000..08f4d29d9 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/moeoObjVecStat.h @@ -0,0 +1,148 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef MOEOOBJVECSTAT_H_ +#define MOEOOBJVECSTAT_H_ + +#include + +#if defined(_MSC_VER) && (_MSC_VER < 1300) +template +class moeoObjVecStat : public eoStat +#else +template +class moeoObjVecStat : public eoStat +#endif +{ + public: + using eoStat::value; + + typedef typename MOEOT::ObjectiveVector ObjectiveVector; + typedef typename MOEOT::ObjectiveVector::Traits Traits; + + moeoObjVecStat(std::string _description = "") + : eoStat(ObjectiveVector(), _description) + {} + + virtual void operator()(const eoPop& _pop) { doit(_pop); }; + private: + virtual void doit(const eoPop &_pop) = 0; +}; + +template +class moeoBestObjVecStat : public moeoObjVecStat +{ +public: + + using moeoObjVecStat::value; + typedef typename moeoObjVecStat::ObjectiveVector ObjectiveVector; + + + moeoBestObjVecStat(std::string _description = "Best ") + : moeoObjVecStat(_description) + {} + + + virtual std::string className(void) const { return "moeoBestObjVecStat"; } + + const MOEOT & bestindividuals(unsigned int objective) { return *(best_individuals[objective]); } + +private : + + std::vector::const_iterator> best_individuals; + + struct CmpObjVec + { + CmpObjVec(unsigned _which, bool _maxim) : which(_which), maxim(_maxim) {} + + bool operator()(const MOEOT& a, const MOEOT& b) + { + if (maxim) + return a.objectiveVector()[which] < b.objectiveVector()[which]; + + return a.objectiveVector()[which] > b.objectiveVector()[which]; + } + + unsigned which; + bool maxim; + }; + + // Specialization for objective vector + void doit(const eoPop& _pop) + { + typedef typename moeoObjVecStat::Traits traits; + + value().resize(traits::nObjectives()); + + for (unsigned o = 0; o < traits::nObjectives(); ++o) + { + typename eoPop::const_iterator it = std::max_element(_pop.begin(), _pop.end(), CmpObjVec(o, traits::maximizing(o))); + value()[o] = it->objectiveVector()[o]; + best_individuals.push_back( it ); + } + } + // default +}; + +template class moeoAverageObjVecStat : public moeoObjVecStat +{ +public : + + using moeoObjVecStat::value; + typedef typename moeoObjVecStat::ObjectiveVector ObjectiveVector; + + + moeoAverageObjVecStat(std::string _description = "Average Objective Vector") + : moeoObjVecStat(_description) {} + + + virtual std::string className(void) const { return "moeoAverageObjVecStat"; } + +private : + + typedef typename MOEOT::ObjectiveVector::Type Type; + + template struct sumObjVec + { + sumObjVec(unsigned _which) : which(_which) {} + + Type operator()(Type &result, const MOEOT& _obj) + { + return result + _obj.objectiveVector()[which]; + } + + unsigned which; + }; + + // Specialization for pareto fitness + void doit(const eoPop& _pop) + { + typedef typename moeoObjVecStat::Traits traits; + value().resize(traits::nObjectives()); + + for (unsigned o = 0; o < value().size(); ++o) + { + value()[o] = std::accumulate(_pop.begin(), _pop.end(), Type(), sumObjVec(o)); + value()[o] /= _pop.size(); + } + } +}; + +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/moeoPtrComparator.h b/contribution/branches/PhyloMOEA/PhyloMOEA/moeoPtrComparator.h new file mode 100644 index 000000000..c8436e5fe --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/moeoPtrComparator.h @@ -0,0 +1,41 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef MOEOPTRCOMPARATOR_H_ +#define MOEOPTRCOMPARATOR_H_ + +#include +/** + * Functor allowing to compare two solutions.referenced by pointers + */ +template < class MOEOT > +class moeoPtrComparator : public eoBF < const MOEOT *, const MOEOT *, const bool > + { + public: + moeoPtrComparator( moeoComparator & _cmp) : cmp(_cmp) {} + const bool operator() (const MOEOT *ptr1, const MOEOT *ptr2) + { + return cmp(*ptr1, *ptr2); + } + private: + moeoComparator &cmp; + }; + +#endif /*MOEOPTRCOMPARATOR_H_*/ diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.cpp new file mode 100644 index 000000000..bc591c087 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.cpp @@ -0,0 +1,332 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#include "parsimonycalculator.h" +#include "treeIterator.h" +#include + +ParsimonyCalculator::ParsimonyCalculator(phylotreeIND &t) +{ + set_internal_memory_allocate = set_taxon_memory_allocate = NULL; + char_internal_memory_allocate = char_taxon_memory_allocate = NULL; + set_memory_allocate=NULL; + char_memory_allocate=NULL; + parsimony = 0; + tree_ptr = &t; + invalid_set_taxons = true; + SeqData = (Sequences*)&(t.get_patterns()); + set_tree( t); +} + + +ParsimonyCalculator::~ParsimonyCalculator() +{ + delete [] set_internal_memory_allocate; + delete [] set_taxon_memory_allocate; + delete [] char_internal_memory_allocate; + delete [] char_taxon_memory_allocate; + delete [] set_memory_allocate; + delete [] char_memory_allocate; +} + + + +// change the tree for calculate parsimony +void ParsimonyCalculator::set_tree(phylotreeIND &t) +{ + + int ntaxons = t.number_of_taxons(); + + + if( set_internal_memory_allocate != NULL ) + { + Sequences *new_data = (Sequences *)&(t.get_patterns()); + + if( SeqData!=new_data) + { + SeqData = new_data; + delete [] set_internal_memory_allocate; + delete [] set_taxon_memory_allocate; + delete [] char_internal_memory_allocate; + delete [] char_taxon_memory_allocate; + //cout << "warning.... changing patterns..." << endl; + invalid_set_taxons = true; + set_internal_memory_allocate = new unsigned char[ (2*ntaxons-2) * SeqData->infsite_count() * 5 ]; + set_taxon_memory_allocate = new unsigned char[ ntaxons*SeqData->infsite_count()*5]; + char_internal_memory_allocate = new unsigned char[ (2*ntaxons-2) * SeqData->infsite_count()]; + char_taxon_memory_allocate = new unsigned char[ ntaxons * SeqData->infsite_count()]; + } + + } + else // first assignment, allocate memory + { + set_internal_memory_allocate = new unsigned char[ (2*ntaxons-2) * SeqData->infsite_count() * 5 ]; + set_taxon_memory_allocate = new unsigned char[ ntaxons*SeqData->infsite_count()*5]; + char_internal_memory_allocate = new unsigned char[ (2*ntaxons-2) * SeqData->infsite_count()]; + char_taxon_memory_allocate = new unsigned char[ ntaxons * SeqData->infsite_count()]; + } + + tree_ptr = &t; + + set_internal.init(tree_ptr->TREE); + char_internal.init(tree_ptr->TREE); + + graph::node_iterator it = tree_ptr->TREE.nodes_begin(); + graph::node_iterator it2 = tree_ptr->TREE.nodes_end(); + + // initialize internal sets + for(int i=0 ; it!=it2; it++) + { + if(tree_ptr->istaxon(*it)) + { + set_internal[*it] = set_taxon_memory_allocate + tree_ptr->taxon_id(*it) * SeqData->infsite_count() * 5; + char_internal[*it] = char_taxon_memory_allocate + tree_ptr->taxon_id(*it) * SeqData->infsite_count(); + } + else + { + set_internal[*it] = set_internal_memory_allocate + i * SeqData->infsite_count() * 5; + char_internal[*it] = char_internal_memory_allocate + i * SeqData->infsite_count(); + i++; + } + } +} + + +void ParsimonyCalculator::init_sets_chars() +{ + int total_nodes = tree_ptr->TREE.number_of_nodes(); + int ntaxons = tree_ptr->number_of_taxons(); + int num_inf_sites = SeqData->infsite_count(); + unsigned char *set_taxon, *char_taxon, l; + + // init the internal sets + memset( set_internal_memory_allocate, 1, (2*ntaxons-2)* SeqData->infsite_count() * 5); + if(!invalid_set_taxons)return; + //cout << "warning... inicializando taxons parcimonia..." << endl; + // init internal set and character for taxons + memset( set_taxon_memory_allocate, 0, ntaxons*SeqData->infsite_count()*5*sizeof(unsigned char)); + + + for(int k=0; kinfsite_pos(j, k); + // '?' states may be any state + if ( SeqData->is_ambiguous(l) || SeqData->is_gap(l) || SeqData->is_undefined(l) ) + { + unsigned char *meaning = SeqData->ambiguos_meaning(l); + for(int m=0; m<5; m++)set_taxon[j*5+m] = meaning[m]; + } + else set_taxon[j*5+l] = 1; + char_taxon[j] = l; + } + } + invalid_set_taxons = false; +} + +// initialize set taxon and characters of taxon +void ParsimonyCalculator::init_set_char_taxon(node n) +{ + int num_inf_sites = SeqData->infsite_count(); + unsigned char l; + // set for taxaon are set to 0 + memset( set_internal[n], 0, num_inf_sites*5*sizeof(unsigned char)); + // informative sites + int taxon_id = tree_ptr->taxon_id(n); + for(int j=0; j< num_inf_sites; j++) + { + l = SeqData->infsite_pos(j, taxon_id); + //pattern_pos(*it, taxon_id); + // '?' states may be any state + if ( SeqData->is_ambiguous(l) || SeqData->is_gap(l) || SeqData->is_undefined(l) ) + { + unsigned char *meaning = SeqData->ambiguos_meaning(l); + for(int k=0; k<5; k++)set_internal[n][j*5+k] = meaning[k]; + } + else set_internal[n][j*5+l] = 1; + char_internal[n][j] = l; + } +} + + +// calculate the informative sites in the patterns; +void ParsimonyCalculator::save_informative(char *fn) +{ + //std::vector &vec_patterns = patterns->get_patterns(); + //const Sequences &patterns = tree_ptr->get_patterns(); + char nucleotide; + string sequence; + ofstream salida(fn, ios::out); + + for(int j=0; j< tree_ptr->number_of_taxons(); j++) + { + sequence.clear(); + for(int i=0 ; i < SeqData->infsite_count(); i++) + { + int l = SeqData->infsite_pos(i,j); + switch(l) + { + case 0: nucleotide = 'A'; break; + case 1: nucleotide = 'C'; break; + case 2: nucleotide = 'G'; break; + case 3: nucleotide = 'T'; break; + } + for(int k=0; k< SeqData->infsite_count(i);k++) + sequence += nucleotide; + } + salida.setf(ios::left); + salida.width(20); + salida << SeqData->seqname(j) << "\t" << sequence << '\n'; + } + salida.close(); +} + + +// calculate the intersection of two sets returning the number of +// intersected elements; +int ParsimonyCalculator::set_intersection( unsigned char *a, unsigned char *b, unsigned char *result) +{ + int sum = 0; + for(int i=0; i<5; i++) + sum += (result[i] = a[i] && b[i]); + return sum; +} + + +// calculate the union of two sets returning the number of +void ParsimonyCalculator::set_union( unsigned char *a, unsigned char *b, unsigned char *result) +{ + for(int i=0; i<5; i++) + result[i] = a[i] || b[i]; + +} + + + +// calculate the parsimony between two sets +int ParsimonyCalculator::set_parsimony( unsigned char *a, unsigned char *b, unsigned char *result) +{ + int intersected = set_intersection(a, b, result); + if(intersected == 0) + { + // no common characters, increase parsimony + set_union(a,b,result); + return 1; + } + return 0; // intersection, parsimony value remains equal +} + + +// calculate a parsimony between the sets of the father and the children +// for all relevant sites +int ParsimonyCalculator::node_parsimony( node a, node b, unsigned char *result) +{ + // calculate parsimony for taxon child, just union + int sum_parsy = 0; + int num_inf_sites = SeqData->infsite_count(); + for(int j=0; j< num_inf_sites; j++) + sum_parsy += set_parsimony( &set_internal[a][j*5], &set_internal[b][j*5], &result[j*5]) * SeqData->infsite_count(j); + return sum_parsy; +} + + +// first stage of fitch algorithm +void ParsimonyCalculator::fitch_post_order(node n, node *antecessor) +{ + postorder_Iterator it = tree_ptr->postorder_begin( n, *antecessor); + //it.begin(); + while(*it!=n) + { + unsigned char tmpresult[ SeqData->infsite_count()*5]; + parsimony += node_parsimony( it.ancestor(), *it , tmpresult); + // copy the results to the node father and continue calculating for another nodes + memcpy( set_internal[it.ancestor()], tmpresult, SeqData->infsite_count()*5*sizeof(unsigned char) ); + ++it; + } +} + + +// sequence assignment from ancestor sequence +void ParsimonyCalculator::seq_assignment(node n, node ancestor) +{ + int num_inf_sites = SeqData->infsite_count(); + unsigned char parent_char; + for(int i=0; i< num_inf_sites; i++) + { + parent_char = char_internal[ancestor][i]; + if( set_internal[n][i*5+parent_char] ) char_internal[n][i] = parent_char; + else + { + // get the first character in set + int j = 0; + while(!set_internal[n][i*5+j]) j++; + char_internal[n][i] = j; + } + } +} + + +// phase II of Fitch algorithms: pre-order (internal node sequence assignment) +void ParsimonyCalculator::fitch_pre_order(node n, node *antecessor) +{ + node nodeaux; + // ignore the taxons + + if(tree_ptr->istaxon(n)) return; + else seq_assignment( n, *antecessor); + node::inout_edges_iterator it; + node::inout_edges_iterator it_end; + it = n.inout_edges_begin(); + it_end = n.inout_edges_end(); + while( it != it_end ) + { + if(antecessor==NULL || ( it->source()!=*antecessor && it->target()!=*antecessor) ) + { + nodeaux = it->source() == n ? it->target() : it->source(); + fitch_pre_order( nodeaux, &n); + } + it++; + } +} + +long int ParsimonyCalculator::fitch() +{ + node root_aux; + + + root_aux = tree_ptr->taxon_number(0); + + edge edgeaux = *(root_aux.in_edges_begin()); + node nodeaux = edgeaux.source(); + + parsimony = 0; + init_sets_chars(); + + // if is an unrooted tree, assign an taxa as root to calculate + // parismony (see PAUP) + unsigned char tmp[ SeqData->infsite_count() * 5 ]; + fitch_post_order(nodeaux, &root_aux); + parsimony += node_parsimony( root_aux, nodeaux, tmp ); + //fitch_pre_order(nodeaux, &root_aux); + return parsimony; + //cout << "parsimonia total:" << parsimony; +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.h b/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.h new file mode 100644 index 000000000..78a021a7b --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/parsimonycalculator.h @@ -0,0 +1,61 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef PARSIMONYCALCULATOR_H +#define PARSIMONYCALCULATOR_H + +#include + +/** +@author Waldo Cancino +*/ +class ParsimonyCalculator +{ + private: + phylotreeIND *tree_ptr; // point to a tree; + unsigned char *set_memory_allocate; + unsigned char *char_memory_allocate; + unsigned char *set_internal_memory_allocate; // sequences for sets in an internal node + unsigned char *set_taxon_memory_allocate; // sequences for sets in an taxon nodes + unsigned char *char_internal_memory_allocate; // assignment chars for internal nodes + unsigned char *char_taxon_memory_allocate; // assignment chars for taxon nodes + bool invalid_set_taxons; + long int parsimony; + node_map set_internal; // internal set for fitch phase I + node_map char_internal; // internal characters for fitch phase II + Sequences *SeqData; // maintain the current pattern + int set_intersection( unsigned char *a, unsigned char *b, unsigned char *result); + void set_union( unsigned char *a, unsigned char *b, unsigned char *result); + int set_parsimony( unsigned char *a, unsigned char *b, unsigned char *result); + int node_parsimony( node a, node b, unsigned char *result); + void fitch_post_order(node n, node *antecessor); + void seq_assignment(node n, node ancestor); + void fitch_pre_order(node n, node *antecessor); + void init_set_char_taxon(node n); + void init_sets_chars(); + + public: + void set_tree(phylotreeIND &t); + long int fitch(); + void save_informative(char *fn); + ParsimonyCalculator(phylotreeIND &t); + ~ParsimonyCalculator(); + +}; +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp new file mode 100644 index 000000000..f4022d05f --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.cpp @@ -0,0 +1,2161 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + + +struct temp_info +{ + int left, right, num_nodes; + node left_most; + temp_info() {}; + temp_info(int n) : left(n), right(-1), num_nodes(0) {}; +}; + + +// return the element at position pos of the list +template const T& select_edge_at_pos( const list &l, int pos) +{ + if(pos >= l.size()) + return l.back(); + typename list::const_iterator it =l.begin(); + for( int i=0 ; i < pos; i++)it++; + return *it; +} + +template const int& select_edge_at_pos( const list &, int ); + + +double phylotreeIND::randomgsl_alpha = 500; + +// ---------------- constructors + +// constructor + +phylotreeIND::~phylotreeIND() +{ + graph::edge_iterator it = TREE.edges_begin(); + graph::edge_iterator it_e = TREE.edges_end(); + if(split_bits!=NULL)delete [] split_bits; +} + +phylotreeIND::phylotreeIND( RandomNr *g, Sequences &p, gsl_rng *gsr) : + random_gsl(gsr), randomNr(g) +{ + seqpatterns = &p; + root = NULL; + valid_splits = false; + split_bits = NULL; + + // init maps + + MAPNODETAXON.init(TREE); + + nnode = p.num_seqs(); + MAPTAXONNODE.resize(nnode); + MAPIDEDGE.resize( 2*nnode - 3); + NJDISTANCES.resize( 2*nnode - 3); + //split_bits = new bool [ (2*nnode-3) * (2*nnode-2)]; + // crete the taxons + for(int i=0; i < nnode; i++) + { + new_taxon(i); + } +} + +// copy constructor +phylotreeIND::phylotreeIND( const phylotreeIND &org) : random_gsl( org.random_gsl), randomNr(org.randomNr) +{ + // copy the data of IND class + split_bits = NULL; + copy(org); +} + + +// copy an individual +void phylotreeIND::copy( const phylotreeIND &C ) +{ + root = C.root; + parsimony = C.parsimony; + valid_splits = false; + // likelihood = C.likelihood; + nnode = C.nnode; + seqpatterns = C.seqpatterns; + // map nodes and exes + list cnodes = C.TREE.all_nodes(); + list cedges = C.TREE.all_edges(); + if(split_bits != NULL) delete [] split_bits; + split_bits = new bool [ (2*nnode-3) * (2*nnode-2)]; + + TREE.clear(); + + MAPNODETAXON.init(TREE); + MAPIDEDGE.resize( 2*nnode -3); + NJDISTANCES.resize( 2*nnode -3 ); + MAPTAXONNODE.resize( nnode ); + + construct_graph( C, cnodes, cedges); + // if is a tree constructed, calculate the splits + if( nnode < TREE.number_of_nodes() ) + this->calculate_splits(); +} + + + +// clone functions + +// copy the entire individual +phylotreeIND* phylotreeIND::clone() const +{ + + phylotreeIND* p=new phylotreeIND(*this); + return p; +} + + +// copy only the taxons and perform an stepwise addition +phylotreeIND* phylotreeIND::randomClone() const +{ + // clone the individual (only taxons) + phylotreeIND* p=new phylotreeIND( *this); + p->stepwise_addition(); + // set the branch lenghts + graph::edge_iterator it = p->TREE.edges_begin(); + graph::edge_iterator it_e = p->TREE.edges_end(); + + // init the edge lenghts + while( it != it_e) + { + p->set_branch_length(*it, randomNr->doubleuniformMinMax(0, 0.05)); + ++it; + } + return p; +} + +// change the data set + +void phylotreeIND::set_data( Sequences &s) +{ + if ( s.num_seqs() != seqpatterns->num_seqs() ) + { + cout << "error --> incompatible data change\n"; + exit(1); + } + else if(seqpatterns!=&s) + { + //cout << "data change on individual...\n"; + seqpatterns = &s; + } +} + +// create an internal node +node phylotreeIND::new_internal_node() +{ + node n = TREE.new_node(); + //assert( n.id() < 2*nnode-2); + MAPNODETAXON[n] = -1; + return n; +} +// create a node that maps the sequence id +node phylotreeIND::new_taxon( int id) +{ + node n = new_internal_node(); + //node n = TREE.new_node(); + //assert( n.id() < 2*nnode-2); + MAPNODETAXON[n] = id; + MAPTAXONNODE[id] = n; + return n; +} + +edge phylotreeIND::new_branch( node source, node dest) +{ + edge edgeaux = TREE.new_edge(source, dest); + MAPIDEDGE[edgeaux.id()] = edgeaux; + return edgeaux; +} + +edge phylotreeIND::new_branch( node source, node dest, double bl) +{ + edge edgeaux = new_branch(source, dest); + set_branch_length(edgeaux, bl); + return edgeaux; +} + +void phylotreeIND::remove_branch( edge edgeaux) +{ + MAPIDEDGE[edgeaux.id()] = invalid_edge; + TREE.del_edge(edgeaux); +} + +// select edge and a pos +edge phylotreeIND::select_edge() const +{ + int j; + edge edgeaux; + do + { + j =randomNr->uniform0Max(MAPIDEDGE.size()); + edgeaux = MAPIDEDGE[j]; + }while (edgeaux == invalid_edge); + return edgeaux; +} + +// select an edge off-side subtree defined by source_edge +edge phylotreeIND::select_edge_outsidetree( edge source_edge) const +{ + return choose_edge_fromside( source_edge.id() * TREE.number_of_nodes(), false); +} + + +// select and edge from a side of a split +edge phylotreeIND::choose_edge_fromside( int idx, bool side ) const +{ + edge edgeaux; + do + { + edgeaux = select_edge(); + assert( edgeaux.source().id() < 2*nnode-2); + assert( edgeaux.target().id() < 2*nnode-2); + } + while( split_bits[ idx + edgeaux.source().id()] !=side && + split_bits[ idx + edgeaux.target().id()] != side); + return edgeaux; +} + + +// genetic operators +// change subtrees to form childs +void phylotreeIND::crossover(float pcross, const phylotreeIND& dad, phylotreeIND*& sis, phylotreeIND*& bro) const +{ + // prevent discard qualifiers erros (because const function) + phylotreeIND &mom = (phylotreeIND &)(*this); + phylotreeIND &dady = (phylotreeIND &)dad; + sis = dady.clone(); + bro = mom.clone(); + phylotreeIND &sister = (phylotreeIND &)(*sis); + phylotreeIND &brother = (phylotreeIND &)(*bro); + if( randomNr->flipP(pcross) ) + { + // swap subtreesf + mom.export_subtree( sister ); + dady.export_subtree( brother); + sister.invalidate_splits(); + brother.invalidate_splits(); + } +} + +void phylotreeIND::mutate(float pmut) +{ + double type = randomNr->ran2(); + //double type = 0.5; + if( !splits_valid() ) calculate_splits(); + if( randomNr->flipP(pmut) ) + { + // both kinds of mutatio if type>0.67 + // topological mutation + if( type < 0.34 || type > 0.67 ) + { + //TBR(); + //change_subtrees(); + NNI(); + //SPR(); + invalidate_splits(); + + // branch mutation + } + if( type > 0.34 ) + { + // no topological change + mutate_branch_lenght( pmut); + //this->invalidateCaches(); + } + } +} + + +void phylotreeIND::mutate_branch_lenght(float pmut) +{ + double gamma; + + graph::edge_iterator it = TREE.edges_begin(); + graph::edge_iterator it_e = TREE.edges_end(); + while(it!= it_e) + { + if (randomNr->flipP(0.5)) + { + gamma = gsl_ran_gamma (random_gsl, randomgsl_alpha, (1.0/randomgsl_alpha) ); + if( get_branch_length(*it) * gamma != 0 ) + set_branch_length( *it, get_branch_length(*it) * gamma); + } + ++it; + } +} + + +// select an internal edge randomly +edge phylotreeIND::choose_internal_edge() const +{ + edge edgeaux; + do + edgeaux = select_edge(); + while( istaxon(edgeaux.source()) || istaxon(edgeaux.target()) ); + return edgeaux; +} + + +// create a graph of n nodes whithout edges +void phylotreeIND::init() +{ + +} + + + + + +// implements the crossover_operator of GAML (Lewis, 1998) +// place the result in TREE2 +void phylotreeIND::crossover_gaml(phylotreeIND &TREE2) +{ + graph::edge_iterator edgeaux = TREE.edges_end(); // point to subtree in parent 1 + edgeaux--;edgeaux--; + node root1, parent1; // point to the root of the subtree in parent1 + list subtree_edges; + list subtree_nodes; + // select the root of the tree to be pruned (the root is market with 1 in the split + // corresponding to edgeaux + root1 = split(*edgeaux, edgeaux->source()) ? edgeaux->source() : edgeaux->target(); + // select the root of the subtree pruned + parent1 = root1 == edgeaux->source() ? edgeaux->target() : edgeaux->source(); + // save the distance of edgeaux + double dist = get_branch_length(*edgeaux); + + obtain_subtree( root1, &parent1, &subtree_edges, &subtree_nodes); + + // eliminate the taxons in subtree from TREE2 + list::iterator it= subtree_nodes.begin(); + list::iterator it_e= subtree_nodes.end(); + while( it != it_e ) + { + if( istaxon(*it) ) TREE2.remove_taxon( MAPNODETAXON[*it] ); + it++; + } + + + // point to the last node in TREE2 + graph::node_iterator root2 = TREE2.TREE.nodes_end(); root2--; + + // construct the subtree1 in the TREE2 + TREE2.construct_graph( *this, subtree_nodes, subtree_edges); + // connect the subtree created in an edge of TREE2; + graph::edge_iterator edgeaux2 = TREE2.TREE.edges_begin(); + // the root of the new subtree in TREE2 is root2++ given the root2 was + // inserted at end of the list of nodes + TREE2.insert_node_in_edge( *(++root2) , *edgeaux2, dist); +} + + + +// copy a subtree to another individual, preventing the duplicated nodes +// when applied two times is the same that interchange subtrees +// note: dest tree MUST have the same number of taxas (related to the same problem) +void phylotreeIND::export_subtree( phylotreeIND &dest) +{ + + // DEBUG INFO + graph::edge_iterator itdebug = TREE.edges_begin(); + + // subtree nodes and edges + list subtree_nodes; + list subtree_edges; + // points to the root of the subtree and the parent of the subtree + node root_subtree, parent_subtree; + edge source_edge, dest_edge; + // select the subtree to be exported + source_edge = select_edge(); + //select_edge_at_pos( TREE.all_edges(), j); + // save the distance of the edge comming to the root_subtree + double dist = get_branch_length(source_edge); + + root_subtree = split(source_edge,source_edge.source()) ? source_edge.source() : source_edge.target(); + // points to the parent of the subtree in order to do the percurse + + // DEBUG INFO + //cout << "raiz da arvore" << root_subtree << endl; + + parent_subtree = source_edge.opposite(root_subtree); + // obtain the subtree + obtain_subtree( root_subtree, &parent_subtree, &subtree_edges, &subtree_nodes); + // eliminate duplicate taxons in dest + list::iterator it = subtree_nodes.begin(); + list::iterator it_e = subtree_nodes.end(); + + while(it != it_e) + { + if( istaxon(*it) ) dest.remove_taxon( MAPNODETAXON[ *it] ); + ++it; + } + + // DEBUG INFO + //cout << "arvore destino podada" << endl; + //dest.printtree(); + + // select the destination edge in dest + //dest_edge = select_edge_at_pos( dest.TREE.all_edges(), j); + dest_edge = dest.select_edge(); + + // help to find the root of the subtree regrafted + + // DEBUG INFO + //cout << "eixo regrafting" << dest_edge << endl; + + graph::node_iterator dest_root_subtree = --dest.TREE.nodes_end(); + // copy the subtree in dest + dest.construct_graph( *this, subtree_nodes, subtree_edges); + // now, as dest_parent_node point the last node in dest before insertion, + // then dest_parent_node++ point to the root of the subtree recently inserted + // Only remain insert it to the dest_edge in order to connect the tree + dest.insert_node_in_edge( *(++dest_root_subtree), dest_edge, dist); + + //DEBUG INFO + itdebug = dest.TREE.edges_begin(); +} + +// parsigal crossover work in rooted an unrooted trees +// note: crossover_gaml and parsigal +void phylotreeIND::crossover_parsigal( phylotreeIND & TREE2,phylotreeIND & SON1, phylotreeIND & SON2) +{ + list subtree1_nodes, subtree2_nodes; + list subtree1_edges, subtree2_edges; + graph::edge_iterator crossoverpoint1, crossoverpoint2, edgeaux1, edgeaux2; + node root1, root2; + node parent1, parent2; + + edgeaux1 = TREE.edges_begin(); + edgeaux1++;edgeaux1++;edgeaux1++; + edgeaux2 = TREE2.TREE.edges_end(); + edgeaux2--;edgeaux2--; + + SON1.copy(*this); + SON2.copy(TREE2); + + + // works from both rooted and unrooted trees + root1 = split(*edgeaux1,edgeaux1->source()) ? edgeaux1->source() : edgeaux1->target(); + root2 = TREE2.split(*edgeaux2,edgeaux2->source()) ? edgeaux2->source() : edgeaux2->target(); + + // its imposible to swap the entire tree + if(root!=NULL && (root1 == *root || root2 == *TREE2.root)) return; + + // again works from both rooted and unrooted + parent1 = root1==edgeaux1->source() ? edgeaux1->target() : edgeaux1->source(); + parent2 = root2==edgeaux2->source() ? edgeaux2->target() : edgeaux2->source(); + + + // point to the position to graft the subtrees + + obtain_subtree( root1, &parent1, &subtree1_edges, &subtree1_nodes); + TREE2.obtain_subtree( root2, &parent2, &subtree2_edges, &subtree2_nodes); + + // eliminate taxons from the subtree1 in SON2 and subtree2 in SON1 + + list::iterator it = subtree1_nodes.begin(); + list::iterator it_e = subtree1_nodes.end(); + while(it != it_e) + { + if( istaxon(*it) ) SON2.remove_taxon( MAPNODETAXON[ *it] ); + ++it; + } + it = subtree2_nodes.begin(); + it_e = subtree2_nodes.end(); + while(it != it_e) + { + if( TREE2.istaxon(*it) ) SON1.remove_taxon( TREE2.MAPNODETAXON[ *it] ); + ++it; + } + std::copy( SON1.TREE.edges_begin() , SON1.TREE.edges_end(), ostream_iterator( cout, " , " )); + cout << endl; + std::copy( SON2.TREE.edges_begin() , SON2.TREE.edges_end(), std::ostream_iterator( cout, " , " )); + cout << endl; + + // select the crossover sites in son1 and son2 + + crossoverpoint1 = SON1.TREE.edges_begin(); + crossoverpoint2 = SON2.TREE.edges_begin(); + + // select the root of the subtree to be conected + graph::node_iterator connect_1 = SON1.TREE.nodes_end(); connect_1--; + graph::node_iterator connect_2 = SON2.TREE.nodes_end(); connect_2--; + + SON1.construct_graph( TREE2, subtree2_nodes, subtree2_edges); + SON2.construct_graph( *this, subtree1_nodes, subtree1_edges); + + // conect to the parent 1 + SON1.insert_node_in_edge( *(++connect_1), *crossoverpoint1); + SON2.insert_node_in_edge( *(++connect_2), *crossoverpoint2); + + for(int i=0; i < SON1.nnode; i++) cout << i << "--> " << SON1.MAPTAXONNODE[i] << endl; + for(int i=0; i < SON2.nnode; i++) cout << i << "--> " << SON2.MAPTAXONNODE[i] << endl; + +} + + + + +void phylotreeIND::remove_taxon( int id) +{ + node delnode = MAPTAXONNODE[id]; + // verify remaining two nodes in tree + if (TREE.number_of_nodes() < 3) TREE.del_node(delnode); + else + { + edge edgeaux = *delnode.in_edges_begin(); + node parent = edgeaux.source(); + remove_branch(edgeaux); + TREE.del_node(delnode); + // added by me + collapse_node(parent); + } +} + + + +// this function gerate the list of edges that are separated by the +// edge edgeaux +void phylotreeIND::separategraphs(edge edgeaux, list &graph1, list &graph2) +{ + // map to the visited edges + node nodeaux = edgeaux.target(); + obtain_subtree(edgeaux.source(), &nodeaux, &graph1, NULL); + nodeaux = edgeaux.source(); + obtain_subtree(edgeaux.target(), &nodeaux, &graph2, NULL); +} + + +void phylotreeIND::calculate_splits_exp() +{ + // init the split memory + edge edgeaux = choose_internal_edge(); + allocate_split_memory(); + memset( split_bits, false, (2*nnode-3)*(2*nnode-2)*sizeof(bool)); + //cout << "memoria alocada......" << endl; + postorder_Iterator it = postorder_begin( edgeaux.source()); + + // calculate the splits + while(*it!=edgeaux.source()) + { + int idx_begin1 = it.branch().id()*TREE.number_of_nodes(); + split_bits[ idx_begin1 + (*it).id() ] = true; + int ones = 0; + if( !istaxon(*it) ) + { + child_Iterator son = child_begin( *it, it.ancestor() ); + child_Iterator son_end = child_end( *it, it.ancestor() ); + + while( son != son_end) + { + int idx_begin2 = son.branch().id()*TREE.number_of_nodes(); + for(int i=0; i< TREE.number_of_nodes(); i++) + { + split_bits[idx_begin1 + i] = split_bits[idx_begin1 + i] || split_bits[idx_begin2 + i]; + if( split_bits[idx_begin2 + i] ) ones++; + } + // the son splits is not used anymore + if (ones >= TREE.number_of_nodes()/2)invert_split( son.branch() ); + ++son; + } + } + ++it; + } + valid_splits = true; + //print_splits(); +} + +void phylotreeIND::calculate_splits() +{ + allocate_split_memory(); + memset( split_bits, false, (2*nnode-3)*(2*nnode-2)*sizeof(bool)); + //fill(split_bits.begin(), split_bits.end(), false); + if( root!=NULL) + { + int i; + cin >> i; + node::out_edges_iterator it = root->out_edges_begin(); + node::out_edges_iterator it_e = root->out_edges_end(); + while ( it != it_e ) + { + calculate_splits_from_edge2( *it, it->target()); + it++; + } + } + else + { + graph::edge_iterator edgeaux_it = TREE.edges_begin(); + graph::edge_iterator edgeaux_it_e = TREE.edges_end(); + edge edgeaux; + // select an internal edge + while( edgeaux_it!= edgeaux_it_e ) + { + if( ! istaxon( edgeaux_it->target() ) ) break; + edgeaux_it++; + } + // else select any edge + if(edgeaux_it==TREE.edges_end())edgeaux_it=TREE.edges_begin(); + + edgeaux = *edgeaux_it; + + calculate_splits_from_edge2( edgeaux, edgeaux.source()); + // clean the splits + memset( split_bits + edgeaux.id()*TREE.number_of_nodes(), false, + TREE.number_of_nodes()); + calculate_splits_from_edge2( edgeaux, edgeaux.target()); + + int ones = 0; + int nn = TREE.number_of_nodes(); + int idx_begin = edgeaux.id() * nn; + for( int i=0; i < nn; i++) + if( split_bits[ idx_begin + i] )ones++; + if (ones >= TREE.number_of_nodes()/2) + invert_split( edgeaux ); + // display splits + } + valid_splits = true; +} + + + +void phylotreeIND::calculate_splits_from_edge2(edge edgeaux, node from) +{ + node::inout_edges_iterator it = from.inout_edges_begin(); + node::inout_edges_iterator it_e = from.inout_edges_end(); + //cout << "chamando --> " << edgeaux << "," << from << endl; + //SPLITS[edgeaux].clear(); + //SPLITS[edgeaux].init(TREE,0); + int idx_begin1 = edgeaux.id()*TREE.number_of_nodes(); + // clean the split + //memset( &split_bits[idx_begin1], 0, TREE.number_of_nodes()); + if( edgeaux.id() >= 2*nnode-3 || from.id() >= 2*nnode-2) + { + cout << "edge_id:" << edgeaux.id() << " node_id:" << from.id() << endl; + assert( edgeaux.id() < 2*nnode-3 && from.id() < 2*nnode-2); + } + split_bits[ idx_begin1 + from.id() ] = true; + //SPLITS[edgeaux][from] = 1; + if( istaxon(from) ) return; + + + while( it != it_e ) + { + if( *it != edgeaux ) + { + node target = it->source() == from ? it->target() : it->source(); + calculate_splits_from_edge2(*it, target); + + int n = TREE.number_of_nodes(); + int ones = 0; + int idx_begin2 = it->id()*TREE.number_of_nodes(); + for(int i=0; i= TREE.number_of_nodes()/2) + invert_split( *it ); + } + it++; + } + // invert the split in the case that the graph is unrooted +} + + +void phylotreeIND::print_splits() const +{ + graph::edge_iterator it = TREE.edges_begin(); + graph::edge_iterator it_e = TREE.edges_end(); + while( it!= it_e) + { + cout << get_split_key( *it) << endl; + //print_split(*it); + ++it; + } +} + +void phylotreeIND::print_split(edge edgeaux) const +{ + graph::node_iterator it = TREE.nodes_begin(); + graph::node_iterator it_e = TREE.nodes_end(); + cout << "eixo " << edgeaux << "divide" << endl; + while( it != it_e ) + { + if( !split(edgeaux,*it) ) cout << *it << " - "; + else cout << " *" << *it << " - "; + it++; + } + cout << endl; +} + + +// invert the zeros and ones in the split +void phylotreeIND::invert_split( edge edgeaux) +{ + int n= TREE.number_of_nodes(); + int idx_begin = edgeaux.id()*n; + for(int i=0; i< n; i++) + split_bits[idx_begin+i] = !split_bits[idx_begin+i]; +} + + + + + +// return if an edgeaux is what set of the split in consensus +// 0 -> the taxas in edgeaux are in the "0" side +// 1 -> the taxas in edgeaux are in the "1" side +int phylotreeIND::split_set_edge( edge consensus, edge edgeaux) +{ + graph::node_iterator it = TREE.nodes_begin(); + graph::node_iterator it_e = TREE.nodes_end(); + int setconsensus = -1; + int setedgeaux, setedgeaux2; + bool flag = true; + while(it != it_e) + { + // only check taxons + if(istaxon(*it)) + { + if(setconsensus == -1){ + setedgeaux = split(edgeaux,*it); + setconsensus = split(consensus,*it); + } + // checks only for taxon from split in edgeaux whose split taxon values = setedgeaux + // if this value don't coincide with setconsensus then the corresponding set + // in split of edgeaux is the opposite and finalize the loop + else if( split(edgeaux,*it) == setedgeaux && split(consensus,*it) != setconsensus ) + flag = false; + else if( split(edgeaux,*it) != setedgeaux ) + { + setedgeaux2 = split(consensus,*it); + if(!flag)break; + } + } + ++it; + } + return ( flag ? setconsensus : setedgeaux2 ); +} + + + +void phylotreeIND::stepwise_addition() +{ + list auxlist = TREE.all_nodes(); + edge edgeaux; + node node1, node2; + + // choose two nodes randomly add + int j = randomNr->uniform0Max( auxlist.size() ); + int k = randomNr->uniform0Max( auxlist.size() ); + while(k == j)k = randomNr->uniform0Max(TREE.number_of_nodes()); + + // choose the nodes + node1 = select_edge_at_pos( auxlist, j); + node2 = select_edge_at_pos( auxlist, k); + + // first pick two nodes and insert edge in it + //edgeaux = TREE.new_edge(node1, node2); + + //MAPIDEDGE[edgeaux.id()] = edgeaux; + + edgeaux = new_branch(node1, node2); + + auxlist.remove( node1); + auxlist.remove( node2); + // the remaining edges + while( auxlist.size()>0 ) + { + j = randomNr->uniform0Max( auxlist.size() ); + node1 = select_edge_at_pos( auxlist, j); + insert_node_in_edge(node1, edgeaux); + auxlist.remove( node1); + // selec the next edge to add a new node + j = randomNr->uniform0Max( TREE.number_of_edges() ); + edgeaux = select_edge_at_pos( TREE.all_edges(), j); + } + calculate_splits(); +} + + +void phylotreeIND::start_decomposition() +{ + list auxlist = TREE.all_nodes(); + node node1, node2, cluster; + edge edgeaux; + while( auxlist.size() > 2) + { + // pick two nodes followingan criteria + //cluster = TREE.new_node(); + cluster = new_internal_node(); + new_branch(cluster, node1); + new_branch(cluster, node2); + //edgeaux = TREE.new_edge( cluster, node1 ); + //MAPIDEDGE[edgeaux.id()] = edgeaux; + //edgeaux = TREE.new_edge( cluster, node2 ); + //MAPIDEDGE[edgeaux.id()] = edgeaux; + auxlist.remove( node1); + auxlist.remove( node2); + auxlist.push_back( cluster ); + } + new_branch( cluster, *(auxlist.begin()) ); + //edgeaux = TREE.new_edge( cluster, *(auxlist.begin()) ); + //MAPIDEDGE[edgeaux.id()] = edgeaux; +} + + + + + +void phylotreeIND::SPR() +{ + + + // spr no makes sense in trees with less than four edges (three or less taxons) + if( TREE.number_of_edges() <5) return; + + // choose and edge and divides the three + edge edgeaux1 = select_edge(); + //select_edge_at_pos( TREE.all_edges(), j); + + node nodeaux, delnode, nodeaux2; + + // because the edgeaux will be deleted we save it split + int idx_begin = edgeaux1.id() * TREE.number_of_nodes(); + + // save the distance of edgeaux1 + double d_edgeaux1 = get_branch_length(edgeaux1); + + nodeaux = split(edgeaux1,edgeaux1.source()) ? edgeaux1.source() : edgeaux1.target(); + // select the source preventing remove a taxon + delnode = edgeaux1.opposite(nodeaux); + // prune edge1 and graft in edge 2 + //cout << "eixo escolhido " << edgeaux1 << endl; + remove_branch(edgeaux1); + //MAPIDEDGE[edgeaux1.id()] = invalid_edge; + //TREE.del_edge(edgeaux1); + collapse_node(delnode); + + edge edgeaux2 = choose_edge_fromside(idx_begin, false); + + // finally, regraft in edgeaux2 + // insert the subtree rooted at nodeaux in edgeaux2; + insert_node_in_edge(nodeaux, edgeaux2, d_edgeaux1); + +} + +void phylotreeIND::TBR() +{ + // tbr no makes sense in trees with less than four edges (three or less taxons) + if( TREE.number_of_edges() <7) return; + list tree1, tree2; + edge edgeaux1, edgeaux2, prune_edge; + node source, dest, nodeaux1, nodeaux2; + + // select the source and dest edges + prune_edge = choose_internal_edge(); + // the graph was divide by removing edgeaux1 + + int idx_begin = prune_edge.id() * TREE.number_of_nodes(); + + + source = prune_edge.source(); + dest = prune_edge.target(); + //TREE.del_edge(prune_edge); + remove_branch(prune_edge); + // delete the source and target nodes*/ + collapse_node(source); + collapse_node(dest); + + edgeaux1 = choose_edge_fromside( idx_begin, true); + edgeaux2 = choose_edge_fromside( idx_begin, false); + // create the new nodes to be reconected in the respective subtrees + nodeaux1 = divide_edge(edgeaux1); + nodeaux2 = divide_edge(edgeaux2); + // reconect the tree + new_branch(nodeaux1, nodeaux2); + //edgeaux1 = TREE.new_edge(nodeaux1, nodeaux2); + //MAPIDEDGE[edgeaux1.id()] = edgeaux1; +} + +// subtrees in the tree +void phylotreeIND::change_subtrees() +{ + edge source_edge, dest_edge; + list remain_edges; + node root_subtree1, root_subtree2, parent_subtree1, parent_subtree2; + // select the edge of the subtrees + source_edge = select_edge(); + root_subtree1 = split(source_edge, source_edge.source()) + ? source_edge.source() : source_edge.target(); + // points to the parent of the subtree in order to do the percurse + parent_subtree1 = source_edge.opposite(root_subtree1); + + dest_edge = select_edge_outsidetree(source_edge); + + bool lado = !split(dest_edge,root_subtree1); + root_subtree2 = split(dest_edge,dest_edge.source()) == lado + ? dest_edge.source() : dest_edge.target(); + + parent_subtree2 = dest_edge.opposite(root_subtree2); + + // reconecting the nodes + if( parent_subtree1 == source_edge.source() ) + source_edge.change_target(root_subtree2); + else + source_edge.change_source(root_subtree2); + if( parent_subtree2 == dest_edge.source() ) + dest_edge.change_target(root_subtree1); + else + dest_edge.change_source(root_subtree1); + + if( istaxon(source_edge.source()) ) source_edge.reverse(); + if( istaxon(dest_edge.source()) ) dest_edge.reverse(); +} + + +// divides an edge by creating a new node and it with the +// edge extrems and return the new node; +// set distance dist to new edge to be inserted +void phylotreeIND::insert_node_in_edge(node nodeaux, edge edgeaux, double dist) +{ + // divide an edgeaux + node new_internal_node = divide_edge(edgeaux); + // new_node is not a taxon + //MAPNODETAXON[new_internal_node] = -1; + // connect the new node + new_branch( new_internal_node, nodeaux, dist); + //edge new_edge = TREE.new_edge( new_internal_node, nodeaux); + //set_branch_length(new_edge, dist); + + //MAPIDEDGE[new_edge.id()] = new_edge; +} + + +// breaks an edge with an new root edge that joins the edgeaux extremes +node phylotreeIND::divide_edge(edge edgeaux) +{ + // create the node that divides edgeaux + //node r = TREE.new_node(); + //assert(r.id() < 2*nnode -2); + // because the edge is divided, the distance of the edge is also divided + node r = new_internal_node(); + double d = get_branch_length(edgeaux)/2; + // join the edgeaux vertices to root + new_branch( r, edgeaux.source(), d); + //edge new_edge = TREE.new_edge( r, edgeaux.source()); + // each part of the edge receives the half of the original distance + //set_branch_length(new_edge, d); + set_branch_length(edgeaux, d); + //MAPIDEDGE[new_edge.id()] = new_edge; + edgeaux.change_source(r); + return r; +} + + +void phylotreeIND::collapse_node(node delnode) +{ + edge edgeaux; // new edge to be inserted + // case the delnode have a degree 3, the graph is valid + if(delnode.degree() > 2) return; + // the node is incomplete, remove them and link the adjacent noddes + else if(delnode.degree() == 2) + { + node adj1, adj2; + node::inout_edges_iterator it1, it2; + it1 = it2 = delnode.inout_edges_begin(); + it2++; + adj1 = it1->opposite(delnode); + adj2 = it2->opposite(delnode); + // if adj1 is a taxon, connect adj2->adj1 + if( istaxon(adj1) ) + new_branch( adj2, adj1, get_branch_length(*it1) + get_branch_length(*it2)); + //edgeaux = TREE.new_edge( adj2, adj1); + else + new_branch( adj1, adj2, get_branch_length(*it1) + get_branch_length(*it2)); + //edgeaux = TREE.new_edge( adj1, adj2); + // set the new distance as the sum of edge lenghts + //set_branch_length( edgeaux, get_branch_length(*it1) + get_branch_length(*it2) ); + //MAPIDEDGE[edgeaux.id()] = edgeaux; + remove_branch( *it1); + remove_branch( *it2); + //MAPIDEDGE[it1->id()] = invalid_edge; + //MAPIDEDGE[it2->id()] = invalid_edge; + } + // invalidate edges + TREE.del_node(delnode); +} + + +void phylotreeIND::collapse_zero_edges() +{ + graph::edge_iterator it = TREE.edges_begin(); + graph::edge_iterator it_end = TREE.edges_end(); + graph::edge_iterator tmp; + node source, dest; + while(it!=it_end) + { + tmp = it; + ++tmp; + if(is_internal( *it) && get_branch_length( *it) == 0) + { + source = it->source(); + dest = it->target(); + //cout << "apagando branch" << endl; + remove_branch(*it); + //cout << "reconectando nos..." << endl; + reconnect_nodes( source, dest ); + } + it = tmp; + } +} + +// connect all adjacent nodes from source to dest and delete source +void phylotreeIND::reconnect_nodes(node source, node dest) +{ + node::inout_edges_iterator it = source.inout_edges_begin(); + node::inout_edges_iterator it_end = source.inout_edges_end(); + node::inout_edges_iterator tmp_it; + edge tmp; + while(it != it_end) + { + tmp = *it; + tmp_it = it; + ++tmp_it; + if( it->source() == source) + tmp.change_source( dest); + else if(it->target() == source) + tmp.change_target( dest); + it = tmp_it; + } + TREE.del_node(source); +} + + + + +// select an neighboor defined by the edge edgeaux and node start +edge phylotreeIND::choose_neighboor(edge edgeaux, node start) const +{ + + int n_neighboors = start.degree(); + // choose an neighbooord edge differnet from edgeaux + node::inout_edges_iterator it; + do + { + int k = randomNr->uniform0Max( n_neighboors ); + it = start.inout_edges_begin(); + for(int i=0; i < k; i++) it++; + }while( *it == edgeaux ); + // select the neighboor + return *it; +} + + +void phylotreeIND::taxa_swap() +{ + node node1, node2; + node1 = taxon_number( randomNr->uniform0Max( nnode ) ); + node2 = taxon_number( randomNr->uniform0Max( nnode ) ); + edge extern_edge1 = *node1.inout_edges_begin(); + edge extern_edge2 = *node2.inout_edges_begin(); + extern_edge1.change_target(node2); + extern_edge2.change_target(node1); +} + + + +void phylotreeIND::NNI() +{ + edge select_edge; + edge edge_neighboor1, edge_neighboor2; + node node_neighboor1, node_neighboor2; + + // select an internal edge + select_edge = choose_internal_edge(); + + + // choose the neighboors + edge_neighboor1 = choose_neighboor( select_edge, select_edge.source() ); + + node_neighboor1 = edge_neighboor1.opposite( select_edge.source() ); + + edge_neighboor2 = choose_neighboor( select_edge, select_edge.target() ); + + node_neighboor2 = edge_neighboor2.opposite( select_edge.target() ); + + + //change the neighboors + + if( edge_neighboor1.source() == node_neighboor1 ) + edge_neighboor1.change_source( node_neighboor2); + else + edge_neighboor1.change_target( node_neighboor2); + + if( edge_neighboor2.source() == node_neighboor2 ) + edge_neighboor2.change_source( node_neighboor1); + else + edge_neighboor2.change_target( node_neighboor1); + + if(istaxon( edge_neighboor1.source() ))edge_neighboor1.reverse(); + if(istaxon( edge_neighboor2.source() ))edge_neighboor2.reverse(); +} + + + +void phylotreeIND::insert_root(edge edgeaux) +{ + // first divide the edge with a new node that connects + // its vertices + root = new node(divide_edge(edgeaux)); + // finally convert the graph to thee with root nodeaux + convert_graph_to_tree(*root, NULL); + +} + + +void phylotreeIND::convert_graph_to_tree(node n, node *antecessor) +{ + edge edgeaux; + // reverse all the incoming edges that are not coming from antecessor + node::inout_edges_iterator it,tmp; + it = tmp = n.inout_edges_begin(); + node::inout_edges_iterator it_e = n.inout_edges_end(); + while(it!= it_e) + { + //neccesary if the variable "it" is deferenced + tmp++; + edgeaux = *it; + if(antecessor==NULL || edgeaux.source()!=*antecessor) + { + // reverse the edge and deferefence it + if( edgeaux.source() != n) + edgeaux.reverse(); + convert_graph_to_tree(edgeaux.target(), &n); + } + it=tmp; + } +} + +// return the list of nodes and edges that conform the tree rooted at node root + +void phylotreeIND::obtain_subtree(node n, node *antecessor, list *edgelist, list *nodelist) +{ + //cout << "chamado con ...." << n; + //cout << *antecessor << endl; + node::inout_edges_iterator it, tmp, it_e; + it = tmp = n.inout_edges_begin(); + it_e = n.inout_edges_end(); + edge edgeaux; + // + if(nodelist)nodelist->push_back(n); + while( it != it_e ) + { + if(antecessor==NULL || (it->source()!=*antecessor && it->target()!=*antecessor) ) + { + if(edgelist) edgelist->push_back(*it); + if(edgelist->size() > TREE.number_of_edges()) + { + cout << "hubo un error..." << endl; + int lll; + cin >> lll; + } + if( it->source() == n ) + obtain_subtree( it->target(), &n, edgelist, nodelist); + else obtain_subtree( it->source(), &n, edgelist, nodelist); + } + it++; + } +} + + +bool phylotreeIND::isparent(node parent, node children) const +{ + if( parent == *root) return true; + else + { + while( children.indeg()!=0 ) + { + if (parent == children) return true; + children = children.in_edges_begin()->source(); + } + } + return false; +} + +node phylotreeIND::firstcommonancestor(node node1, node node2) const +{ + if( isparent( node1, node2 )) return node1; + else + { + while(node2.indeg()!=0 ) + { + if ( isparent( node2 , node1) ) + return node2; + node2 = node2.in_edges_begin()->source(); + } + } + // root is the parent + return *root; +} + +node phylotreeIND::firstcommonancestor( list &listnode) const +{ + // points to the second element + list::iterator it = ++listnode.begin(); + list::iterator it_e = listnode.end(); + node ancestor = *listnode.begin(); + while(it!= it_e) { ancestor = firstcommonancestor( ancestor, *it); ++it; } + return ancestor; +} + + + + +// return true when edgeaux is an internal edge +bool phylotreeIND::is_internal(edge edgeaux) const +{ + return ( !istaxon(edgeaux.source()) && !istaxon(edgeaux.target()) ); +} + +// parsimony for rooted and non-rooted threes + + +// construct an graph with nodes and edges in the lists provided +void phylotreeIND::construct_graph( const phylotreeIND &G, list &gnodes, list &gedges) +{ + // maps the nodes from list nodes to the new nodes in tree + map fromto; + list::iterator it = gnodes.begin(); + list::iterator it_e = gnodes.end(); + edge aux; + node n; + while(it!=it_e) + { + //cout << "insertando taxa " << *it << G.MAPNODETAXON[*it] << endl; + if( G.istaxon(*it) ) + n = new_taxon( G.MAPNODETAXON[*it] ); + else + n = new_internal_node(); + fromto.insert( pair (*it, n)); + /*node n = TREE.new_node(); + assert(n.id() < 2*nnode -2); + fromto.insert( pair (*it, n)); + MAPNODETAXON[n] = G.MAPNODETAXON[*it]; + if(G.istaxon(*it)) + { + inserido = true; + MAPTAXONNODE[ MAPNODETAXON[n] ] = n; + }*/ + ++it; + } + list::iterator it2 = gedges.begin(); + list::iterator it2_e = gedges.end(); + while(it2!=it2_e) + { + new_branch( fromto[it2->source()], fromto[it2->target()], G.get_branch_length(*it2)); + //aux = TREE.new_edge( fromto[it2->source()], fromto[it2->target()] ); + //set_branch_length( aux, G.get_branch_length(*it2) ); + //MAPIDEDGE[aux.id()] = aux; + ++it2; + } +} + + + +void phylotreeIND::printtree() const +{ + graph::node_iterator it, it_e; + node::out_edges_iterator it2, it2_e; + it = TREE.nodes_begin(); + it_e = TREE.nodes_end(); + while(it!=it_e) + { + if(istaxon(*it)) cout << "*" << "(" << MAPNODETAXON[*it] << ")" << *it << " :"; + it2 = it->out_edges_begin(); + it2_e = it->out_edges_end(); + for( ; it2 != it2_e; it2++) + cout << *it2 << "(" << get_branch_length(*it2) << ")" << ", " ; + cout << endl; + ++it; + } +} + + +void phylotreeIND::printNewick(ostream &os) +{ + //edge edgeaux = choose_internal_edge(); + //string s = "("; + //newick_traverse( edgeaux.source(), NULL, edgeaux, s); + //s += ");\n"; + string s = newick_traverse2(true, true); + os << s << endl; +} + + +string phylotreeIND::newick_traverse2( bool brlens, bool longnames) +{ + string s; + ostringstream s_2; + s_2.precision(15); + s_2.setf(ios::fixed); + + edge edgeaux = * ( MAPTAXONNODE[0].inout_edges_begin()); + node root_traverse = edgeaux.opposite( MAPTAXONNODE[0] ); + + convert_graph_to_tree( root_traverse, NULL); + + postorder_Iterator it = postorder_begin( root_traverse ); + postorder_Iterator it2 = postorder_end( root_traverse ); + + // imprimir ordem dos filhos +// child_Iterator it3 = child_begin( root_traverse); +// child_Iterator it4 = child_end( root_traverse); +// +// cout << "ordem de percorrido" << endl; +// +// while (it3!=it4) +// { +// if(istaxon(*it3)) cout << taxon_id(*it3) << ","; +// else cout << "I," << endl; +// ++it3; +// } + +// cout << endl; + int prev_nivel = 1; + while(it!= it2) + { + s_2.str(""); + if( istaxon(*it) ) + { + //cout << "gravando\t" << MAPNODETAXON[*it] << endl; + for(int i=0; i < it.stack_size() - prev_nivel; i++) s+="("; + if( longnames) + s_2 << seqpatterns->seqname( MAPNODETAXON[*it] ); + else s_2 << MAPNODETAXON[*it]; + if ( brlens) + s_2 << ":" << get_branch_length( it.branch() ); + s_2 << ","; + s += s_2.str(); + + } + else + { + if(s[ s.size() -1] == ',') s[s.size()-1] = ')'; + else s+=")"; + if( *it != edgeaux.source() && brlens) + s_2 << ":" << get_branch_length( it.branch() ); + s_2 << ","; + s+= s_2.str(); + } + prev_nivel = it.stack_size(); + ++it; + } + s[s.size()-1] = ';'; + return s; +} + + + +void phylotreeIND::newick_traverse( node n, node *ancestor, edge edgeaux, string &s) +{ + ostringstream s_2; + s_2.precision(15); + s_2.setf(ios::fixed); + if(istaxon(n)) { + + s_2 << seqpatterns->seqname( MAPNODETAXON[n] ) << ":" << get_branch_length(edgeaux); + s+= s_2.str(); + return; + } + else if(ancestor!=NULL)s+='('; + node::inout_edges_iterator it, it_e; + it = n.inout_edges_begin(); + it_e = n.inout_edges_end(); + node nodeaux; + bool first_son = true; + while( it != it_e ) + { + if(ancestor==NULL || (it->source()!=*ancestor && it->target()!=*ancestor) ) + { + if(!first_son)s += ","; + first_son=false; + nodeaux = it->source() == n ? it->target() : it->source(); + newick_traverse( nodeaux, &n, *it, s ); + + } + it++; + } + if(ancestor!=NULL)s_2 << "):" << get_branch_length(edgeaux); + s+= s_2.str(); +} + +void phylotreeIND::read_newick( string newickstring) +{ + TREE.clear(); + edge invalid_edge; + list pilha; + int pos = 0; + node father; + edge internal_edge; + string taxon_name; + double blen; + char token; + int tottaxons = 0; + int totinternals = 0; + try{ + do + { + token = newickstring[pos]; + switch(token) + { + case '(': + { + //father = TREE.new_node(); + father = new_internal_node(); + //cout << "no interno..." << father.id() << endl; + totinternals++; + pilha.push_back(father); + break; + } + case ')': + { + node tmp = father; + assert(pilha.size() >0); + pilha.pop_back(); + if(pilha.size()>0) + { + father = pilha.back(); + internal_edge = new_branch( father, tmp); + //internal_edge = TREE.new_edge(father, tmp); + //MAPIDEDGE[internal_edge.id()] = internal_edge; + //cout << "eixo interno..." << internal_edge.id() << endl; + } + break; + } + case ',': + break; + case ':': + { + read_bl(newickstring, blen, (++pos)); + //cout << "longitude de ramo:" << blen << endl; +// cout << "lendo brlen" << blen << endl; + set_branch_length( internal_edge, blen); + break; + } + case ';': + break; + default: + { + read_taxonname_bl(newickstring, taxon_name, blen, pos); + // create the taxon + + cout << "agregando taxon" << taxon_name << endl; + string short_taxonname = taxon_name.substr(0,10); + int taxon_id = seqpatterns->search_taxon_name(short_taxonname); + //cout << "tentando agregar taxon ..." << taxon_name; + if( taxon_id<0) + { + cout << "falla al agregar taxon..." << taxon_name; + throw ExceptionManager(11); + //assert( taxon_id >=0 ); + } + //cout << "...sucess" << endl; + + //cout << "eixo interno..." << e.id() << endl; + //cout << "longitude de ramo:" << blen << endl; + node n = new_taxon( taxon_id); + tottaxons++; + //cout << "taxon..." << n.id() << endl; + new_branch( father, n, blen); + //edge e = TREE.new_edge( father, n); + //MAPIDEDGE[e.id()] = e; + //set_branch_length( e, blen); + //MAPNODETAXON[n] = taxon_id; + //MAPTAXONNODE[taxon_id] = n; + break; + } + } + pos++; + }while (token!=';'); + } + catch(ExceptionManager e) + { + e.Report(); + } + cout << "total... --> nos:" << TREE.number_of_nodes() << " eixos:" << TREE.number_of_edges() << endl; + cout << "contados..--> taxons:" << tottaxons << " internals:" << totinternals << endl; +} + + +void phylotreeIND::read_newick2(string newickstring) +{ + //cout << "leyendo arvore\n"; + TREE.clear(); + edge invalid_edge; + list pilha; + int pos = 0; + node father; + edge internal_edge; + string taxon_name; + double blen; + char token; + int tottaxons = 0; + int totinternals = 0; + int taxon_id; + do + { + token = newickstring[pos]; + switch(token) + { + case '(': + { + //father = TREE.new_node(); + father = new_internal_node(); + //cout << "no interno..." << father.id() << endl; + totinternals++; + pilha.push_back(father); + break; + } + // close internal node + case ')': + { + pos++; + if( read_taxonname_bl(newickstring, taxon_name, blen, pos) == -1) + { + cout << "sintax error in position " << pos << endl; + throw ExceptionManager(11); + exit(1); + } + node tmp = father; + assert(pilha.size() >0); + pilha.pop_back(); + if(pilha.size()>0) + { + //cout << "warning... ignorin internal taxon_name" << taxon_name + // << " with blen:" << blen << endl; + if(blen>1.e+05){ + cout << "warning ... blen >1"; + blen=1.e+05; + } + if(blen == 0){ + //cout << "warning.... blen = 0 , posicao " << pos << endl; + //blen = BL_MIN; + } + if(blen <0){ + cout << "warning.... blen negativo " << blen << endl; + cout << "posicao " << pos << endl; + cout << "taxonname" << taxon_name << endl; + blen=0; + } + father = pilha.back(); + internal_edge = new_branch( father, tmp); + set_branch_length( internal_edge, blen); + } + break; + } + case ',': + case ';': + break; + default: + { + if(read_taxonname_bl(newickstring, taxon_name, blen, pos) == -1) + { + cout << "sintax error at position " << pos << endl; + exit(1); + + } + // create the taxon + // check if a taxon is a number (no long name!) + istringstream convert(taxon_name); + if( convert >> taxon_id) + { + if(taxon_id < 0 || taxon_id > ( number_of_taxons() -1) ) + taxon_id=-1; + + } + // long name + else + { + + string short_taxonname = taxon_name.substr(0,10); + taxon_id = seqpatterns->search_taxon_name(short_taxonname); +// cout << "tentando agregar taxon ..." << taxon_name << "con blen" << blen +// << "posicon " << pos << endl; + } + if( taxon_id<0) + { + cout << "falla al agregar taxon..." << taxon_name; + throw ExceptionManager(11); + assert( taxon_id >=0 ); + } + //cout << "leyendo\t" << taxon_id << endl; + node n = new_taxon( taxon_id); + tottaxons++; + if(blen<0) + { + cout << "negative set to 0"; + blen=0; + } + new_branch( father, n, blen); + break; + } + } + pos++; + }while (token!=';'); + //cout << "total... --> nos:" << TREE.number_of_nodes() << " eixos:" << TREE.number_of_edges() << endl; + //cout << "contados..--> taxons:" << tottaxons << " internals:" << totinternals << endl; +} + + +int phylotreeIND::read_taxonname_bl( string &s, string &taxonname, double &blen, int &pos) +{ + int first_pos = s.size(); + + int first_par = s.find( ')', pos); //first parentesis + first_pos = (first_par !=string::npos && first_par < first_pos) + ? first_par : first_pos; + + int first_comma = s.find( ',', pos); + first_pos = (first_comma !=string::npos && first_comma < first_pos) + ? first_comma : first_pos; + int first_points = s.find( ':', pos); + first_pos = (first_points !=string::npos && first_points < first_pos) + ? first_points : first_pos; + int first_semicolon = s.find( ';', pos); + + first_pos = (first_semicolon !=string::npos && first_semicolon < first_pos) + ? first_semicolon : first_pos; + + // assert + if(first_pos == string::npos) return -1; // error ! + // get_taxon_name + taxonname = s.substr( pos, first_pos - pos ); + if( first_pos == first_points) + { + pos = first_pos+1; + return read_bl( s, blen, pos ); + } + //else cout << "no brlens..." << endl; + pos = first_pos-1; + return 0; +} + +int phylotreeIND::read_bl( string &s, double &blen, int &pos) +{ + int first_pos = s.find( ';', pos); // final semicolor ex. (a,b):xx.yy; + int first_par = s.find( ')', pos); //first parentesis + first_pos = (first_par !=string::npos && first_par < first_pos) + ? first_par : first_pos; + int first_comma = s.find( ',', pos); + first_pos = (first_comma !=string::npos && first_comma < first_pos) + ? first_comma : first_pos; + + if(first_pos == string::npos) return -1; + + //assert(first_pos != string::npos); + + string bl = s.substr( pos, first_pos - pos ); + istringstream convert(bl); + if (convert >> blen) + { + pos = first_pos-1; + return 0; + } + else return -1; + //blen = atof(bl.c_str()); + //return first_pos - 1; +} + +string phylotreeIND::get_split_key( edge edgeaux) const +{ + int n= TREE.number_of_nodes(); + int idx_begin = edgeaux.id()*n; + string s; + graph::node_iterator it = TREE.nodes_begin(); + graph::node_iterator it_e = TREE.nodes_end(); + s.resize(nnode); + while(it!=it_e) + { + if(istaxon(*it)) + s[ MAPNODETAXON[*it] ] = + split_bits[ idx_begin + it->id() ] == 1 ? '*':'.'; + ++it; + } + return s; +} + +string phylotreeIND::get_invert_split_key( edge edgeaux) const +{ + int n= TREE.number_of_nodes(); + int idx_begin = edgeaux.id()*n; + string s; + s.resize(nnode); + graph::node_iterator it = TREE.nodes_begin(); + graph::node_iterator it_e = TREE.nodes_end(); + while(it!=it_e) + { + if(istaxon(*it)) + s[ MAPNODETAXON[*it] ] = + split_bits[ idx_begin + it->id() ] == 1 ? '.':'*'; + ++it; + } + return s; +} + + +double phylotreeIND::compare_topology(phylotreeIND &other) +{ + map split_repository; + map::iterator it_aux; + graph &tree2 = other.TREE; + int num_different_edges=0; + // create repository for splits in tree1 + graph::edge_iterator it= TREE.edges_begin(); + graph::edge_iterator it_e= TREE.edges_end(); + while(it!=it_e) + { + if(!istaxon(it->target())) + { + split_repository[get_split_key(*it)] = *it; + } + ++it; + } + + // count for different splits + it = tree2.edges_begin(); + it_e = tree2.edges_end(); + + while(it!=it_e) + { + if(!istaxon(it->target())) + { + if( (it_aux = split_repository.find( other.get_split_key(*it))) != split_repository.end() + || (it_aux = split_repository.find( other.get_invert_split_key(*it)) ) !=split_repository.end() ) + split_repository.erase( it_aux ); + else + num_different_edges++; + } + //} + ++it; + } + // different tree1->tree2 + different tree2->tree1 (remaining) + return num_different_edges + split_repository.size(); +} + + +// day algorithms + +double phylotreeIND::compare_topology_2(phylotreeIND &other) +{ + // hash table + int *hash_table; + // node mapes + int *map_nodes; + int node_count = 0; + int n = number_of_taxons(); + int good_edges = 0; + + node_map interior_node(TREE, temp_info(n)); + + + + // allocate memory + hash_table = new int[n*2]; + for(int i=0; isource(); + + interior_node.init(other.TREE, temp_info(n)); + + it = postorder_begin( no_root2, root2); + while( *it != no_root2) + { + struct temp_info &father_info = interior_node [ it.ancestor() ] ; + struct temp_info ¤t_info = interior_node [ *it ] ; + + if( istaxon(*it) ) + { + if( father_info.left_most == invalid_node ) father_info.left_most = *it; + if( map_nodes[ other.taxon_id( *it) ] < father_info.left) father_info.left = map_nodes[ other.taxon_id(*it) ]; + if( map_nodes[ other.taxon_id( *it) ] > father_info.right) father_info.right = map_nodes[ other.taxon_id(*it) ]; + father_info.num_nodes++; + } + else + { + + // check hash tables + if ( current_info.right - current_info.left + 1 == + current_info.num_nodes) + { + if( (hash_table[ current_info.left*2 ] == current_info.left && + hash_table[ current_info.left*2+1 ] == current_info.right) + || + (hash_table[ current_info.right*2 ] == current_info.left && + hash_table[ current_info.right*2+1 ] == current_info.right) ) + good_edges++; + } + + if( father_info.left_most == invalid_node )father_info.left_most = *it; + if( current_info.left < father_info.left) father_info.left = current_info.left; + if( current_info.right > father_info.right) father_info.right = current_info.right; + father_info.num_nodes += current_info.num_nodes; + } + ++it; + } + delete [] map_nodes; + delete [] hash_table; + return TREE.number_of_edges() - number_of_taxons() - good_edges + + other.TREE.number_of_edges() - number_of_taxons() - good_edges; +} + + + +double phylotreeIND::robinson_foulds_distance(phylotreeIND &other, int debug) +{ + map split_repository; + graph &tree2 = other.TREE; + double distance = 0; + // create repository for splits in tree1 + graph::edge_iterator it= TREE.edges_begin(); + graph::edge_iterator it_e= TREE.edges_end(); + map::iterator it_aux; + + while(it != it_e) + { + split_repository[get_split_key(*it)] = *it; ++it; + } + + // count for different splits + it = tree2.edges_begin(); + it_e = tree2.edges_end(); + + while(it!=it_e) + { + if( (it_aux = split_repository.find( other.get_split_key(*it))) != split_repository.end() + || (it_aux = split_repository.find( other.get_invert_split_key(*it)) ) != split_repository.end() ) + { + distance += fabs( other.get_branch_length(*it) + - get_branch_length( (*it_aux).second )); + split_repository.erase( it_aux ); + } + // edge in tree2 not found in tree1 + else + { + distance += pow(other.get_branch_length(*it),2); + } + ++it; + } + // edges in tree1 not found in tree2 + it_aux = split_repository.begin(); + while( it_aux != split_repository.end() ) + { + distance += get_branch_length( (*it_aux).second ); + ++it_aux; + } + return distance; +} + +void phylotreeIND::printtreeinfo() const +{ + cout << this->TREE; + graph::node_iterator it=TREE.nodes_begin(); + graph::node_iterator it_e=TREE.nodes_end(); + cout << "taxa information" << endl; + while(it!=it_e) + { + if(istaxon(*it))cout << " * " << MAPNODETAXON[*it]; + cout << endl; + ++it; + } +} + + + +double phylotreeIND::compare_topology_3(phylotreeIND &other) +{ + // hash table + int n = number_of_taxons(); + struct temp_info **hash_table; // hash that points struct info + struct temp_info *interior_node_info = new struct temp_info[n-1]; + struct temp_info *interior_node_info_2 = new struct temp_info[n-1]; + int idx_interior = 0; + node_map interior_node(TREE, NULL); + node_map interior_node_2(other.TREE, NULL); + edge_map interior_edge(TREE, NULL); + // node mapes + int *map_nodes; + int node_count = 0; + int good_edges = 0; + + + // allocate memory + hash_table = new struct temp_info*[n]; + for(int i=0; ileft = node_count; + } + //else father_info.right = node_count; + node_count++; + ++it; + } + else + { + int idx; + l = current_info->left; + interior_edge[ it.branch() ] = current_info; + + if( father_info == NULL ) + { + interior_node [ it.ancestor() ] = father_info = &interior_node_info[idx_interior]; + idx_interior++; + father_info->left = current_info->left; + } + + ++it; + if (istaxon(*it) || *it==root1) idx = r; + else idx = l; + + current_info->right = r; + // fill hash table + hash_table[ idx ] = current_info; + } + } + // step 4 + node root2 = other.taxon_number( n-1); + // father of root2 + node no_root2 = root2.in_edges_begin()->source(); + + interior_node_2.init(other.TREE, NULL); + + it = postorder_begin( no_root2, root2); + + + idx_interior = 0; + + while( *it != no_root2) + { + struct temp_info *father_info = interior_node_2 [ it.ancestor() ] ; + struct temp_info *current_info = interior_node_2 [ *it ] ; + + if( istaxon(*it) ) + { + + if( father_info == NULL) + { + interior_node_2 [ it.ancestor() ] = father_info = &interior_node_info_2[idx_interior]; + father_info->left = n; + father_info->right= -1; + father_info->num_nodes = 0; + idx_interior++; + //.left_most == invalid_node ) father_info.left_most = *it; + } + if( map_nodes[ other.taxon_id( *it) ] < father_info->left) father_info->left = map_nodes[ other.taxon_id(*it) ]; + if( map_nodes[ other.taxon_id( *it) ] > father_info->right) father_info->right = map_nodes[ other.taxon_id(*it) ]; + father_info->num_nodes++; + } + else + { + + + // check hash tables + if ( current_info->right - current_info->left + 1 == + current_info->num_nodes) + { + //cout << "inicio checkeando hash" << current_info->left << " " << current_info->right << endl; + //cout << "hash table adress" << hash_table[current_info->left] << " " << hash_table[current_info->right] << endl; + if( hash_table[current_info->left]!=NULL) + if( hash_table[current_info->left]->left == current_info->left && + hash_table[ current_info->left]->right == current_info->right) good_edges++; + if(hash_table[current_info->right]!=NULL) + if(hash_table[ current_info->right]->left == current_info->left && + hash_table[ current_info->right]->right == current_info->right) good_edges++; + //cout << "fin checkeando hash " << good_edges << endl; + } + + if( father_info == NULL) + { + interior_node_2 [ it.ancestor() ] = father_info = &interior_node_info_2[idx_interior]; + father_info->left = n; + father_info->right= -1; + father_info->num_nodes = 0; + idx_interior++; + //.left_most == invalid_node ) father_info.left_most = *it; + } + if( current_info->left < father_info->left) father_info->left = current_info->left; + if( current_info->right > father_info->right) father_info->right = current_info->right; + father_info->num_nodes += current_info->num_nodes; + } + ++it; + } + interior_edge.clear(); + interior_node.clear(); + interior_node_2.clear(); + delete [] interior_node_info; + delete [] interior_node_info_2; + delete [] map_nodes; + delete [] hash_table; + return TREE.number_of_edges() - number_of_taxons() - good_edges + + other.TREE.number_of_edges() - number_of_taxons() - good_edges; +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h new file mode 100644 index 000000000..d02fbde66 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/phylotreeIND.h @@ -0,0 +1,290 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + + + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifndef _phylotreeIND_H_ +#define _phylotreeIND_H_ +#define BL_MIN 1.e-10 + +#include +#include +#include +#include +#include +#include +#include +#include + + +class phylotreeIND +{ + private: + bool valid_splits; + edge invalid_edge; + node_map MAPNODETAXON; // ecah node maps a taxon id in the vector + vector MAPTAXONNODE; // each taxon number points to a node + vector MAPIDEDGE; + valarray NJDISTANCES; // distances stored in the edges + //edge_map< node_map > SPLITS; // the splits that each edge separate the graph + bool *split_bits; + Sequences *seqpatterns; + + int nnode; // number of nodes + static double randomgsl_alpha; // shape paramter of gamma distribution + gsl_rng *random_gsl; // GSL (GNU Scientific Library) random generator for gamma numbers) + node *root; + int parsimony; + + + void init(); + void SPR(); //SPR operator + void NNI(); // NNI operator + void TBR(); // TBR operator + void taxa_swap(); // taxa swap operator + void collapse_node ( node ); + + void insert_root ( edge ); + node divide_edge ( edge ); + void insert_node_in_edge ( node, edge, double d=0 ); + + bool isparent ( node, node ) const; + node firstcommonancestor ( node, node ) const; + node firstcommonancestor ( list & ) const; + void calculate_splits_from_edge2 ( edge, node ); + int split_set_edge ( edge , edge ); + void print_split ( edge ) const; + void obtain_subtree ( node , node *, list *, list * ); + void construct_graph ( const phylotreeIND &, list & , list & ); + void change_subtrees(); + void separate_subtree_from_edge ( edge, list &, bool ); + void crossover_gaml ( phylotreeIND & ); + + void crossover_parsigal ( phylotreeIND &,phylotreeIND &, phylotreeIND & ); + void crossover_gaphyl ( phylotreeIND & ); + void invert_split ( edge ); + void remove_taxon ( int ); + void start_decomposition(); + void stepwise_addition(); + void separategraphs ( edge , list &, list & ); + void visit_edges_from_node ( node, list & ); + + + void mutate_branch_lenght ( float ); + edge select_edge_outsidetree ( edge source_edge ) const; + edge choose_edge_fromside ( int id, bool side ) const; + edge choose_neighboor ( edge, node ) const; + + + int read_taxonname_bl ( string &s, string &taxonname, double &blen, int &pos ); + int read_bl ( string &s, double &blen, int &pos ); + edge new_branch ( node, node ); + edge new_branch ( node, node, double ); + void remove_branch ( edge ); + + void reconnect_nodes ( node source, node dest ); + node new_taxon ( int id ); + node new_internal_node(); + + public: + RandomNr *randomNr; + phylotreeIND& operator= (const phylotreeIND& ind) { copy(ind); return *this; } + virtual phylotreeIND* clone() const; + virtual phylotreeIND* randomClone() const; + + + GTL::graph TREE; // final tree calculated by NJ + // constructors + void loadsequences ( char * ); + void copy ( const phylotreeIND & ); + edge select_edge() const; + phylotreeIND ( const phylotreeIND &org ); + phylotreeIND ( RandomNr *g, Sequences &p, gsl_rng *gslr ); + ~phylotreeIND(); + + // genetic operators + void export_subtree ( phylotreeIND &dest ); + virtual void crossover ( float pcross, const phylotreeIND& dad, phylotreeIND*& sis, phylotreeIND*& bro ) const; + virtual void mutate ( float pcross ); + + //IND& operator= (const IND& ind) { copy( (phylotreeIND &)ind); return *this; } + //phylotreeIND& operator= (phylotreeIND& ind) { copy(ind); return *this; } + + edge choose_internal_edge() const; + void convert_graph_to_tree ( node, node * ); + + void calculate_splits(); + void calculate_splits_exp(); + inline void invalidate_splits() { valid_splits = false; } + inline void remove_split_memory() + { + if ( split_bits!=NULL ) delete [] split_bits; + invalidate_splits(); + split_bits=NULL; + } + + inline void allocate_split_memory() + { + if ( split_bits==NULL ) split_bits = new bool [ ( 2*nnode-3 ) * ( 2*nnode-2 ) ]; + } + + string get_split_key ( edge edgeaux ) const; + string get_invert_split_key ( edge edgeaux ) const; + + bool is_internal ( edge ) const; + + void print_splits() const; + + void set_data ( Sequences &s ); + + void collapse_zero_edges(); + + inline double get_branch_length ( edge edgeaux ) const { return NJDISTANCES[edgeaux.id() ]; } + inline int taxon_id ( node nodeaux ) const { return MAPNODETAXON[nodeaux]; } + inline int number_of_taxons() const { return seqpatterns->num_seqs(); } + inline const Sequences & get_patterns() { return *seqpatterns; } + inline Sequences *get_patterns2() { return seqpatterns; } + inline node taxon_number ( int n ) const { return MAPTAXONNODE[n]; }; + inline int number_of_positions() const { return seqpatterns->pattern_count(); } + inline void set_branch_length ( edge edgeaux, double f ) + { + if ( f < BL_MIN ) + NJDISTANCES[edgeaux.id() ] = BL_MIN; + else if ( f> BL_MAX ) + NJDISTANCES[edgeaux.id() ] = BL_MAX; + else + NJDISTANCES[edgeaux.id() ] = f; + } + inline bool istaxon ( node nodeaux ) const { return ( nodeaux.degree() <= 1 ); } + inline edge edge_number ( int n ) const { return MAPIDEDGE[n]; } + inline bool splits_valid() const { return valid_splits; } + inline bool split ( edge edgeaux, node nodeaux ) const + { return split_bits[ edgeaux.id() * TREE.number_of_nodes() + nodeaux.id() ]; } + + void read_newick ( string newickstring ); + void read_newick2 ( string newickstring ); + void printtree() const; + void printtreeinfo() const; + void printNewick ( ostream &os ); + void newick_traverse ( node n, node *ancestor, edge edgeaux, string &s ); + string newick_traverse2 ( bool brlens=true, bool longnames=true ); + double compare_topology ( phylotreeIND &other ); + double compare_topology_2 ( phylotreeIND &other ); + double compare_topology_3 ( phylotreeIND &other ); + double robinson_foulds_distance ( phylotreeIND &other, int debug=0 ); + + + // iterator + postorder_Iterator postorder_begin ( node root, node father ) const + { + postorder_Iterator it = postorder_Iterator ( root, father ); + it.first_node(); + return it; + } + + postorder_Iterator postorder_begin ( node root ) const + { + postorder_Iterator it = postorder_Iterator ( root ); + it.first_node(); + return it; + } + + preorder_Iterator preorder_begin ( node root, node father ) const + { + preorder_Iterator it = preorder_Iterator ( root, father ); + it.first_node(); + return it; + } + + preorder_Iterator preorder_begin ( node root ) const + { + preorder_Iterator it = preorder_Iterator ( root ); + it.first_node(); + return it; + } + + + postorder_Iterator postorder_end ( node root, node father ) const + { + postorder_Iterator it = postorder_Iterator ( root, father ); + it.last_node(); + return it; + } + + postorder_Iterator postorder_end ( node root ) const + { + postorder_Iterator it = postorder_Iterator ( root ); + it.last_node(); + return it; + } + + preorder_Iterator preorder_end ( node root, node father ) const + { + preorder_Iterator it = preorder_Iterator ( root, father ); + it.last_node(); + return it; + } + + preorder_Iterator preorder_end ( node root ) const + { + preorder_Iterator it = preorder_Iterator ( root ); + it.last_node(); + return it; + } + + + child_Iterator child_begin ( node root, node father ) const + { + child_Iterator it = child_Iterator ( root, father ); + it.first_node(); + return it; + } + + child_Iterator child_begin ( node root ) const + { + child_Iterator it = child_Iterator ( root ); + it.first_node(); + return it; + } + + child_Iterator child_end ( node root, node father ) const + { + child_Iterator it = child_Iterator ( root, father ); + it.last_node(); + return it; + } + + child_Iterator child_end ( node root ) const + { + child_Iterator it = child_Iterator ( root ); + it.last_node(); + return it; + } + + +}; + +template const T& select_edge_at_pos ( const list &, int ); +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.cpp new file mode 100644 index 000000000..6afe4211c --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.cpp @@ -0,0 +1,23 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#include "probmatrixcontainer.h" + + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.h b/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.h new file mode 100644 index 000000000..fd4553c30 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/probmatrixcontainer.h @@ -0,0 +1,93 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef PROBMATRIXCONTAINER_H +#define PROBMATRIXCONTAINER_H +#include "ProbMatrix.h" +#include +/** +@author Waldo Cancino +*/ +// container for an arbitrary number of Probmatrix under certain +// evolution model + + +class ProbMatrixContainer{ + +private: + SubstModel *model; + // contains the probamatirx + std::map container; +public: + ProbMatrix &operator[] (double branchlength) + { + if (container.find(branchlength)!=container.end() ) + { + return *container[branchlength]; + } + else + { + ProbMatrix *new_prob_matrix = new ProbMatrix(model, branchlength); + new_prob_matrix->init(); + container[branchlength] = new_prob_matrix; + return *new_prob_matrix; + } + } + + ProbMatrixContainer( SubstModel &m ) + { + model = &m; + } + + void change_matrix(double bl_old, double bl_new) + { + ProbMatrix &p = (*this)[bl_old]; + p.set_branch_length(bl_new); + std::map::iterator it = container.find(bl_old); + container.erase(it); + it = container.find(bl_new); + // prevent duplication + if(it!=container.end()) delete (*it).second; + /*{ + std::cout << " duplication " << bl_new << std::endl; + container.erase(it); + }*/ + // replace the container + container[bl_new] = &p; + } + + void clear() + { + std::map::iterator it = container.begin(); + std::map::iterator end = container.end(); + while(it!=end) + { + delete (*it).second; + it++; + } + container.clear(); + } + + ~ProbMatrixContainer() { + //std::cout << "limpando ..." << container.size() << " matrizes alocadas\n"; + clear(); } + +}; + +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.cpp new file mode 100644 index 000000000..607221e37 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.cpp @@ -0,0 +1,179 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include "treeIterator.h" + + + +node::inout_edges_iterator& treeIterator::first_valid_edge() +{ + assert(curnode != curfather); + current_it = curnode.inout_edges_begin(); + node tmp = current_it->opposite(curnode); + // a valid edge from current_node can't point to the father + if(tmp == curfather) current_it++; + return current_it; +} + +node::inout_edges_iterator& treeIterator::next_valid_edge() +{ + assert(curnode != curfather); + current_it++; + if( current_it == curnode.inout_edges_end() ) return current_it; + node tmp = current_it->opposite(curnode); + // a valid edge from current_node can't point to the father + if( tmp == curfather ) current_it++; + return current_it; +} + +void child_Iterator::first_node() +{ + curnode = root; + curfather = father; + current_it = first_valid_edge(); + if(current_it != root.inout_edges_end()) + curnode = current_it->opposite(curnode); +} + +child_Iterator& child_Iterator::operator++() +{ + if( curnode == root) curnode = father; + else + { + curnode = root; + current_it = next_valid_edge(); + // end + if(current_it == root.inout_edges_end()) + curnode = father; + else + curnode = current_it->opposite(curnode); + } + return (*this); +} + + +void postorder_Iterator::first_node() +{ + curnode = root; + curfather = father; + node::inout_edges_iterator it; + pilha.clear(); + pilha.push_back( stack_info (father,it) ); + // seek for the first terminal node + while( (current_it = first_valid_edge()) != curnode.inout_edges_end() ) + { + pilha.push_back( stack_info (curnode, current_it) ); + //cout << curnode << " (" << curnode.degree() << ")" << ","; + curfather = curnode; + curnode = current_it->opposite(curnode); + } +} + +postorder_Iterator& postorder_Iterator::operator++() +{ + // final node curnode == root --> curnode_next = end + if( curnode == root) curnode = father; + else + { + // current father becomes curent node + // and grapndfather becomes father + curnode = curfather; + current_it = pilha.back().second; + curfather = (++pilha.rbegin())->first; + // get next children + //current_it = next_valid_edge(); + //; + + if( next_valid_edge() != curnode.inout_edges_end() ) + { + // update current node iterator + + pilha.back().second = current_it; + curfather = curnode; + curnode = current_it->opposite(curnode); + // get the next post-order node + while( first_valid_edge() != curnode.inout_edges_end() ) + { + pilha.push_back( stack_info (curnode, current_it) ); + curfather = curnode; + curnode = current_it->opposite(curnode); + } + } + else + { + // pop the stack... update curnode and curfather + //curnode = pilha.back().first; + //current_it = pilha.back().second; + pilha.pop_back(); + //curfather = pilha.back().first; + } + } + return (*this); +} + +void preorder_Iterator::first_node() +{ + // the first node is root, clean the stack + pilha.clear(); + node::inout_edges_iterator it; + pilha.push_back( stack_info (father,it) ); + curnode = root; curfather = father; +} + +preorder_Iterator& preorder_Iterator::operator++() +{ + if( first_valid_edge() != curnode.inout_edges_end() ) + { + pilha.push_back( stack_info (curnode, current_it) ); + curfather = curnode; + curnode = current_it->opposite(curnode); + //cout << "proximo no: " << curnode << endl; + } + else + { + curnode = curfather; + current_it = pilha.back().second; + curfather = (++pilha.rbegin())->first; + // pop all nodes which all childs has been already visited + while( curnode != father && next_valid_edge() == curnode.inout_edges_end() ) + { + + //cout << " eliminando.." << curnode; + + pilha.pop_back(); + curnode = pilha.back().first; + current_it = pilha.back().second; + if(curnode !=father)curfather = (++pilha.rbegin())->first; + } + // final node reached + if( curnode == father){ + // curnode = father; + cout << "putz cheguei ao final...!" << endl; + } + else + { + curfather = curnode; + curnode = current_it->opposite(curnode); + pilha.back().second = current_it; + //cout << "pai.." << curfather << "filho..." << curnode << " !"; + } + } + return (*this); +} diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.h b/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.h new file mode 100644 index 000000000..915e6c70b --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/treeIterator.h @@ -0,0 +1,104 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef _treeIterator_H_ +#define _treeIterator_H_ + +#include +#include + + +typedef pair stack_info; +// Virtual Base Class for Pre-Order and Post-Order iterators +class treeIterator +{ + protected: + node invalid_node; // points to the virtual root + node root; // root of the tree (subtree) + node father; // father of the root (tree or subtree) + node curnode; // current node + node curfather; // father of the current node + // stack saves the father nodes and the iterator position for the + // current node (child of father) + node::inout_edges_iterator current_it; + + list< stack_info > pilha; + public: + // constructor for an sub-tree pre-order traversal + treeIterator(node r, node f): root(r), father(f) {}; + // constructor for a tree pre-order traversal father = invalid_node + treeIterator(node r): root(r), father(invalid_node) {}; + virtual ~treeIterator() {}; + // compare two iterators + inline bool operator!=(treeIterator &other) + { + return curnode != other.curnode; + } + inline void last_node() { pilha.clear(); curnode = curfather = father; } + // pointer operator + inline node * operator->() { return &curnode; } + // dereference operator + inline node & operator*() { return curnode; } + // ancestor of the current_node + inline node ancestor() { return curfather; } + // branch with endpoints (curnode, curfather) + inline edge branch() { return *(pilha.back().second); } + // size of the stack + inline int stack_size() { return pilha.size(); } + // point to the first child of curnode + node::inout_edges_iterator& first_valid_edge(); + // point to the next child of curnode + node::inout_edges_iterator& next_valid_edge(); + // defined in derived classes + virtual void first_node() = 0; +}; + +class child_Iterator: public treeIterator +{ + public: + child_Iterator(node r, node f):treeIterator(r,f) {}; + child_Iterator(node r):treeIterator(r) {}; + child_Iterator& operator++(); + void first_node(); + edge branch() { return *current_it; }; + virtual ~child_Iterator() {}; +}; + +class postorder_Iterator: public treeIterator +{ + public: + postorder_Iterator(node r, node f):treeIterator(r,f) {}; + postorder_Iterator(node r):treeIterator(r) {}; + postorder_Iterator& operator++(); + void first_node(); + virtual ~postorder_Iterator() {}; +}; + +class preorder_Iterator: public treeIterator +{ + public: + preorder_Iterator(node r, node f): treeIterator(r,f) {}; + preorder_Iterator(node r):treeIterator(r) {}; + preorder_Iterator& operator++(); + void first_node(); + virtual ~preorder_Iterator() {}; +}; + +#endif diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/tree_limits.h b/contribution/branches/PhyloMOEA/PhyloMOEA/tree_limits.h new file mode 100644 index 000000000..04756e430 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/tree_limits.h @@ -0,0 +1,25 @@ +/*************************************************************************** + * Copyright (C) 2005 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#define MDBL_MIN 2.225074E-308 +#define LIM_SCALE_VAL 1.E-50 +#define UMBRAL 1.E-50 +#define BL_MIN 1.e-10 +#define BL_MAX 1.e+05 diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/utils.cpp b/contribution/branches/PhyloMOEA/PhyloMOEA/utils.cpp new file mode 100644 index 000000000..4a3c5e057 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/utils.cpp @@ -0,0 +1,111 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +extern gsl_rng *rn2; +extern RandomNr *rn; +//Sequences *seq; +extern long seed; +//vector arbores; +extern string datafile,usertree, expid, path; +extern double pcrossover, pmutation, kappa, alpha; +extern unsigned int ngenerations, popsize, ncats; + +void welcome_message() +{ + cout << "\nPhyloMOEA, a program for multi-criteria phylogenetic inference\n"; + cout << "using maximum parsimony and maximum likelihood\n"; + cout << "Version 0.2 (ParadisEO backend) (c) 2009, Waldo Cancino"; + cout << "\n"; +} + +void save_exp_params(ostream &of=cout) +{ + of << "PhyloMOEA Experiment Parameters" << endl; + of << "--------------------------------------------" << endl; + of << "Sequence Datafile : " << datafile << endl; + of << "Initial Trees File : " << usertree << endl; + of << "N. Generations : " << ngenerations << endl; + of << "Population Size : " << popsize << endl; + of << "Crossover Rate : " << pcrossover << endl; + of << "Mutation Rate : " << pmutation << endl; + of << "Discrete-Gamma Categs: " << ncats << endl; + of << "Gamma Shape : " << alpha << endl; + of << "HKY85+GAmma Kappa : " << kappa << endl; + of << "Experiment ID : " << expid << endl; + of << "Ramdom Seed : " << seed << endl; + of << "Working Path : " << path << endl; +} + + + +void optimize_solutions( eoPop &pop, LikelihoodCalculator &lik_calc) +{ + int n = pop.size(); + for(int i=0; icalculate_likelihood() << endl; + Newton_LikOptimizer test(lik_calc); + test.optimize(); + + pop[i].invalidate(); + //lik_calc->maximizelikelihood(); + //lik_calc->set_tree(*sol); +// lik_calc->set_tree( *sol); +// cout << endl << "likelihood final:" << lik_calc->calculate_likelihood() << endl; + } +} + + +void readtrees(const char *fname, eoPop &poptree) +{ + int ntrees; + string s; + fstream in_file; + try{ + + in_file.open(fname, ios::in); + if(!in_file.is_open()) + { + cout << fname << endl; + throw ExceptionManager(10); + } + poptree.readFrom( in_file ); + return; + } + catch(ExceptionManager e) + { + e.Report(); + } + in_file.close(); +} + + + + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/utils.h b/contribution/branches/PhyloMOEA/PhyloMOEA/utils.h new file mode 100644 index 000000000..eb9f11077 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/utils.h @@ -0,0 +1,34 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include +#include + + +void welcome_message(); +void save_exp_params(ostream &); +void optimize_solutions( eoPop &, LikelihoodCalculator &); +void readtrees(const char *, eoPop &); +#endif + diff --git a/contribution/branches/PhyloMOEA/PhyloMOEA/vectorSortIndex.h b/contribution/branches/PhyloMOEA/PhyloMOEA/vectorSortIndex.h new file mode 100644 index 000000000..a02fb4108 --- /dev/null +++ b/contribution/branches/PhyloMOEA/PhyloMOEA/vectorSortIndex.h @@ -0,0 +1,50 @@ +/*************************************************************************** + * Copyright (C) 2008 by Waldo Cancino * + * wcancino@icmc.usp.br * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + +#ifndef VECTORSORTEDINDEX_H_ +#define VECTORSORTEDINDEX_H_ + +#include +#include + + +template struct CmpObjIdx +{ + std::vector &realvector; + C ∁ + + CmpObjIdx(std::vector &_realvector, C &_comp): realvector(_realvector), comp(_comp) {} + bool operator()(unsigned int a, unsigned int b) + { + return comp(realvector[a],realvector[b]); + } +}; + +template< typename T, typename C > +void vectorSortIndex( std::vector &realvector, std::vector &sortedvectoridx, C &comparator ) +{ + CmpObjIdx compidx(realvector, comparator); + sortedvectoridx.resize( realvector.size()); + for(int i=0; i< realvector.size(); i++) sortedvectoridx[i] =i; + std::sort( sortedvectoridx.begin(), sortedvectoridx.end(), compidx); +} +#endif + +