Dataset.cpp

00001 /*          
00002  *             Copyright (C) 2005 Maarten Keijzer
00003  *
00004  *          This program is free software; you can redistribute it and/or modify
00005  *          it under the terms of version 2 of the GNU General Public License as 
00006  *          published by the Free Software Foundation. 
00007  *
00008  *          This program is distributed in the hope that it will be useful,
00009  *          but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *          MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *          GNU General Public License for more details.
00012  *
00013  *          You should have received a copy of the GNU General Public License
00014  *          along with this program; if not, write to the Free Software
00015  *          Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00016  */
00017 
#include "Dataset.h"

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <vector>
00023 
00024 using namespace std;
00025 
// Private implementation of Dataset: holds the parsed records.
//
// Record i is made of the input values inputs[i] and the target value
// targets[i]; after read_data() both containers have one entry per record.
class DataSetImpl {
    public:
    std::vector< std::vector<double> > inputs;
    std::vector<double> targets;

    // Parses one record per string. Every record is a whitespace separated
    // list of numbers; the last number is the target, all preceding ones are
    // inputs. The number of fields is taken from the first record.
    //
    // Any previously stored data is replaced. An empty `strings` leaves the
    // object empty. A first record without fields, or a record from which
    // fewer numbers can be read than the first record has fields, is reported
    // on std::cerr and terminates the program with exit status 1.
    void read_data(const std::vector<std::string>& strings) {
        // Start from a clean slate so rows from an earlier call cannot
        // survive with a different width.
        inputs.clear();
        targets.clear();

        if (strings.empty()) return;

        // find the number of fields in the first record
        std::istringstream cnt(strings[0]);
        unsigned n = 0;
        for (std::string field; cnt >> field; ) {
            ++n;
        }

        if (n == 0) {
            // n - 1 below would wrap around for an unsigned zero
            std::cerr << "No fields found in record 0" << std::endl;
            std::exit(1);
        }

        inputs.assign(strings.size(), std::vector<double>(n - 1));
        targets.assign(strings.size(), 0.0);

        for (unsigned i = 0; i < strings.size(); ++i) {
            std::istringstream is(strings[i]);
            for (unsigned j = 0; j < n; ++j) {
                double& slot = (j < n - 1) ? inputs[i][j] : targets[i];

                // Test the extraction itself: checking the stream before the
                // read would miss a failure on the final (target) field.
                if (!(is >> slot)) {
                    std::cerr << "Too few targets in record " << i << std::endl;
                    std::exit(1);
                }
            }
        }
    }
};
00067 
00068 Dataset::Dataset() { pimpl = new DataSetImpl; }
00069 Dataset::~Dataset() { delete pimpl; }
00070 Dataset::Dataset(const Dataset& that) { pimpl = new DataSetImpl(*that.pimpl); }
00071 Dataset& Dataset::operator=(const Dataset& that) { *pimpl = *that.pimpl; return *this; }
00072 
00073 unsigned Dataset::n_records() const { return pimpl->targets.size(); }
00074 unsigned Dataset::n_fields()  const { return pimpl->inputs[0].size(); }
00075 const std::vector<double>& Dataset::get_inputs(unsigned record) const { return pimpl->inputs[record]; }
00076 double Dataset::get_target(unsigned record) const { return pimpl->targets[record]; }
00077 
00078 double error(string errstr);
00079 
00080 void Dataset::load_data(std::string filename) {
00081     vector<string> strings; // first load it in strings
00082 
00083     ifstream is(filename.c_str());
00084 
00085     for(;;) {
00086         string s;
00087         getline(is, s);
00088         if (!is) break;
00089 
00090         if (s[0] == '#') continue; // comment, skip
00091 
00092         strings.push_back(s);
00093     }
00094    
00095     is.close();
00096 
00097     if (strings.size() == 0) {
00098         error("No data could be loaded");
00099     }
00100     
00101     pimpl->read_data(strings);
00102     
00103 }
00104 
00105 std::vector<double> Dataset::input_minima() const {
00106     vector<vector<double> >& in = pimpl->inputs;
00107     
00108     vector<double> mn(in[0].size(), 1e+50);
00109     for (unsigned i = 0; i < in.size(); ++i) {
00110         for (unsigned j = 0; j < in[i].size(); ++j) {
00111             mn[j] = std::min(mn[j], in[i][j]);
00112         }
00113     }
00114     
00115     return mn;
00116 }
00117 
00118 vector<double> Dataset::input_maxima() const {
00119     vector<vector<double> >& in = pimpl->inputs;
00120     
00121     vector<double> mx(in[0].size(), -1e+50);
00122     for (unsigned i = 0; i < in.size(); ++i) {
00123         for (unsigned j = 0; j < in[i].size(); ++j) {
00124             mx[j] = std::max(mx[j], in[i][j]);
00125         }
00126     }
00127     
00128     return mx;
00129 }
00130 
00131 
00132 
00133 

Generated on Thu Oct 19 05:06:34 2006 for EO by  doxygen 1.3.9.1