Go to the documentation of this file.00001
00002
00003
00004
00010 #if !defined(_YADT_H__INCLUDED_)
00011 #define _YADT_H__INCLUDED_
00012
00013 #include <string>
00014 #include <vector>
00015 #include <stdexcept>
00016 #include <ostream>
00017 #include <fstream>
00018 #include <iostream>
00019
/**
 * @def YADT_THROW(ex)
 * Portable stand-in for a dynamic exception specification.
 *
 * `throw(type)` specifications were deprecated in C++11 and removed from the
 * language in C++17, so writing them literally makes this header ill-formed
 * for C++17 clients. Before C++17 the macro expands to the original
 * `throw(ex)`, so the declarations keep matching already existing
 * definitions; from C++17 on it expands to nothing and the functions are
 * simply potentially-throwing. The exception type is repeated in the
 * `@throws` note of every affected function.
 */
#ifndef YADT_THROW
#  if (defined(__cplusplus) && __cplusplus >= 201703L) || \
      (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#    define YADT_THROW(ex)
#  else
#    define YADT_THROW(ex) throw(ex)
#  endif
#endif

/// Public interface of the YaDT library.
///
/// Only declarations are visible in this header. Where a comment below
/// describes behaviour rather than a signature, it is inferred from the
/// declaration and marked as such; confirm against the implementation.
namespace yadt
{
    /// Returns a C string (presumably the library version).
    const char *get_version();
    /// Returns an integer (presumably the build number).
    /// NOTE(review): the top-level `const` on a scalar return type has no
    /// effect; it is kept so the declaration matches its definition.
    const int get_build();
    /// Returns a C string (presumably the copyright notice).
    const char *get_copyright();


    /**
     * A data source, identified by a specification string.
     *
     * The format of the specification string is not visible in this header.
     * `dtree` and `table` are friends and can read the stored string.
     */
    class datasource
    {
    public:
        /**
         * Creates a data source from a specification string.
         * @param specs specification of the source (format defined by the
         *              implementation).
         */
        datasource(const std::string &specs);
        /// Destructor.
        ~datasource();
    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class dtree;
        friend class table;
#endif
        /// Presumably holds the string given to the constructor.
        std::string specifications;
    };


    class conf_matrix;
    class dtree;

    /**
     * A named table whose meta data and data are loaded from a `datasource`.
     *
     * The actual state lives behind the opaque `real` pointer.
     */
    class table
    {
    public:
        /**
         * Creates a table.
         * @param name name of the table.
         */
        table(const std::string &name);
        /// Destructor.
        ~table();
        /// Copy constructor.
        table( const table& );
        /// Copy assignment; note that it returns a const reference.
        const table& operator=( const table& );
        /**
         * Loads the table meta data from a data source.
         * @param ds data source to read from.
         * @throws std::runtime_error declared as the only exception type.
         */
        void load_meta_data(const datasource &ds)
            YADT_THROW(std::runtime_error);
        /// Returns the table name.
        std::string get_name() const;

        /**
         * Loads the table data from a data source.
         * @param ds      data source to read from.
         * @param unknown string with default "?"; presumably the token that
         *                denotes an unknown value -- confirm.
         * @throws std::runtime_error declared as the only exception type.
         */
        void load_data(const datasource &ds, const std::string &unknown = "?")
            YADT_THROW(std::runtime_error);
        /// Returns the number of columns.
        size_t get_no_columns() const;
        /// Returns the number of rows.
        size_t get_no_rows() const;
        /// Returns the weight at position @p pos (presumably a row index).
        float get_weight(size_t pos) const;
        /// Returns the class at position @p pos (presumably a row index).
        std::string get_class(size_t pos) const;

        /**
         * Writes the table in a binary format.
         * @param filename file name to use (presumably the output file).
         */
        void toBinary(const std::string &filename) const;
        /**
         * Creates a table from a binary file.
         * @param filename file name to use (presumably the input file).
         * @return pointer to a table. NOTE(review): ownership of the returned
         *         object is not expressed by the type -- confirm who deletes.
         */
        static table* fromBinary(const std::string &filename);



        /**
         * Sets the log stream.
         * @param new_log stream to use from now on.
         * @return a stream pointer, presumably the previous log -- confirm.
         */
        std::ostream *set_log(std::ostream *new_log);
        /**
         * Sets the verbosity level.
         * @param newverbosity level to use from now on.
         * @return a level, presumably the previous one -- confirm.
         */
        size_t set_verbosity(size_t newverbosity);
        /// Returns an elapsed time; unit and reference point are not visible
        /// in this header.
        double get_elapsed() const;
        /// Writes an XML data dictionary to @p os (default: `std::cout`).
        void toXML_data_dictionary(std::ostream &os = std::cout) const;
        /// Writes an XML mining schema to @p os (default: `std::cout`).
        void toXML_mining_schema(std::ostream &os = std::cout) const;

        /**
         * A collection of (table position, weight) pairs.
         *
         * The actual state lives behind the opaque `real` pointer.
         */
        class subset
        {
        public:
            /// Default constructor.
            subset();
            /// Destructor.
            ~subset();
            /// Copy constructor.
            subset( const subset& );
            /// Copy assignment; note that it returns a const reference.
            const subset& operator=( const subset& );
            /// Returns the number of elements.
            size_t size() const;
            /// Resizes the subset to @p newsize elements.
            void resize(size_t newsize);
            /// Reserves room for @p size elements.
            void reserve(size_t size);
            /**
             * Appends an element.
             * @param tablepos position in the table.
             * @param weight   weight associated with that position.
             */
            void push_back(size_t tablepos, float weight);
            /**
             * Reads an element.
             * @param subsetpos position in the subset.
             * @return a (size_t, float) pair, presumably (table position,
             *         weight) as in push_back() -- confirm.
             */
            std::pair<size_t, float> get(size_t subsetpos);
            /**
             * Overwrites an element.
             * @param subsetpos position in the subset.
             * @param tablepos  position in the table.
             * @param weight    weight associated with that position.
             */
            void set(size_t subsetpos, size_t tablepos, float weight);

        private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
            friend class dtree;
            friend class table;
#endif

            /// Private constructor from an untyped pointer.
            subset(void *actual);
            /// Opaque pointer; presumably the hidden implementation object.
            void *real;
        };

        /// Returns a subset, presumably covering all rows.
        /// NOTE(review): ownership of the returned pointer is not expressed
        /// by the type (applies to all get_wsubset_* functions) -- confirm.
        subset* get_wsubset_all() const;
        /// Returns a subset, presumably of the first @p n rows.
        subset* get_wsubset_first_n(size_t n) const;
        /// Returns a subset, presumably of @p n randomly chosen rows.
        subset* get_wsubset_random(size_t n) const;
        /// Returns a subset, presumably the rows not in @p subtable.
        subset* get_wsubset_difference(subset *subtable) const;

    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class conf_matrix;
        friend class dtree;
#endif

        /// Opaque pointer; presumably the hidden implementation object.
        void *real;
    };

    /**
     * A confusion matrix (actual class versus predicted class).
     *
     * There is no public constructor besides the copy constructor; the
     * remaining constructors are private and `dtree` is a friend, whose
     * member functions return pointers to this class.
     */
    class conf_matrix
    {
    public:
        /// Destructor.
        ~conf_matrix();
        /// Copy constructor.
        conf_matrix( const conf_matrix& );
        /// Copy assignment; note that it returns a const reference.
        const conf_matrix& operator=( const conf_matrix& );
        /// Returns a pointer to a copy. NOTE(review): ownership of the
        /// returned object is not expressed by the type -- confirm.
        conf_matrix* clone() const;
        /// Returns the size of the matrix (presumably the number of classes).
        size_t size() const;
        /**
         * Reads a cell of the matrix.
         * @param actual    index of the actual class.
         * @param predicted index of the predicted class.
         */
        float get_element(size_t actual, size_t predicted) const;
        /// Returns a float, presumably the misclassification percentage.
        float mis_perc() const;
        /// Returns a float, presumably the (weighted) number of cases.
        float cases() const;
        /// Returns an elapsed time; unit and reference point are not visible
        /// in this header.
        double get_elapsed() const;
        /**
         * Writes the matrix as text.
         * @param os    output stream (default: `std::cout`).
         * @param space size_t with default 0; presumably an indentation or
         *              spacing amount -- confirm.
         */
        void toTEXT(std::ostream& os = std::cout, size_t space = 0) const;
        /// Writes the matrix as XML to @p os (default: `std::cout`).
        void toXML(std::ostream& os = std::cout) const;

    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class dtree;
#endif

        /**
         * Private constructor from a table.
         * @throws std::runtime_error declared as the only exception type.
         */
        conf_matrix(table *maintable)
            YADT_THROW(std::runtime_error);
        /**
         * Private constructor from another matrix given by pointer.
         * @throws std::runtime_error declared as the only exception type.
         */
        conf_matrix(const conf_matrix *cm)
            YADT_THROW(std::runtime_error);
        /// Opaque pointer; presumably the hidden implementation object.
        void *real;
    };

    /**
     * A decision tree: configuration, construction, prediction and export.
     *
     * The actual state lives behind the opaque `real` pointer.
     */
    class dtree
    {
    public:
        /// Creates a tree named @p name (default: "my_decision_tree").
        dtree(const std::string &name="my_decision_tree");
        /// Destructor.
        ~dtree();
        /// Copy constructor.
        dtree( const dtree& );
        /// Copy assignment; note that it returns a const reference.
        const dtree& operator=( const dtree& );
        /// Returns a pointer to a copy. NOTE(review): ownership of the
        /// returned object is not expressed by the type -- confirm.
        dtree* clone() const;

        /// Returns an elapsed time; unit and reference point are not visible
        /// in this header.
        double get_elapsed() const;
        /// Returns the size of the tree (presumably the number of nodes).
        size_t size() const;
        /// Returns the depth of the tree.
        size_t depth() const;
        /// Returns the number of training rows.
        size_t training_n_rows() const;

        /// Pruning strategies accepted by set_pruning_strategy().
        typedef enum {
            /// Presumably: no pruning.
            PRUNING_NO,
            /// Presumably: C4.5-style pruning.
            PRUNING_C45,
            /// NOTE(review): meaning of "DT" is not visible here.
            PRUNING_DT
        } PruningStrategy;

        /// Split criteria accepted by set_split_type().
        typedef enum {
            /// Presumably: information gain.
            ST_GAIN,
            /// Presumably: gain ratio.
            ST_GAIN_RATIO
        } SplitType;

        /// Options accepted by set_option().
        typedef enum {
            /// NOTE(review): exact effect is not visible in this header.
            SET_ABSOLUTE_CORRECTION,
            /// NOTE(review): exact effect is not visible in this header.
            SET_WEIGHTED_CORRECTION
        } Options;

        /// Selects the pruning strategy.
        /// @return a bool; presumably whether the setting was accepted.
        bool set_pruning_strategy(PruningStrategy strategy);
        /// Selects the split criterion.
        /// @return a bool; presumably whether the setting was accepted.
        bool set_split_type(SplitType st);
        /// Sets an option.
        /// @return a bool; presumably whether the setting was accepted.
        bool set_option(Options opt);
        /// Sets the minimum number of objects (valid range not visible here).
        /// @return a bool; presumably whether the setting was accepted.
        bool set_min_obj(float min_objects);
        /// Sets the confidence level (valid range not visible here).
        /// @return a bool; presumably whether the setting was accepted.
        bool set_conf_level(float conf_level);

        /**
         * Builds the tree.
         * @param maintable table to build from.
         * @param subtable  subset of @p maintable to use.
         * @param evaluate  bool with default true; presumably whether the
         *                  tree is also evaluated while building -- confirm.
         * @param ff_worker int with default 0; NOTE(review): meaning is not
         *                  visible in this header.
         * @throws std::runtime_error declared as the only exception type.
         */
        void build(table* maintable, table::subset *subtable, bool evaluate = true, int ff_worker = 0)
            YADT_THROW(std::runtime_error);
        /// Returns a confusion matrix pointer. NOTE(review): which prediction
        /// it refers to and who owns the object are not visible here.
        conf_matrix* get_prediction();
        /**
         * Predicts the cases of a data source.
         * @param ds data source holding the cases.
         * @return pointer to a confusion matrix (ownership not expressed by
         *         the type -- confirm).
         * @throws std::runtime_error declared as the only exception type.
         */
        conf_matrix* predict(const datasource &ds) const
            YADT_THROW(std::runtime_error);
        /**
         * Evaluates the cases of a data source, writing to a stream.
         * @param ds     data source holding the cases.
         * @param output stream that receives the output.
         * @param sep    separator character (default: tab).
         * @return a double; NOTE(review): its meaning is not visible here.
         * @throws std::runtime_error declared as the only exception type.
         */
        double evaluate(const datasource &ds, std::ostream &output, char sep = '\t') const
            YADT_THROW(std::runtime_error);
        /**
         * Predicts a single case taken from a table.
         * @param cases  table holding the case.
         * @param pos    position of the case in @p cases.
         * @param weight weight of the case (default: 1).
         * @return a (string, float) pair; presumably the predicted class and
         *         an associated score -- confirm.
         */
        std::pair<std::string, float> predict(table* cases, size_t pos, float weight = 1) const;
        /**
         * Predicts a single case given as attribute values.
         * @param attributes attribute values of the case, as strings.
         * @param weight     weight of the case (default: 1).
         * @return a (string, float) pair; presumably the predicted class and
         *         an associated score -- confirm.
         */
        std::pair<std::string, float> predict(const std::vector<std::string> &attributes, float weight = 1) const;

        /**
         * Predicts the cases of a table subset.
         * @param cases    table holding the cases.
         * @param subtable subset of @p cases to predict.
         * @return pointer to a confusion matrix (ownership not expressed by
         *         the type -- confirm).
         * @throws std::runtime_error declared as the only exception type.
         */
        conf_matrix* predict(table* cases, table::subset *subtable) const
            YADT_THROW(std::runtime_error);

        /// Writes the tree as text to @p os (default: `std::cout`).
        void toTEXT(std::ostream& os = std::cout) const;
        /// Writes the tree in DOT format to @p os (default: `std::cout`).
        void toDOT(std::ostream& os = std::cout) const;
        /**
         * Writes the tree as XML.
         * @param os     output stream (default: `std::cout`).
         * @param cmTest optional confusion matrix, may be NULL (the default);
         *               presumably test-set results to embed -- confirm.
         */
        void toXML(std::ostream &os = std::cout, const conf_matrix *cmTest = NULL) const;

        /// Writes the tree in a binary format using @p filename.
        /// Unlike table::toBinary() this member is not const.
        void toBinary(const std::string &filename);
        /// Creates a tree from a binary file. NOTE(review): ownership of the
        /// returned object is not expressed by the type -- confirm.
        static dtree *fromBinary(const std::string &filename);

    private:
        /// Opaque pointer; presumably the hidden implementation object.
        void *real;
    };

}
00443
00887 #endif // !defined(_YADT_H__INCLUDED_)