00001
00002
00003
00004
00010 #if !defined(_YADT_H__INCLUDED_)
00011 #define _YADT_H__INCLUDED_
00012
00013 #include <string>
00014 #include <vector>
00015 #include <stdexcept>
00016 #include <iostream>
00017 #include <fstream>
00018
00019 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00020 using namespace std;
00021 #endif
00022
// Dynamic exception specifications (`throw(T)`) were removed from the language
// in C++17. YADT_THROWS keeps the original spelling for older standards and
// becomes `noexcept(false)` ("may throw") otherwise, so the declarations below
// are well-formed under every standard. The macro is local to this block and
// is #undef'd right after the namespace closes.
#if (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || \
    (defined(__cplusplus) && __cplusplus >= 201703L)
#define YADT_THROWS(exception_type) noexcept(false)
#else
#define YADT_THROWS(exception_type) throw(exception_type)
#endif

/// Public declarations of the yadt library: data sources, tables,
/// result matrices (conf_matrix) and decision trees (dtree).
///
/// Every class stores its state behind an opaque `void *real` member (or, for
/// datasource, a plain string); the implementation types are not visible in
/// this header.
namespace yadt
{
    /// @return The library version as a C string.
    /// NOTE(review): format and lifetime of the returned pointer are not
    /// visible in this header - presumably a static string; confirm.
    const char *get_version();

    /// @return The library build number.
    /// The top-level `const` on the return type has no effect for callers; it
    /// is kept so the declaration matches the existing definition.
    const int get_build();

    /// @return The library copyright notice as a C string.
    /// NOTE(review): lifetime of the returned pointer is not visible here.
    const char *get_copyright();


    /// Describes where data is read from.
    ///
    /// The only state held is the specification string passed to the
    /// constructor; table and dtree are friends and can read it directly.
    class datasource
    {
    public:
        /// Creates a data source from a specification string.
        /// @param specs Specification of the source.
        ///        NOTE(review): the accepted syntax is not visible in this
        ///        header - consult the library documentation.
        datasource(const std::string &specs);

        /// Destructor.
        ~datasource();

    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class dtree;
        friend class table;
#endif
        /// Specification string for this data source.
        std::string specifications;
    };


    class conf_matrix;
    class dtree;

    /// A named table of rows and columns loaded from a datasource.
    class table
    {
    public:
        /// Creates a table with the given name.
        table(const std::string &name);

        /// Destructor.
        ~table();

        /// Copy constructor.
        /// NOTE(review): whether the underlying data is shared or duplicated
        /// is decided by the implementation - confirm before relying on it.
        table(const table &);

        /// Copy assignment. Returns a const reference, so the result cannot be
        /// used as a modifiable lvalue (e.g. `(a = b).load_data(...)`).
        const table &operator=(const table &);

        /// Loads the table meta data from a data source.
        /// @param ds Data source to read from.
        /// @throws std::runtime_error as declared by the exception specification.
        void load_meta_data(const datasource &ds)
            YADT_THROWS(std::runtime_error);

        /// @return The name of this table.
        std::string get_name() const;

        /// Loads the table rows from a data source.
        /// @param ds      Data source to read from.
        /// @param unknown Text token, "?" by default.
        ///        NOTE(review): presumably the marker for unknown/missing
        ///        values in the input - confirm.
        /// @throws std::runtime_error as declared by the exception specification.
        void load_data(const datasource &ds, const std::string &unknown = "?")
            YADT_THROWS(std::runtime_error);

        /// @return Number of columns of the table.
        unsigned get_no_columns() const;

        /// @return Number of rows of the table.
        unsigned get_no_rows() const;

        /// Writes the table in binary form to the named file.
        /// @param filename Path of the output file.
        void toBinary(const std::string &filename) const;

        /// Reads a table back from a binary file.
        /// @param filename Path of the input file.
        /// @return Pointer to the table read.
        /// NOTE(review): ownership of the returned pointer is not stated in
        /// this header - presumably the caller must delete it; confirm.
        static table *fromBinary(const std::string &filename);



        /// Replaces the log stream.
        /// @param new_log Stream to log to from now on.
        /// @return A stream pointer - presumably the previous log stream; confirm.
        std::ostream *set_log(std::ostream *new_log);

        /// Replaces the verbosity level.
        /// @param newverbosity New level.
        /// @return An unsigned value - presumably the previous level; confirm.
        unsigned set_verbosity(unsigned newverbosity);

        /// @return Elapsed time as a double.
        /// NOTE(review): the unit and the operation being timed are not
        /// visible in this header.
        double get_elapsed() const;

        /// Writes an XML data dictionary to @p os (std::cout by default).
        void toXML_data_dictionary(std::ostream &os = std::cout) const;

        /// Writes an XML mining schema to @p os (std::cout by default).
        void toXML_mining_schema(std::ostream &os = std::cout) const;

        /// A subset of a table.
        ///
        /// The constructor is private: only table and dtree (friends) can
        /// create instances, e.g. through the table::get_wsubset_* functions.
        /// NOTE(review): the "w" in get_wsubset_* presumably stands for
        /// "weighted" - confirm.
        class subset
        {
        public:
            /// Destructor.
            ~subset();

            /// Copy constructor.
            subset(const subset &);

            /// Copy assignment; returns a const reference.
            const subset &operator=(const subset &);

        private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
            friend class dtree;
            friend class table;
#endif

            /// Wraps an implementation object.
            /// @param actual Opaque pointer stored by the subset.
            subset(void *actual);

            /// Opaque pointer to the implementation object.
            void *real;
        };

        // NOTE(review): the four functions below return raw pointers whose
        // ownership is not stated in this header - confirm whether the caller
        // is expected to delete them.

        /// @return A subset built from the whole table (per the function name).
        subset *get_wsubset_all() const;

        /// @param n Number of rows requested.
        /// @return A subset built from the first @p n rows (per the function name).
        subset *get_wsubset_first_n(unsigned n) const;

        /// @param n Number of rows requested.
        /// @return A subset built from @p n randomly chosen rows (per the
        ///         function name; the random source is not visible here).
        subset *get_wsubset_random(unsigned n) const;

        /// @param subtable Subset to take the difference with.
        /// @return A subset described as a "difference" - presumably the rows
        ///         of this table that are not in @p subtable; confirm.
        subset *get_wsubset_difference(subset *subtable) const;

    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class conf_matrix;
        friend class dtree;
#endif

        /// Opaque pointer to the implementation object.
        void *real;
    };

    /// Square-indexed matrix of results, addressed by (actual, predicted).
    ///
    /// The converting constructors are private and dtree is the only friend,
    /// so instances are obtained from dtree (get_prediction / predict), from
    /// clone(), or by copying an existing matrix.
    class conf_matrix
    {
    public:
        /// Destructor.
        ~conf_matrix();

        /// Copy constructor.
        conf_matrix(const conf_matrix &);

        /// Copy assignment; returns a const reference.
        const conf_matrix &operator=(const conf_matrix &);

        /// @return Pointer to a copy of this matrix.
        /// NOTE(review): presumably heap-allocated and owned by the caller - confirm.
        conf_matrix *clone() const;

        /// @return The size of the matrix.
        /// NOTE(review): presumably the number of classes, i.e. the valid
        /// index range for get_element() - confirm.
        size_t size() const;

        /// @param actual    Index of the actual class.
        /// @param predicted Index of the predicted class.
        /// @return The matrix element at (actual, predicted).
        float get_element(size_t actual, size_t predicted) const;

        /// @return A float - presumably the percentage of misclassified
        ///         cases; confirm scale (0-1 vs 0-100).
        float mis_perc() const;

        /// @return A float - presumably the total (possibly weighted) number
        ///         of cases in the matrix; confirm.
        float cases() const;

        /// @return Elapsed time as a double (unit not visible in this header).
        double get_elapsed() const;

        /// Writes a textual rendering of the matrix.
        /// @param os    Output stream, std::cout by default.
        /// @param space Unsigned formatting parameter, 0 by default.
        ///        NOTE(review): presumably an indentation/padding width - confirm.
        void toTEXT(std::ostream &os = std::cout, unsigned space = 0) const;

        /// Writes an XML rendering of the matrix to @p os (std::cout by default).
        void toXML(std::ostream &os = std::cout) const;

    private:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
        friend class dtree;
#endif

        /// Builds a matrix for the given table (only reachable from dtree).
        /// @throws std::runtime_error as declared by the exception specification.
        conf_matrix(table *maintable)
            YADT_THROWS(std::runtime_error);

        /// Builds a matrix from another one given by pointer.
        /// @throws std::runtime_error as declared by the exception specification.
        conf_matrix(const conf_matrix *cm)
            YADT_THROWS(std::runtime_error);

        /// Opaque pointer to the implementation object.
        void *real;
    };

    /// A decision tree that can be built from a table, queried for
    /// predictions, and exported in several formats.
    class dtree
    {
    public:
        /// Creates a tree with the given name ("my_decision_tree" by default).
        /// Because the argument is defaulted, this is also the default constructor.
        dtree(const std::string &name = "my_decision_tree");

        /// Destructor.
        ~dtree();

        /// Copy constructor.
        dtree(const dtree &);

        /// Copy assignment; returns a const reference.
        const dtree &operator=(const dtree &);

        /// @return Pointer to a copy of this tree.
        /// NOTE(review): presumably heap-allocated and owned by the caller - confirm.
        dtree *clone() const;

        /// @return Elapsed time as a double (unit and timed operation are not
        ///         visible in this header).
        double get_elapsed() const;

        /// @return The size of the tree - presumably its number of nodes; confirm.
        unsigned size() const;

        /// @return The depth of the tree.
        unsigned depth() const;

        /// @return Number of rows used for training (per the function name).
        unsigned training_n_rows() const;

        /// Pruning strategies selectable with set_pruning_strategy().
        /// NOTE(review): the meanings below are read off the enumerator names.
        typedef enum {
            /// No pruning.
            PRUNING_NO,
            /// Presumably C4.5-style pruning - confirm.
            PRUNING_C45,
            /// A further strategy; what "DT" denotes is not visible here.
            PRUNING_DT
        } PruningStrategy;

        /// Split criteria selectable with set_split_type().
        /// NOTE(review): the meanings below are read off the enumerator names.
        typedef enum {
            /// Presumably information gain - confirm.
            ST_GAIN,
            /// Presumably information gain ratio - confirm.
            ST_GAIN_RATIO
        } SplitType;

        // NOTE(review): the four setters below return bool; the meaning of the
        // result (presumably "value accepted") is not visible in this header.

        /// Selects the pruning strategy.
        bool set_pruning_strategy(PruningStrategy strategy);

        /// Selects the split criterion.
        bool set_split_type(SplitType st);

        /// Sets the minimum-objects parameter.
        /// NOTE(review): presumably the minimum number (weight) of cases
        /// required in a node - confirm the valid range.
        bool set_min_obj(float min_objects);

        /// Sets the confidence-level parameter.
        /// NOTE(review): presumably used by pruning - confirm the valid range.
        bool set_conf_level(float conf_level);

        /// Builds the tree.
        /// @param maintable Table providing the data.
        /// @param subtable  Subset of @p maintable to use.
        /// @param evaluate  Boolean flag, true by default.
        ///        NOTE(review): presumably requests an evaluation on the
        ///        training data, retrievable via get_prediction() - confirm.
        /// @throws std::runtime_error as declared by the exception specification.
        void build(table *maintable, table::subset *subtable, bool evaluate = true)
            YADT_THROWS(std::runtime_error);

        /// @return Pointer to a conf_matrix with the prediction results.
        /// NOTE(review): which run it refers to and who owns the pointer are
        /// not visible in this header.
        conf_matrix *get_prediction();

        /// Predicts the cases read from a data source.
        /// @param ds Data source providing the cases.
        /// @return Pointer to the resulting conf_matrix (ownership not stated here).
        /// @throws std::runtime_error as declared by the exception specification.
        conf_matrix *predict(const datasource &ds) const
            YADT_THROWS(std::runtime_error);

        /// Evaluates the cases read from a data source, writing to a stream.
        /// @param ds     Data source providing the cases.
        /// @param output Stream that receives the output.
        /// @param sep    Separator character, a tab by default.
        /// @return A double.
        ///         NOTE(review): presumably an error/accuracy measure - confirm.
        /// @throws std::runtime_error as declared by the exception specification.
        double evaluate(const datasource &ds, std::ostream &output, char sep = '\t') const
            YADT_THROWS(std::runtime_error);

        /// Predicts a single case given as attribute strings.
        /// @param attributes Attribute values of the case. Taken by non-const
        ///        reference; whether the vector is modified is not visible here.
        /// @param weight     Weight of the case, 1 by default.
        /// @return A (string, float) pair - presumably the predicted class and
        ///         an associated confidence/weight; confirm.
        std::pair<std::string, float> predict(std::vector<std::string> &attributes, float weight = 1) const;

        /// Predicts the cases of a table subset.
        /// @param cases    Table providing the cases.
        /// @param subtable Subset of @p cases to predict.
        /// @return Pointer to the resulting conf_matrix (ownership not stated here).
        /// @throws std::runtime_error as declared by the exception specification.
        conf_matrix *predict(table *cases, table::subset *subtable) const
            YADT_THROWS(std::runtime_error);

        /// Writes a textual rendering of the tree to @p os (std::cout by default).
        void toTEXT(std::ostream &os = std::cout) const;

        /// Writes the tree to @p os (std::cout by default) - per the function
        /// name, in Graphviz DOT format.
        void toDOT(std::ostream &os = std::cout) const;

        /// Writes an XML rendering of the tree.
        /// @param os     Output stream, std::cout by default.
        /// @param cmTest Optional matrix, NULL by default.
        ///        NOTE(review): presumably test-set results to embed - confirm.
        void toXML(std::ostream &os = std::cout, const conf_matrix *cmTest = NULL) const;

        /// Writes the tree in binary form to the named file.
        /// Unlike table::toBinary this member is not const.
        void toBinary(const std::string &filename);

        /// Reads a tree back from a binary file.
        /// @return Pointer to the tree read (ownership not stated here -
        ///         presumably the caller must delete it; confirm).
        static dtree *fromBinary(const std::string &filename);

    private:
        /// Opaque pointer to the implementation object.
        void *real;
    };

}

#undef YADT_THROWS
00405
00406 using namespace yadt;
00407
00408 #endif // !defined(_YADT_H__INCLUDED_)