YaDT.h

Go to the documentation of this file.
00001 /*
00002                          Salvatore Ruggieri (c), 2002-2005
00003 */
00004 
00010 #if !defined(_YADT_H__INCLUDED_)
00011 #define _YADT_H__INCLUDED_
00012 
00013 #include <string>
00014 #include <vector>
00015 #include <stdexcept>
00016 #include <iostream> /* standard iostream and fstream */
00017 #include <fstream> 
00018 
/*
 * Brings every name of namespace std into the global namespace of each
 * translation unit that includes this header. The declarations further down
 * depend on it: they use string, vector, pair, ostream, cout and runtime_error
 * without the std:: qualifier.
 *
 * The directive is active whenever DOXYGEN_SHOULD_SKIP_THIS is not defined;
 * the guard presumably exists only to hide it from the Doxygen run (confirm in
 * the Doxygen configuration).
 *
 * NOTE(review): a using-directive at header scope cannot be undone by the
 * including file. Removing it requires qualifying every std name below first.
 */
00019 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00020 using namespace std;
00021 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00022 
00035 namespace yadt
00036 {
/*
 * Library identification queries. Only the declarations are visible in this
 * header; the text/number formats are decided by the implementation.
 */
              /// Returns a C string; judging by the name, the library version.
              /// NOTE(review): storage and lifetime of the returned pointer are not
              /// visible here -- presumably static, so do not free it; confirm.
00038         const char *get_version();
              /// Returns an int; judging by the name, a build number.
              /// NOTE(review): the top-level `const` on a by-value `int` return has no
              /// effect for callers (compilers may warn about an ignored qualifier).
              /// It is kept because the out-of-view definition must match this
              /// declaration's return type.
00040         const int get_build();
              /// Returns a C string; judging by the name, the copyright notice.
              /// NOTE(review): same open question on pointer lifetime as get_version().
00042         const char *get_copyright();
00043 
00044 
/**
 * Wraps a textual specification of a data source.
 *
 * The only data member is the string `specifications`. The syntax of the
 * specification text is defined by the implementation and is not visible in
 * this header. A datasource is what table::load_meta_data(), table::load_data(),
 * dtree::predict() and dtree::evaluate() accept as their input description.
 */
00053         class datasource
00054         {
00055         public:
                      /// Builds a datasource from a specification string.
                      /// @param specs specification text; presumably stored in `specifications`
                      ///        (confirm in the implementation).
                      /// NOTE(review): the constructor is not `explicit`, so a string converts
                      /// implicitly to a datasource wherever one is expected.
00064                 datasource(const string &specs);
                      /// Destructor (non-virtual).
00066                 ~datasource();
00067         private:
                      // dtree and table are granted direct access to `specifications`.
                      // The friend declarations are compiled whenever DOXYGEN_SHOULD_SKIP_THIS
                      // is not defined; the guard only keeps them out of the generated docs.
00068         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00069                         friend class dtree;
00070                         friend class table;
00071         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
                      /// Specification text of this data source.
00072                 string specifications;
00073         };
00074 
00075         // Forward declarations: conf_matrix and dtree are fully defined further
              // down in this header, but class table (next) already names both of them
              // in its friend declarations.
00076         class conf_matrix;
00077         class dtree;
00078 
/**
 * Handle to a data table.
 *
 * The only data member is the opaque pointer `real`; whatever it points to is
 * private to the implementation, which is not part of this header. conf_matrix
 * and dtree are friends and can therefore reach `real` directly.
 *
 * NOTE(review): load_meta_data() and load_data() carry dynamic exception
 * specifications (`throw(runtime_error)`). That syntax was deprecated in C++11
 * and removed in C++17, so this header needs an earlier language mode unless
 * the specifications are dropped here and in the matching definitions together.
 */
00082         class table
00083         {
00084         public:
                      /// Creates a table with the given name.
                      /// @param name table name; presumably what get_name() reports (confirm).
                      /// NOTE(review): not `explicit`, so a string converts implicitly to a table.
00088                 table(const string &name);
                      /// Destructor (non-virtual).
00090                 ~table();
                      /// Copy constructor. Whether the data behind `real` is duplicated or
                      /// shared is not visible in this header.
00092                 table( const table& );
                      /// Copy assignment. Returns a *const* reference, so the result of an
                      /// assignment expression cannot itself be modified.
00094                 const table& operator=( const table& );
                      /// Loads meta data from the given data source.
                      /// @param ds data source description.
                      /// @throws runtime_error per the exception specification; the failure
                      ///         conditions are defined by the implementation.
00096                 void load_meta_data(const datasource &ds)
00097                                  throw(runtime_error);
                      /// Returns the table name by value.
00099                 string get_name() const;
00100 
                      /// Loads data from the given data source.
                      /// @param ds      data source description.
                      /// @param unknown defaults to "?"; presumably the token marking an unknown
                      ///                value in the input (confirm).
                      /// @throws runtime_error per the exception specification.
                      /// NOTE(review): whether load_meta_data() must be called first is not
                      /// visible here.
00105                 void load_data(const datasource &ds, const string &unknown = "?") 
00106                         throw(runtime_error);
                      /// Number of columns, judging by the name.
00108                 unsigned get_no_columns() const;
                      /// Number of rows, judging by the name.
00110                 unsigned get_no_rows() const;
00111 
                      /// Binary export to the named file, judging by the name; the file format
                      /// is implementation-defined. Declared const.
                      /// @param filename file name.
00115                 void toBinary(const string &filename) const; 
                      /// Static counterpart of toBinary(): returns a table pointer for the
                      /// named file.
                      /// NOTE(review): a raw pointer is returned; ownership is presumably
                      /// transferred to the caller, and behaviour on failure is not visible
                      /// here -- confirm both.
00119                 static table* fromBinary(const string &filename); 
00120 
00121                 /* utility methods */
00122 
                      /// Sets the log stream.
                      /// @param new_log stream pointer to install.
                      /// @return an ostream pointer; presumably the previously installed log
                      ///         stream (confirm).
00126                 ostream *set_log(ostream *new_log);
                      /// Sets the verbosity level.
                      /// @param newverbosity new level; the meaning of the values is not
                      ///        visible here.
                      /// @return an unsigned; presumably the previous level (confirm).
00131                 unsigned set_verbosity(unsigned newverbosity);
                      /// Returns an elapsed time as double. Unit and the operation being
                      /// timed are not visible in this header.
00135                 double get_elapsed() const;
                      /// XML output of the data dictionary, judging by the name.
                      /// @param os output stream; cout when omitted.
00137                 void toXML_data_dictionary(ostream &os = cout) const; 
                      /// XML output of the mining schema, judging by the name.
                      /// @param os output stream; cout when omitted.
00139                 void toXML_mining_schema(ostream &os = cout) const; 
00140 
                      /// Handle to a subset of a table.
                      ///
                      /// Client code can copy, assign and destroy a subset but cannot create
                      /// one from scratch: the only non-copy constructor is private and
                      /// reachable just by the friends dtree and table. Instances are handed
                      /// out by the table::get_wsubset_*() functions below.
00149                 class subset
00150                 {
00151                 public:
                              /// Destructor (non-virtual).
00153                         ~subset();
                              /// Copy constructor; copy depth of `real` is not visible here.
00155                         subset( const subset& );
                              /// Copy assignment; returns a const reference (see table::operator=).
00157                         const subset& operator=( const subset& );
00158                 private:
00159         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00160                         friend class dtree;
00161                         friend class table;
00162         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00163 
                              /// Wraps an implementation object passed as an untyped pointer.
                              /// Private: only dtree and table can call it.
00164                         subset(void *actual);
                              /// Opaque pointer to the implementation object.
00165                         void *real;
00166                 };
00167 
                      // Subset factories. Each returns a raw subset pointer.
                      // NOTE(review): ownership of the returned pointers is presumably the
                      // caller's, and the meaning of the "w" in the names is not documented
                      // in this header -- confirm both against the original API docs.
                      /// Subset covering all of the table, judging by the name.
00171                 subset* get_wsubset_all() const;
                      /// Subset built from the first n rows, judging by the name.
                      /// @param n count; behaviour when n exceeds get_no_rows() is not visible here.
00175                 subset* get_wsubset_first_n(unsigned n) const;
                      /// Subset of n randomly chosen rows, judging by the name.
                      /// @param n count; sampling scheme and seeding are not visible here.
00180                 subset* get_wsubset_random(unsigned n) const;
                      /// Subset derived from the given one by a difference operation,
                      /// presumably the rows of this table that are not in `subtable` (confirm).
                      /// @param subtable taken as pointer to non-const subset.
00186                 subset* get_wsubset_difference(subset *subtable) const;
00187 
00188         private:
00189         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00190                 friend class conf_matrix;
00191                 friend class dtree;
00192         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00193 
                      /// Opaque pointer to the implementation object.
00194                 void *real;
00195         };
00196 
/**
 * Result matrix indexed by (actual, predicted) -- judging by get_element() and
 * the class name, a confusion matrix.
 *
 * Both non-copy constructors are private and dtree is the only friend, so
 * client code obtains instances from dtree::get_prediction() / dtree::predict()
 * or by calling clone() on an existing object. The only data member is the
 * opaque pointer `real`.
 *
 * NOTE(review): the private constructors use dynamic exception specifications
 * (`throw(runtime_error)`), which were removed from the language in C++17.
 */
00211         class conf_matrix 
00212         {
00213         public:
                      /// Destructor (non-virtual).
00215                 ~conf_matrix();
                      /// Copy constructor; copy depth of `real` is not visible here.
00217                 conf_matrix( const conf_matrix& );
                      /// Copy assignment. Returns a const reference, so the result of an
                      /// assignment expression cannot itself be modified.
00219                 const conf_matrix& operator=( const conf_matrix& );
                      /// Returns a pointer to a conf_matrix; judging by the name, a copy of
                      /// this one. NOTE(review): ownership is presumably the caller's; confirm.
00223                 conf_matrix* clone() const;
                      /// Returns a size_t; presumably the number of classes, i.e. the valid
                      /// range of the get_element() indices (confirm).
00225                 size_t size() const;
                      /// Returns one cell as float.
                      /// @param actual    index on the "actual" axis.
                      /// @param predicted index on the "predicted" axis.
                      /// NOTE(review): bounds handling is not visible in this header.
00227                 float get_element(size_t actual, size_t predicted) const;
                      /// Returns a float; judging by the name, a misclassification percentage.
                      /// Scale (0..1 or 0..100) is not visible here.
00232                 float mis_perc() const;
                      /// Returns a float; judging by the name, the number of cases covered.
00236                 float cases() const;
                      /// Returns an elapsed time as double; unit and timed operation are not
                      /// visible in this header.
00238                 double get_elapsed() const;
                      /// Text output.
                      /// @param os    output stream; cout when omitted.
                      /// @param space defaults to 0; presumably an indentation width (confirm).
00243                 void toTEXT(ostream& os = cout, unsigned space = 0) const;
                      /// XML output.
                      /// @param os output stream; cout when omitted.
00245                 void toXML(ostream& os = cout) const;
00246 
00247         private:
00248         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00249                 friend class dtree;
00250         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00251 
                      /// Private constructor taking the table the matrix refers to.
                      /// @throws runtime_error per the exception specification.
00255                 conf_matrix(table *maintable)
00256                         throw(runtime_error);
                      /// Private constructor taking a pointer to another matrix (distinct from
                      /// the public copy constructor, which takes a reference).
                      /// @throws runtime_error per the exception specification.
00257                 conf_matrix(const conf_matrix *cm)
00258                         throw(runtime_error);
                      /// Opaque pointer to the implementation object.
00260                 void *real;
00261         };
00262 
/**
 * Decision tree handle: configuration, construction from a table, prediction,
 * evaluation and export.
 *
 * The only data member is the opaque pointer `real`; the tree representation
 * is private to the implementation, which is not part of this header.
 *
 * NOTE(review): build(), evaluate() and two predict() overloads carry dynamic
 * exception specifications (`throw(runtime_error)`), which were deprecated in
 * C++11 and removed in C++17.
 */
00267         class dtree 
00268         {
00269         public:
                      /// Creates a tree with the given name.
                      /// @param name defaults to "my_decision_tree".
                      /// NOTE(review): not `explicit`, so a string converts implicitly to a dtree.
00271                 dtree(const string &name="my_decision_tree");
                      /// Destructor (non-virtual).
00273                 ~dtree();
                      /// Copy constructor; copy depth of `real` is not visible here.
00275                 dtree( const dtree& );
                      /// Copy assignment. Returns a const reference, so the result of an
                      /// assignment expression cannot itself be modified.
00277                 const dtree& operator=( const dtree& );
                      /// Returns a pointer to a dtree; judging by the name, a copy of this one.
                      /// NOTE(review): ownership is presumably the caller's; confirm.
00279                 dtree* clone() const;
00280 
                      /// Returns an elapsed time as double; unit and timed operation are not
                      /// visible in this header.
00282                 double get_elapsed() const;
                      /// Returns an unsigned; judging by the name, the tree size (whether this
                      /// counts all nodes or only leaves is not visible here).
00284                 unsigned size() const;
                      /// Returns an unsigned; judging by the name, the tree depth.
00286                 unsigned depth() const;
                      /// Returns an unsigned; judging by the name, the number of training rows.
00288                 unsigned training_n_rows() const;
00289 
                      /// Pruning strategies accepted by set_pruning_strategy().
                      /// NOTE(review): the enumerator meanings are not documented in this
                      /// listing; presumably none / C4.5-style / a third "DT" variant -- confirm
                      /// against the original API docs.
00291                 typedef enum {
00293                         PRUNING_NO, 
00295                         PRUNING_C45, 
00297                         PRUNING_DT
00298                 } PruningStrategy;
00299 
                      /// Split criteria accepted by set_split_type(); judging by the names,
                      /// information gain and gain ratio.
00301                 typedef enum {
00303                         ST_GAIN,
00305                         ST_GAIN_RATIO
00306                 } SplitType;
00307 
                      // Configuration setters. Each returns bool; the meaning of the result
                      // (presumably "value accepted") and the defaults in effect when a
                      // setter is never called are not visible in this header.
                      /// Selects the pruning strategy.
00311                 bool set_pruning_strategy(PruningStrategy strategy);
                      /// Selects the split criterion.
00315                 bool set_split_type(SplitType st);
                      /// Sets a minimum number of objects; valid range is not visible here.
00320                 bool set_min_obj(float min_objects);
                      /// Sets a confidence level; valid range is not visible here.
00323                 bool set_conf_level(float conf_level);
00324 
                      /// Builds the tree from a table and a subset of it.
                      /// @param maintable table to build from.
                      /// @param subtable  subset of the table to use; handling of a null pointer
                      ///                  is not visible here.
                      /// @param evaluate  defaults to true; presumably controls whether the
                      ///                  result later returned by get_prediction() is computed
                      ///                  (confirm).
                      /// @throws runtime_error per the exception specification.
00334                 void build(table* maintable, table::subset *subtable, bool evaluate = true)
00335                         throw (runtime_error);
                      /// Returns a conf_matrix pointer. NOTE(review): non-const, unlike the
                      /// predict() overloads; ownership of the result is not visible here.
00341                 conf_matrix* get_prediction();
                      /// Prediction over the cases described by a data source.
                      /// @param ds data source description.
                      /// @return conf_matrix pointer; ownership presumably the caller's (confirm).
                      /// @throws runtime_error per the exception specification.
00348                 conf_matrix* predict(const datasource &ds) const
00349                         throw(runtime_error);
                      /// Evaluation over a data source with output to a stream.
                      /// @param ds     data source description.
                      /// @param output stream that receives the output.
                      /// @param sep    separator character, tab by default.
                      /// @return a double; its meaning is not visible in this header.
                      /// @throws runtime_error per the exception specification.
00360                 double evaluate(const datasource &ds, ostream &output, char sep = '\t') const
00361                         throw(runtime_error);
                      /// Prediction for a single case given as attribute strings.
                      /// @param attributes attribute values. NOTE(review): taken by non-const
                      ///                   reference, so a temporary vector cannot be passed;
                      ///                   whether the vector is modified is not visible here.
                      /// @param weight     defaults to 1.
                      /// @return a (string, float) pair; presumably the predicted class and an
                      ///         associated score (confirm). No exception specification.
00370                 pair<string, float> predict(vector<string> &attributes, float weight = 1) const;
00371                 // predict all cases in a given table
                      /// @param cases    table holding the cases.
                      /// @param subtable subset of `cases` to predict.
                      /// @return conf_matrix pointer; ownership presumably the caller's (confirm).
                      /// @throws runtime_error per the exception specification.
00381                 conf_matrix* predict(table* cases, table::subset *subtable) const
00382                         throw(runtime_error);
00383 
                      /// Text output.
                      /// @param os output stream; cout when omitted.
00385                 void toTEXT(ostream& os = cout) const;
                      /// DOT output (judging by the name, Graphviz DOT format).
                      /// @param os output stream; cout when omitted.
00387                 void toDOT(ostream& os = cout) const;
                      /// XML output.
                      /// @param os     output stream; cout when omitted.
                      /// @param cmTest optional matrix, NULL by default; presumably test-set
                      ///               results to include in the output (confirm).
00389                 void toXML(ostream &os = cout, const conf_matrix *cmTest = NULL) const; 
00390 
                      /// Binary export to the named file, judging by the name.
                      /// NOTE(review): declared non-const, whereas table::toBinary is const.
00393                 void toBinary(const string &filename);
                      /// Static counterpart of toBinary(): returns a dtree pointer for the
                      /// named file. NOTE(review): ownership presumably the caller's and
                      /// failure behaviour are not visible here -- confirm both.
00397                 static dtree *fromBinary(const string &filename);
00398 
00399         private:
                      /// Opaque pointer to the implementation object.
00401                 void *real;
00402         };
00403 
00404 } // namespace yadt
00405 
/*
 * Makes every name of namespace yadt visible in the global namespace of each
 * file that includes this header, so client code can write `dtree` instead of
 * `yadt::dtree`.
 *
 * NOTE(review): this is unconditional and cannot be undone by the including
 * file; generic names such as `table` or `datasource` may clash with client
 * code. Removing it would break clients that rely on the unqualified names.
 */
00406 using namespace yadt;
00407 
00408 #endif // !defined(_YADT_H__INCLUDED_)

Generated on Wed Feb 21 12:23:50 2007 for YaDT by  doxygen 1.5.1-p1