source: orange/source/orange/earth.hpp @ 8920:00df413757bc

Revision 8920:00df413757bc, 12.4 KB checked in by ales_erjavec <ales.erjavec@…>, 3 years ago (diff)

Changed EvalSubsetsUsingXtx in earth package to return an error code if there are lin. dep. terms in bx.
Fixes #932 (not completely - there are still a lot of places where exit() can get called, but they are nested more deeply (most importantly malloc errors) and would require too much work; this one is the only one that fails under normal circumstances).

Line 
1
2// This program is free software; you can redistribute it and/or modify
3// it under the terms of the GNU General Public License as published by
4// the Free Software Foundation; either version 2 of the License, or
5// (at your option) any later version.
6//
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10// GNU General Public License for more details.
11//
12// A copy of the GNU General Public License is available at
13// http://www.r-project.org/Licenses
14
15/*
16    This file is part of Orange.
17
18    Copyright 1996-2011 Faculty of Computer and Information Science, University of Ljubljana
19    Contact: janez.demsar@fri.uni-lj.si
20
21    Orange is free software: you can redistribute it and/or modify
22    it under the terms of the GNU General Public License as published by
23    the Free Software Foundation, either version 3 of the License, or
24    (at your option) any later version.
25
26    Orange is distributed in the hope that it will be useful,
27    but WITHOUT ANY WARRANTY; without even the implied warranty of
28    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29    GNU General Public License for more details.
30
31    You should have received a copy of the GNU General Public License
32    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
33*/
34
35/*
36     Changes to earth.h from earth R package:
37     - Added defines for STANDALONE USING_R
38     - Removed definition for bool
39     - Added extern "C" definitions for ForwardPass and EvalSubsetsUsingXtX
40     - Changed EvalSubsetsUsingXtX to return an error code if lin. dep. terms in bx
41
42 */
43
44
// Include guard for this header.
// NOTE(review): identifiers containing a double underscore are reserved in
// C++; renaming the guard would first need a check for other files that
// test __EARTH_HPP.
45#ifndef __EARTH_HPP
46#define __EARTH_HPP
47
// Configuration switches added on top of the earth R package header:
// STANDALONE=1 enables the plain C++ declarations (Earth, FormatEarth,
// PredictEarth) and USING_R=0 disables the R-facing ones. Both macros are
// #undef'ed again before the Orange interface section of this header.
48#define STANDALONE 1
49#define USING_R 0
50
51#if USING_R
52
// R-only declaration (compiled only when USING_R is nonzero); takes no
// arguments and returns nothing.
// NOTE(review): the name suggests it releases state held by the R entry
// points — confirm in the implementation.
53void FreeR(void);
54
// Forward-pass entry point for callers from R (compiled only when USING_R is
// nonzero). Scalar inputs are passed by pointer; results are written into the
// caller-supplied arrays marked "out".
55void ForwardPassR(              // for use by R
56    int    FullSet[],           // out: nMaxTerms x 1, bool vec of lin indep cols of bx
57    double bx[],                // out: MARS basis matrix, nCases x nMaxTerms
58    double Dirs[],              // out: nMaxTerms x nPreds, elements are -1,0,1,2
59    double Cuts[],              // out: nMaxTerms x nPreds, cut for iTerm,iPred
60    const double x[],           // in: nCases x nPreds
61    const double y[],           // in: nCases x nResp
62    const double WeightsArg[],  // in: nCases x 1, can be R_NilValue, currently ignored
63    const int *pnCases,         // in: number of rows in x and elements in y
64    const int *pnResp,          // in: number of cols in y
65    const int *pnPreds,         // in: number of cols in x
66    const int *pnMaxDegree,     // in: cf. nMaxDegree of Earth() ("Friedman's mi")
67    const int *pnMaxTerms,      // in: cf. nMaxTerms of Earth() ("includes the intercept term")
68    const double *pPenalty,     // in: cf. Penalty of Earth() ("GCV penalty per knot")
69    double *pThresh,            // in: forward step threshold
70    const int *pnMinSpan,       // in: cf. nMinSpan of Earth() ("set to non zero to override internal calculation")
71    const int *pnFastK,         // in: Fast MARS K
72    const double *pFastBeta,    // in: Fast MARS ageing coef
73    const double *pNewVarPenalty, // in: penalty for adding a new variable (default is 0)
74    const int  LinPreds[],        // in: nPreds x 1, 1 if predictor must enter linearly
75    const SEXP Allowed,           // in: constraints function
76    const int *pnAllowedFuncArgs, // in: number of arguments to Allowed function, 3 or 4
77    const SEXP Env,               // in: environment for Allowed function
78    const int *pnUseBetaCache,    // in: 1 to use the beta cache, for speed
79    const double *pTrace,         // in: 0 none 1 overview 2 forward 3 pruning 4 more pruning
80    const char *sPredNames[]);    // in: predictor names in trace printfs, can be R_NilValue
81
// Subset-evaluation entry point for callers from R (compiled only when
// USING_R is nonzero). Returns void, unlike EarthEvalSubsetsUsingXtx further
// down in this header, which returns an int.
// NOTE(review): appears to be the R-facing counterpart of that function —
// confirm in the implementation.
82void EvalSubsetsUsingXtxR(     // for use by R
83    double       PruneTerms[], // out: specifies which cols in bx are in best set
84    double       RssVec[],     // out: nTerms x 1
85    const int    *pnCases,     // in
86    const int    *pnResp,      // in: number of cols in y
87    const int    *pnMaxTerms,  // in
88    const double bx[],         // in: MARS basis matrix, all cols must be independent
89    const double y[],          // in: nCases * nResp
90    const double Weights[]);   // in: nCases x 1, can be R_NilValue
91
// Test hook exposing the earth routine Regress to R (compiled only when
// USING_R is nonzero). Scalar arguments are passed by pointer; results are
// written into the arrays marked "out".
92void RegressR(          // for testing earth routine Regress from R
93    double       Betas[],       // out: nUsedCols * nResp
94    double       Residuals[],   // out: nCases * nResp
95    double       Rss[],         // out: RSS, summed over all nResp
96    double       Diags[],       // out: diags of inv(transpose(bx) * bx)
97    int          *pnRank,       // out: nbr of indep cols in x
98    int          iPivots[],     // out: nCols, can be NULL
99    const double x[],           // in: nCases x nCols
100    const double y[],           // in: nCases x nResp
101    const double Weightss[],    // in: nCases x 1, sqrt of Weights
102    const int    *pnCases,      // in: number of rows in x and in y
103    const int    *pnResp,       // in: number of cols in y
104    int          *pnCols,       // in: number of columns in x, some may not be used
105    const bool   UsedCols[]);   // in: specifies used columns in x
106
107#endif // USING_R
108
109#if STANDALONE
110//#define bool char
111
// Standalone (non-R) entry point, declared only when STANDALONE is nonzero.
// All results are returned through the caller-supplied "out" arguments; the
// expected array dimensions are given per parameter. Prune selects whether
// the backward pass is run.
112void Earth(
113    double *pBestGcv,       // out: GCV of the best model i.e. BestSet columns of bx
114    int    *pnTerms,        // out: max term nbr in final model, after removing lin dep terms
115    bool   BestSet[],       // out: nMaxTerms x 1, indices of best set of cols of bx
116    double bx[],            // out: nCases x nMaxTerms
117    int    Dirs[],          // out: nMaxTerms x nPreds, 1,0,-1 for term iTerm, predictor iPred
118    double Cuts[],          // out: nMaxTerms x nPreds, cut for term iTerm, predictor iPred
119    double Residuals[],     // out: nCases x nResp
120    double Betas[],         // out: nMaxTerms x nResp
121    const double x[],       // in: nCases x nPreds
122    const double y[],       // in: nCases x nResp
123    const double WeightsArg[], // in: nCases, can be NULL
124    const int nCases,       // in: number of rows in x and elements in y
125    const int nResp,        // in: number of cols in y
126    const int nPreds,       // in: number of cols in x
127    const int nMaxDegree,   // in: Friedman's mi
128    const int nMaxTerms,    // in: includes the intercept term
129    const double Penalty,   // in: GCV penalty per knot
130    double Thresh,          // in: forward step threshold
131    const int nMinSpan,     // in: set to non zero to override internal calculation
132    const bool Prune,       // in: do backward pass
133    const int nFastK,       // in: Fast MARS K
134    const double FastBeta,  // in: Fast MARS ageing coef
135    const double NewVarPenalty, // in: penalty for adding a new variable
136    const int LinPreds[],       // in: 1 x nPreds, 1 if predictor must enter linearly
137    const bool UseBetaCache,    // in: 1 to use the beta cache, for speed
138    const double Trace,         // in: 0 none 1 overview 2 forward 3 pruning 4 more pruning
139    const char *sPredNames[]);  // in: predictor names in trace printfs, can be NULL
140
// Prints a fitted model (standalone build only). Every argument is an input;
// UsedCols/Dirs/Cuts/Betas have the layout of the arrays produced by Earth().
// NOTE(review): the output destination is not visible in this header —
// confirm in the implementation.
141void FormatEarth(
142    const bool   UsedCols[],// in: nMaxTerms x 1, indices of best set of cols of bx
143    const int    Dirs[],    // in: nMaxTerms x nPreds, 1,0,-1 for term iTerm, predictor iPred
144    const double Cuts[],    // in: nMaxTerms x nPreds, cut for term iTerm, predictor iPred
145    const double Betas[],   // in: nMaxTerms x nResp
146    const int    nPreds,    // in: presumably number of cols in x, as in PredictEarth — confirm
147    const int    nResp,     // in: number of cols in y
148    const int    nTerms,    // in: presumably the *pnTerms value returned by Earth() — confirm
149    const int    nMaxTerms, // in: presumably the nMaxTerms used for the fit — confirm
150    const int    nDigits,   // number of significant digits to print
151    const double MinBeta);  // terms with abs(beta) less than this are not printed, 0 for all
152
// Evaluates a fitted model for one input vector x (standalone build only) and
// writes the nResp predicted responses into y. UsedCols/Dirs/Cuts/Betas have
// the layout of the arrays produced by Earth().
153void PredictEarth(
154    double       y[],           // out: vector nResp
155    const double x[],           // in: vector nPreds x 1 of input values
156    const bool   UsedCols[],    // in: nMaxTerms x 1, indices of best set of cols of bx
157    const int    Dirs[],        // in: nMaxTerms x nPreds, 1,0,-1 for iTerm iPred
158    const double Cuts[],        // in: nMaxTerms x nPreds, cut for term iTerm predictor iPred
159    const double Betas[],       // in: nMaxTerms x nResp
160    const int    nPreds,        // in: number of cols in x
161    const int    nResp,         // in: number of cols in y
162    const int    nTerms,        // in: presumably the *pnTerms value returned by Earth() — confirm
163    const int    nMaxTerms);    // in: presumably the nMaxTerms used for the fit — confirm
164
165#endif // STANDALONE
166
167
168/*
169 * Earth interface (with C call semantics) for ctypes.
170 * Using this allows greater flexibility for calling from
171 * python with numpy arrays than using EarthLearner.
172 *
173 */
174
// EARTH_EXPORT decorates the extern "C" functions below: with the Microsoft
// compiler it expands to __declspec(dllexport), with any other compiler it
// expands to nothing.
175#ifdef _MSC_VER
176    #define EARTH_EXPORT __declspec(dllexport)
177#else
178    #define EARTH_EXPORT
179#endif // _MSC_VER
180
// C-linkage entry points (no C++ name mangling) so the functions can be
// looked up by name, e.g. through ctypes.
181extern "C" {
// Forward pass only. Results are written into the caller-supplied arrays
// marked "out"; scalar inputs are passed by value.
182EARTH_EXPORT void EarthForwardPass(
183    int    *pnTerms,            // out: highest used term number in full model
184    bool   FullSet[],           // out: 1 * nMaxTerms, indices of lin indep cols of bx
185    double bx[],                // out: MARS basis matrix, nCases * nMaxTerms
186    int    Dirs[],              // out: nMaxTerms * nPreds, -1,0,1,2 for iTerm, iPred
187    double Cuts[],              // out: nMaxTerms * nPreds, cut for iTerm, iPred
188    int    nFactorsInTerm[],    // out: number of hockey stick funcs in each MARS term
189    int    nUses[],             // out: nbr of times each predictor is used in the model
190    const double x[],           // in: nCases x nPreds
191    const double y[],           // in: nCases x nResp
192    const double WeightsArg[],  // in: nCases x 1, can be NULL, currently ignored
193    const int nCases,           // in: number of rows in x and elements in y
194    const int nResp,            // in: number of cols in y
195    const int nPreds,           // in: presumably number of cols in x, as in Earth() — confirm
196    const int nMaxDegree,       // in: cf. nMaxDegree of Earth() ("Friedman's mi")
197    const int nMaxTerms,        // in: cf. nMaxTerms of Earth() ("includes the intercept term")
198    const double Penalty,       // in: GCV penalty per knot
199    double Thresh,              // in: forward step threshold
200    int nFastK,                 // in: Fast MARS K
201    const double FastBeta,      // in: Fast MARS ageing coef
202    const double NewVarPenalty, // in: penalty for adding a new variable (default is 0)
203    const int  LinPreds[],      // in: nPreds x 1, 1 if predictor must enter linearly
204    const bool UseBetaCache,    // in: true to use the beta cache, for speed
205    const char *sPredNames[]);   // in: predictor names, can be NULL
206
// Evaluates term subsets of the basis matrix bx. Per the change notes at the
// top of this header, the int return value is an error code reported when bx
// contains linearly dependent terms; the concrete code values are not
// visible here — check the implementation before testing against a literal.
207EARTH_EXPORT int EarthEvalSubsetsUsingXtx(
208    bool   PruneTerms[],    // out: nMaxTerms x nMaxTerms
209    double RssVec[],        // out: nMaxTerms x 1, RSS of each subset
210    const int    nCases,    // in
211    const int    nResp,     // in: number of cols in y
212    const int    nMaxTerms, // in: number of MARS terms in full model
213    const double bx[],      // in: nCases x nMaxTerms, all cols must be indep
214    const double y[],       // in: nCases * nResp
215    const double WeightsArg[]); // in: nCases x 1, can be NULL
216}; // extern "C"
217
218/*
219 * ORANGE INTERFACE
220 */
221
// Drop the earth configuration macros defined at the top of this header so
// they are no longer defined for the Orange headers included next or for
// files that include this header.
222#undef STANDALONE
223#undef USING_R
224
225#include "classify.hpp"
226#include "learn.hpp"
227#include "orange.hpp"
228#include "domain.hpp"
229#include "examplegen.hpp"
230#include "table.hpp"
231#include "examples.hpp"
232
233class ORANGE_API TEarthLearner : public TLearner {
234public:
235    __REGISTER_CLASS
236
237    TEarthLearner();
238
239    int max_terms; //P Maximum number of terms.
240    int max_degree; //P Maximum degree of terms.
241
242    float penalty; //P Penalty.
243    float threshold; //P Forward step threshold.
244    bool prune; //P Prune terms (do backward pass).
245    float fast_beta; //P Fast beta (aging coefficient).
246    float trace; //P Execution trace (for debugging only).
247    int min_span; //P Min. span (training instances) between knots.
248    int fast_k; //P Fast K.
249    bool new_var_penalty; //P Penalty for adding a new variable.
250    bool use_beta_cache; //P Use caching for betas.
251
252    PClassifier operator()(PExampleGenerator, const int & = 0);
253
254};
255
256#include "slist.hpp"
257
// Classifier class of the Orange interface to earth; derives from
// TClassifierFD. The model is held in the private raw arrays (_best_set,
// _dirs, _cuts, _betas), whose element types match the UsedCols/Dirs/Cuts/
// Betas arguments of PredictEarth(), and is also exposed through the
// list-typed public fields.
// NOTE(review): a copy constructor and a destructor are declared but no copy
// assignment operator; if the raw arrays are owned by the object, assignment
// needs the same treatment — confirm in the implementation.
258class ORANGE_API TEarthClassifier: public TClassifierFD {
259public:
260    __REGISTER_CLASS
261
262    TEarthClassifier();
    // NOTE(review): whether the arrays passed here are copied or adopted is
    // not visible in this header — confirm before freeing them in the caller.
263    TEarthClassifier(PDomain domain, bool * best_set, int * dirs, double * cuts, double *betas, int num_preds, int num_responses, int num_terms, int max_terms);
264    TEarthClassifier(const TEarthClassifier & other);
265
266    virtual ~TEarthClassifier();
267
    // Predicts a TValue for one example.
268    TValue operator()(const TExample&);
    // Returns a std::string; presumably the textual form of the model as
    // produced by FormatEarth() — confirm.
269    std::string format_earth();
270
271    int num_preds; //P Number of predictor variables
272    int num_terms; //P Number of used terms
273    int max_terms; //P Maximum number of terms
274    int num_responses; //P Number of response variables
275
    // NOTE(review): "max_preds" in the two matrix comments below is not a
    // member of this class; Earth() documents Dirs and Cuts as
    // nMaxTerms x nPreds, so max_terms x num_preds is probably meant.
276    PBoolList best_set; //P Used terms.
277    PFloatListList dirs; //P max_preds x num_preds matrix
278    PFloatListList cuts; //P max_preds x num_preds matrix of cuts
279    PFloatList betas; //P Term coefficients;
280
    // Model (de)serialization through a TCharBuffer.
281    void save_model(TCharBuffer& buffer);
282    void load_model(TCharBuffer& buffer);
283private:
284
    // Helpers returning the list-typed views; presumably built from the raw
    // arrays below — confirm.
285    PBoolList get_best_set();
286    PFloatListList get_dirs();
287    PFloatListList get_cuts();
288    PFloatList get_betas();
289
290    void init_members();
    // Returns a raw double array for an example.
    // NOTE(review): ownership of the returned pointer is not visible here.
291    double* to_xvector(const TExample&);
292
    // Raw model arrays.
293    bool* _best_set;
294    int * _dirs;
295    double * _cuts;
296    double * _betas;
297};
298
// WRAPPER is a macro defined outside this header.
// NOTE(review): presumably it declares the wrapped-pointer types for the two
// classes above (e.g. PEarthLearner / PEarthClassifier) — confirm against the
// macro definition.
299WRAPPER(EarthLearner)
300WRAPPER(EarthClassifier)
301
302#endif // __EARTH_HPP
Note: See TracBrowser for help on using the repository browser.