source: orange/orange/ensemble.c @ 1117:9d3ae0f917e6

Revision 1117:9d3ae0f917e6, 7.9 KB checked in by janezd <janez.demsar@…>, 10 years ago (diff)
  • filestr2val and val2filestr now also pass the Example under construction as an argument
Line 
1/*
2    This file is part of Orange.
3
4    Orange is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2 of the License, or
7    (at your option) any later version.
8
9    Orange is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with Orange; if not, write to the Free Software
16    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18    Authors: Janez Demsar, Blaz Zupan, 1996--2002
19    Contact: janez.demsar@fri.uni-lj.si
20*/
21
22/*  This file is a part of dynamic library that staticaly links with C4.5.
23    To compile it, download C4.5 (R8) from http://www.cse.unsw.edu.au/~quinlan/,
24    copy this file and buildc45.py to C45's directory R8/Src and run buildc45.py.
25
26    The script should work on Windows, Linux and Mac OSX. For other systems, you
27    will need to modify buildc45.py and Orange's c4.5.cpp; contact me for help or,
28    if you manage to do it yourself, send me the patches if you want to make your
29    port available.
30
31
32    This file redefines malloc, calloc, free and cfree to record the (de)allocations,
33    so that any unreleased memory can be freed after learning is done.
34   
35    The library exports a function that calls C4.5's learning, and a function
36    for clearing the memory afterwards.
37
38    Finally, this file contains functions (O_Iterate, O_OneTree and O_BestTree) that
39    are based on functions Iterate, OneTree and BestTree from Ross Quinlan's C4.5.
40    They had to be rewritten to avoid the unnecessary printouts (I don't remember
41    why haven't I simply redefined printf to do nothing). Besides, I have removed
42    from them storing the data that C4.5 needed only for output, not for classification.
43    In other ways, the functions should be equivalent to the originals.
44*/
45
46#include <stdio.h>
47
48#ifndef HAS_RANDOM
49long random()
50{ return rand(); }
51#endif
52
53#ifdef _MSC_VER
54  #define C45_API __declspec(dllexport)
55  #define WIN32_LEAN_AND_MEAN       // Exclude rarely-used stuff from Windows headers
56
57  #include <windows.h>
58  #undef IGNORE
59
60  BOOL APIENTRY DllMain( HANDLE hModule, DWORD  ul_reason_for_call, LPVOID lpReserved)
61  { return TRUE; }
62
63#else
64  #define C45_API
65
66#endif
67
68void **allocations = NULL, **emptySlot = NULL, **lastAllocation = NULL;
69int allocationsSize = 0;
70
71void *guarded_alloc(size_t size, size_t elsize, char callo)
72{ 
73  void **s, **t;
74  int nal;
75
76  if (!allocations) {
77    allocationsSize = 2048;
78    emptySlot = allocations = malloc(allocationsSize * sizeof(void *));
79    lastAllocation = allocations + allocationsSize;
80  }
81
82  else if (emptySlot == lastAllocation) {
83      nal = allocationsSize * 1.5;
84      allocations = realloc(allocations, nal * sizeof(void *));
85      emptySlot = allocations + allocationsSize;
86      allocationsSize = nal;
87      lastAllocation = allocations + allocationsSize;
88  }
89
90  *emptySlot = callo ? calloc(size, elsize + 128) : malloc(size + 128);
91  return *(emptySlot++);
92}
93
94void **findSlot(void *ptr)
95{ 
96  void **s;
97  for(s = allocations; s != emptySlot; s++)
98    if (*s == ptr)
99      return s;
100  return NULL;
101}
102
103
104void guarded_free(void *ptr)
105{ 
106/* It seems that there are callocs the manage to
107   avoid being allocated through my macros, so I
108   cannot rely on guarded_collect but must also
109   define  guarded_free.
110
111   (Namely there's one in prune.c that allocates
112    LocalClassDist) */
113
114  void **slot = findSlot(ptr);
115  *slot = NULL;
116  free(ptr);
117}
118
119
120C45_API void guarded_collect()
121{ 
122  void **s;
123  for(s = allocations; s != emptySlot; s++)
124    if (*s)
125      free(*s);
126
127  free(allocations);
128  allocations = emptySlot = lastAllocation = NULL;
129  allocationsSize = 0;
130}
131
132
133#define calloc(x,y) guarded_alloc((x), (y), 1)
134#define malloc(x) guarded_alloc((x), 0, 0)
135#define free(p) guarded_free((p))
136#define cfree(p) guarded_free((p))
137
138#define realloc(p,x) This is defined just to make sure that we have not missed any reallocs in C4.5 (we found none)
139
140
141void Error(int l, char *s, char *t)
142{}
143
144#include <math.h>
145#include "types.h"
146
147short MaxAtt, MaxClass, MaxDiscrVal;
148ItemNo MaxItem;
149Description *Item;
150DiscrValue *MaxAttVal;
151char *SpecialStatus, **ClassName, **AttName, ***AttValName;
152
153#define PrintConfusionMatrix(x) // make besttree.c happy
154
155#include "besttree.c"
156#include "classify.c"
157#include "contin.c"
158#include "discr.c"
159#include "info.c"
160#include "prune.c"
161#include "sort.c"
162#include "st-thresh.c"
163#include "stats.c"
164#include "subset.c"
165#include "trees.c"
166#include "build.c"
167
168short VERBOSITY = 0, TRIALS = 10;
169Boolean GAINRATIO = true, SUBSET = false;
170ItemNo MINOBJS = 2, WINDOW = 0, INCREMENT = 0;
171float       CF = 0.25;
172
173C45_API void *c45Data[] = {
174  &MaxAtt, &MaxClass, &MaxDiscrVal, &MaxItem,
175  &Item, &MaxAttVal, &SpecialStatus, &ClassName, &AttName,
176  &AttValName
177};
178
179/* These need to be defined since we got rid of files that would otherwise define them */
180char *FileName = "DF";
181Tree        *Pruned = NULL;
182Boolean     AllKnown;
183
184
185/* Need to redefine these to avoid printouts */
186
187
188void Swap();
189ClassNo Category();
190Tree FormTree();
191
192Tree O_Iterate(ItemNo Window, ItemNo IncExceptions)
193{
194  Tree Classifier, BestClassifier = Nil;
195  ItemNo i, Errors, TotalErrors, BestTotalErrors = MaxItem+1, Exceptions, Additions;
196  ClassNo Assigned;
197
198  do {
199    InitialiseWeights();
200    AllKnown = true;
201    Classifier = FormTree(0, Window-1);
202
203    Errors = Round(Classifier->Errors);
204
205    Exceptions = Window;
206    ForEach(i, Window, MaxItem) {
207      Assigned = Category(Item[i], Classifier);
208      if (Assigned != Class(Item[i]) ) {
209        Swap(Exceptions, i);
210        Exceptions++;
211        }
212      }
213
214    Exceptions -= Window;
215    TotalErrors = Errors + Exceptions;
216
217    if (!BestClassifier || (TotalErrors < BestTotalErrors)) {
218      if (BestClassifier)
219        ReleaseTree(BestClassifier);
220      BestClassifier = Classifier;
221      BestTotalErrors = TotalErrors;
222    }
223    else 
224        ReleaseTree(Classifier);
225
226    Additions = Min(Exceptions, IncExceptions);
227    Window = Min(Window + Max(Additions, Exceptions / 2), MaxItem + 1);
228  }
229  while (Exceptions);
230
231  return BestClassifier;
232}
233
234
235Tree O_OneTree(char prune)
236{
237  Tree tree;
238
239  InitialiseTreeData();
240  InitialiseWeights();
241
242  AllKnown = true;
243  tree = FormTree(0, MaxItem);
244
245  if (prune)
246    Prune(tree);
247
248  return tree;
249}
250
251
252Tree O_BestTree(int trials, int window, int increment, char prune)
253{
254  short t;
255 
256  Tree best = NULL, tree;
257
258  InitialiseTreeData();
259
260  TargetClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));
261
262  if (!window)
263    window = (int)(Max(2 * sqrt(MaxItem+1.0), (MaxItem+1) / 5));
264
265  if (!increment)
266    increment = Max(window / 5, 1);
267
268  FormTarget(window);
269
270  ForEach(t, 0, trials-1 ) {
271    FormInitialWindow();
272    tree = O_Iterate(window, increment);
273
274    if (prune)
275      Prune(tree);
276
277    if ( !best || tree->Errors < best->Errors )
278      best = tree;
279    else
280      ReleaseTree(tree);
281  }
282
283  return best;
284}
285
286
287C45_API Tree learn(int trials, char gainRatio, char subset, char batch, char probThresh, int minObjs, int window, int increment, float cf, char prune)
288{
289  Tree best;
290 
291  /* These get initialized to NULL when c45 is loaded.
292     Since garbage collection frees their memory, we reinitialized
293     them to force c45 to allocate the memory again. */
294
295  ClassSum = NULL;
296  PossibleValues = NULL;
297
298  VERBOSITY  = 0;
299
300  GAINRATIO  = gainRatio;
301  SUBSET     = subset;
302  MINOBJS    = minObjs;
303  CF         = cf;
304 
305  best = batch ? O_OneTree(prune) : O_BestTree(trials, window, increment, prune);
306  if (probThresh)
307    SoftenThresh(best);
308
309  return best;
310}
311
312
313Tree cpp_learn(int trials, char gainRatio, char subset, char batch, char probThresh, int minObjs, int window, int increment, float cf, char prune)
314{ return learn(trials, gainRatio, subset, batch, probThresh, minObjs, window, increment, cf, prune); }
Note: See TracBrowser for help on using the repository browser.