Changeset 4996:6cf2e949e0b3 in orange


Ignore:
Timestamp:
07/18/08 23:13:05 (6 years ago)
Author:
janezd <janez.demsar@…>
Branch:
default
Convert:
380d1c1dc14cdb5aa808fa04222c6ccd71c61df2
Message:
  • association rules and itemsets now support 'storeExamples' (the examples covered by the rule or by its left side)
  • added getItemsets to AssociationRulesInducer (but not yet to its sparse counterpart)
Location:
source/orange
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • source/orange/assoc.cpp

    r1337 r4996  
    3737 
    3838 
    39 class TItemSetNode; 
    40  
    41 /* These objects are collected in TExampleSets, lists of examples that correspond to a particular tree node. 
    42    'example' is a unique example id (basically its index in the original dataset) 
    43    'weight' is the example's weight. */ 
    44 class TExWei { 
    45 public: 
    46   int example; 
    47   float weight; 
    48  
    49   TExWei(const int &ex, const float &wei) 
    50   : example(ex), 
    51     weight(wei) 
    52   {} 
    53 }; 
    54  
    55 /* This is a set of examples, used to list the examples that support a particular tree node */ 
    56 typedef vector<TExWei> TExampleSet; 
    57  
    58  
    59 /* A tree element that corresponds to an attribute value (ie, TItemSetNode has as many 
    60    TlItemSetValues as there are values that appear in itemsets. 
    61    For each value, we have the 'examples' that support it, the sum of their weights 
    62    ('support') and the branch that contains more specialized itemsets. */ 
    63 class TItemSetValue { 
    64 public: 
    65   int value; 
    66   TItemSetNode *branch; 
    67  
    68   float support; 
    69   TExampleSet examples; 
    70  
    71  
    72   // This constructor is called when building the 1-tree 
    73   TItemSetValue(int al) 
    74   : value(al), 
    75     branch(NULL), 
    76     support(0.0) 
    77   {} 
    78  
    79   // This constructor is called when itemsets are intersected (makePairs ets) 
    80   TItemSetValue(int al, const TExampleSet &ex, float asupp) 
    81   : value(al), 
    82     branch(NULL), 
    83     support(asupp), 
    84     examples(ex) 
    85   {} 
    86  
    87   ~TItemSetValue(); 
    88  
    89   void sumSupport() 
    90   {  
    91     support = 0;  
    92     ITERATE(TExampleSet, wi, examples) 
    93       support += (*wi).weight; 
    94   } 
    95 }; 
    96  
    97  
    98 /* TItemSetNode splits itemsets according to the value of attribute 'attrIndex'; 
    99    each element of 'values' corresponds to an attribute value (not necessarily to all, 
    100    but only to those values that appear in itemsets). 
    101    Itemsets for which the value is not defined are stored in a subtree in 'nextAttribute'. 
    102    This can be seen in TItemSetTree::findSupport that finds a node that corresponds to the 
    103    given itemset */ 
    104 class TItemSetNode { 
    105 public: 
    106   int attrIndex; 
    107   TItemSetNode *nextAttribute; 
    108   vector<TItemSetValue> values; 
    109  
    110  
    111   // This constructor is called by 1-tree builder which initializes all values (and later reduces them) 
    112   TItemSetNode(PVariable var, int anattri) 
    113   : attrIndex(anattri),  
    114     nextAttribute(NULL) 
    115   {  
    116     for(int vi = 0, ve = var->noOfValues(); vi<ve; vi++) 
    117       values.push_back(TItemSetValue(vi)); 
    118   } 
    119  
    120  
    121   // This constructor is called when extending the tree 
    122   TItemSetNode(int anattri) 
    123   : attrIndex(anattri),  
    124     nextAttribute(NULL)  
    125   {} 
    126  
    127  
    128   ~TItemSetNode() 
    129   { mldelete nextAttribute; } 
    130 }; 
     39TItemSetValue::TItemSetValue(int al) 
     40: value(al), 
     41  branch(NULL), 
     42  support(0.0) 
     43{} 
     44 
     45 
     46TItemSetValue::TItemSetValue(int al, const TExampleSet &ex, float asupp) 
     47: value(al), 
     48  branch(NULL), 
     49  support(asupp), 
     50  examples(ex) 
     51{} 
     52 
     53TItemSetValue::~TItemSetValue() 
     54{ mldelete branch; 
     55} 
     56 
     57 
     58void TItemSetValue::sumSupport() 
     59{  
     60  support = 0;  
     61  ITERATE(TExampleSet, wi, examples) 
     62    support += (*wi).weight; 
     63} 
     64 
     65 
     66TItemSetNode::TItemSetNode(PVariable var, int anattri) 
     67: attrIndex(anattri),  
     68  nextAttribute(NULL) 
     69{  
     70  for(int vi = 0, ve = var->noOfValues(); vi<ve; vi++) 
     71    values.push_back(TItemSetValue(vi)); 
     72} 
     73 
     74 
     75TItemSetNode::TItemSetNode(int anattri) 
     76: attrIndex(anattri),  
     77  nextAttribute(NULL)  
     78{} 
     79 
     80 
     81TItemSetNode::~TItemSetNode() 
     82{ mldelete nextAttribute; } 
    13183 
    13284 
     
    157109 
    158110 
    159 TItemSetValue::~TItemSetValue() 
    160 { mldelete branch; 
    161 } 
    162  
    163  
     111 
     112void setMatchingExamples(PAssociationRule rule, const TExampleSet &leftSet, const TExampleSet &bothSets) 
     113{ 
     114   TIntList *matchLeft = new TIntList(); 
     115   rule->matchLeft = matchLeft; 
     116   const_ITERATE(TExampleSet, nli, leftSet) 
     117     matchLeft->push_back((*nli).example); 
     118 
     119   TIntList *matchBoth = new TIntList(); 
     120   rule->matchBoth = matchBoth; 
     121   const_ITERATE(TExampleSet, nri, bothSets) 
     122     matchBoth->push_back((*nri).example); 
     123} 
    164124 
    165125TAssociationRule::TAssociationRule(PExample al, PExample ar) 
     
    259219 
    260220/* Searches the tree to find a node that corresponds to itemset 'ex' and returns its support */ 
    261 float findSupport(const TExample &ex, TItemSetNode *node) 
     221float findSupport(const TExample &ex, TItemSetNode *node, TItemSetValue **actualNode = NULL) 
    262222{ 
    263223  vector<TItemSetValue>::iterator li = node->values.begin(); // This is initialized just to avoid warnings. 
     
    292252    while((++ei!=ex.end()) && (*ei).isSpecial()); 
    293253 
    294   return (ei==ex.end()) ? (*li).support : 0; 
    295 } 
    296  
    297  
    298 float findSupport(const TExample &ex, TRuleTreeNode *node) 
     254  if (ei == ex.end()) { 
     255    if (actualNode) 
     256      *actualNode = &*li; 
     257    return (*li).support; 
     258  } 
     259   
     260    if (actualNode) 
     261      *actualNode = NULL; 
     262  return 0; 
     263} 
     264 
     265 
     266float findSupport(const TExample &ex, TRuleTreeNode *node, TRuleTreeNode **actualNode = NULL) 
    299267{ 
    300268  TExample::const_iterator ei(ex.begin()), eei(ex.end()); 
     
    308276 
    309277    while((++ei!=eei) && !(*ei).isSpecial()); 
    310     if (ei==eei) 
     278    if (ei==eei) { 
     279      if (actualNode) 
     280        *actualNode = node; 
    311281      return node->support; 
     282    } 
    312283  } 
    313284 
     
    321292  confidence(aconf), 
    322293  support(asupp), 
    323   classificationRules(false) 
     294  classificationRules(false), 
     295  storeExamples(false) 
    324296{} 
    325297 
     
    327299PAssociationRules TAssociationRulesInducer::operator()(PExampleGenerator examples, const int &weightID) 
    328300{ 
    329  
    330   if (classificationRules && !examples->domain->classVar) 
    331     raiseError("cannot induce classification rules on classless data"); 
    332301 
    333302  PITERATE(TVarList, vi, examples->domain->variables) 
     
    337306  TItemSetNode *tree = NULL; 
    338307  PAssociationRules rules; 
     308  if (classificationRules && !examples->domain->classVar) 
     309    raiseError("cannot induce classification rules on classless data"); 
     310 
    339311  try { 
    340312    int depth, nOfExamples; 
    341313    TDiscDistribution classDist; 
    342314    buildTrees(examples, weightID, tree, depth, nOfExamples, classDist); 
    343  
     315     
    344316    rules = classificationRules ? generateClassificationRules(examples->domain, tree, nOfExamples, classDist) 
    345317                                : generateRules(examples->domain, tree, depth, nOfExamples); 
     318                                 
     319    if (storeExamples) { 
     320      PExampleTable xmpls = mlnew TExampleTable(examples); 
     321      PITERATE(TAssociationRules, ri, rules) 
     322        (*ri)->examples = xmpls; 
     323    } 
    346324  } 
    347325  catch (...) { 
     
    354332} 
    355333 
    356  
     334     
     335     
    357336void TAssociationRulesInducer::buildTrees(PExampleGenerator gen, const int &weightID, TItemSetNode *&tree, int &depth, int &nOfExamples, TDiscDistribution &classDist) 
    358337{  
     
    519498{ TExample left(dom); 
    520499  PAssociationRules rules = mlnew TAssociationRules(); 
    521   generateClassificationRules1(dom, tree, tree, left, 0, nOfExamples, rules, nOfExamples, classDist); 
     500  generateClassificationRules1(dom, tree, tree, left, 0, nOfExamples, rules, nOfExamples, classDist, NULL); 
    522501  return rules; 
    523502} 
    524503 
    525504 
    526 void TAssociationRulesInducer::generateClassificationRules1(PDomain dom, TItemSetNode *root, TItemSetNode *node, TExample &left, const int nLeft, const float nAppliesLeft, PAssociationRules rules, const int nOfExamples, const TDiscDistribution &classDist) 
     505void TAssociationRulesInducer::generateClassificationRules1(PDomain dom, TItemSetNode *root, TItemSetNode *node, TExample &left, const int nLeft, const float nAppliesLeft, PAssociationRules rules, const int nOfExamples, const TDiscDistribution &classDist, TExampleSet *leftSet) 
    527506{  
    528507  for(; node; node = node->nextAttribute) 
     
    532511        if ((*li).branch) { 
    533512          left[node->attrIndex] = TValue((*li).value); 
    534           generateClassificationRules1(dom, root, (*li).branch, left, nLeft+1, (*li).support, rules, nOfExamples, classDist); 
     513          generateClassificationRules1(dom, root, (*li).branch, left, nLeft+1, (*li).support, rules, nOfExamples, classDist, &(*li).examples); 
    535514        } 
    536515      left[node->attrIndex].setDC(); 
     
    546525            right->setClass(TValue((*li).value)); 
    547526            PAssociationRule rule = mlnew TAssociationRule(mlnew TExample(left), right, nAppliesLeft, classDist[(*li).value], nAppliesBoth, nOfExamples, nLeft, 1); 
     527            if (storeExamples) 
     528              if (!leftSet) { 
     529                set<int> allExamplesSet; 
     530                ITERATE(vector<TItemSetValue>, ri, root->values) 
     531                  ITERATE(TExampleSet, ei, (*ri).examples) 
     532                    allExamplesSet.insert((*ei).example); 
     533                TExampleSet allExamples; 
     534                ITERATE(set<int>, ali, allExamplesSet) 
     535                  allExamples.push_back(TExWei(*ali, 1)); 
     536                setMatchingExamples(rule, allExamples, (*li).examples); 
     537              } 
     538              else { 
     539                setMatchingExamples(rule, *leftSet, (*li).examples); 
     540              } 
    548541            rules->push_back(rule); 
    549542          } 
     
    585578        /* Rule with one item on the right are treated separately. 
    586579           Incidentally, these are also the only that are suitable for classification rules */ 
    587         find1Rules(ex, root, (*li).support, nBoth, rules, nOfExamples); 
     580        find1Rules(ex, root, (*li).support, nBoth, rules, nOfExamples, (*li).examples); 
    588581 
    589582        if (nBoth>2) { 
     
    593586            TExample example(ex.domain); 
    594587            for(int m = 2; 
    595                 (m <= nBoth-1) && generateNext1(ruleTree, ruleTree, root, example, m, ex, (*li).support, rules, nOfExamples) > 2; 
     588                (m <= nBoth-1) && generateNext1(ruleTree, ruleTree, root, example, m, ex, (*li).support, rules, nOfExamples, (*li).examples) > 2; 
    596589                m++); 
    597590          } 
     
    611604/* For each value in the itemset, check whether the rule with this value on the right 
    612605   and all others on the left has enough confidence, and add it if so. */ 
    613 void TAssociationRulesInducer::find1Rules(TExample &example, TItemSetNode *tree, const float &nAppliesBoth, const int nBoth, PAssociationRules rules, const int nOfExamples) 
     606void TAssociationRulesInducer::find1Rules(TExample &example, TItemSetNode *tree, const float &nAppliesBoth, const int nBoth, PAssociationRules rules, const int nOfExamples, const TExampleSet &bothSets) 
    614607{ 
    615608  TExample left(example), right(example.domain); 
     
    618611      (*lefti).setDC(); 
    619612      *righti = *ei; 
    620       const float nAppliesLeft = findSupport(left, tree); 
     613      TItemSetValue *nodeLeft; 
     614      const float nAppliesLeft = findSupport(left, tree, &nodeLeft); 
    621615      const float tconf = nAppliesBoth/nAppliesLeft; 
    622616      if (tconf >= confidence) { 
    623617        const float nAppliesRight = findSupport(right, tree); 
    624618        PAssociationRule rule = mlnew TAssociationRule(mlnew TExample(left), mlnew TExample(right), nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, nBoth-1, 1); 
     619        if (storeExamples) 
     620          setMatchingExamples(rule, nodeLeft->examples, bothSets); 
    625621        rules->push_back(rule); 
    626622      } 
     
    661657int TAssociationRulesInducer::generateNext1(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsets, 
    662658                                            TExample &right, int k, TExample &wholeEx, const float &nAppliesBoth, 
    663                                             PAssociationRules rules, const int nOfExamples) 
     659                                            PAssociationRules rules, const int nOfExamples, const TExampleSet &bothSets) 
    664660{ 
    665661  if (k==2) 
    666     return generatePairs(ruleTree, node, itemsets, right, wholeEx, nAppliesBoth, rules, nOfExamples); 
     662    return generatePairs(ruleTree, node, itemsets, right, wholeEx, nAppliesBoth, rules, nOfExamples, bothSets); 
    667663 
    668664  int itemSets = 0; 
     
    670666    if (node->hasValue) { 
    671667      right[node->attrIndex] = TValue(node->value); 
    672       itemSets += generateNext1(ruleTree, node->hasValue, itemsets, right, k-1, wholeEx, nAppliesBoth, rules, nOfExamples); 
     668      itemSets += generateNext1(ruleTree, node->hasValue, itemsets, right, k-1, wholeEx, nAppliesBoth, rules, nOfExamples, bothSets); 
    673669      right[node->attrIndex].setDC(); 
    674670    } 
     
    680676int TAssociationRulesInducer::generatePairs(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsets, 
    681677                                            TExample &right, TExample &wholeEx, const float &nAppliesBoth, 
    682                                             PAssociationRules rules, const int nOfExamples) 
     678                                            PAssociationRules rules, const int nOfExamples, const TExampleSet &bothSets) 
    683679{ 
    684680   int itemSets = 0; 
     
    696692           *lefti = *wi; 
    697693 
    698        float nAppliesLeft = findSupport(left.getReference(), itemsets); 
     694       TItemSetValue *nodeLeft; 
     695       float nAppliesLeft = findSupport(left.getReference(), itemsets, &nodeLeft); 
    699696       float aconf = nAppliesBoth/nAppliesLeft; 
    700697       if (aconf>=confidence) { 
     
    711708 
    712709         PAssociationRule rule = mlnew TAssociationRule(left, mlnew TExample(right), nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples); 
     710         if (storeExamples) 
     711           setMatchingExamples(rule, nodeLeft->examples, bothSets); 
    713712         rules->push_back(rule); 
    714713       } 
  • source/orange/assoc.hpp

    r4339 r4996  
    3636 
    3737WRAPPER(Example) 
     38VWRAPPER(IntList) 
     39WRAPPER(ExampleTable) 
    3840 
    3941class ORANGE_API TAssociationRule : public TOrange { 
     
    5557  int nLeft; //PR number of items on the rule's left side 
    5658  int nRight; //PR number of items on the rule's right side 
     59   
     60  PExampleTable examples; //PR examples which the rule was built from 
     61  PIntList matchLeft; //PR indices of examples that match the left side of the rule 
     62  PIntList matchBoth; //PR indices to examples that match both sides of the rule 
    5763 
    5864  TAssociationRule(PExample, PExample); 
     
    9096 
    9197class TItemSetNode; 
     98 
     99/* These objects are collected in TExampleSets, lists of examples that correspond to a particular tree node. 
     100   'example' is a unique example id (basically its index in the original dataset) 
     101   'weight' is the example's weight. */ 
     102class TExWei { 
     103public: 
     104  int example; 
     105  float weight; 
     106 
     107  TExWei(const int &ex, const float &wei) 
     108  : example(ex), 
     109    weight(wei) 
     110  {} 
     111}; 
     112 
     113/* This is a set of examples, used to list the examples that support a particular tree node */ 
     114typedef vector<TExWei> TExampleSet; 
     115 
     116 
     117/* A tree element that corresponds to an attribute value (ie, TItemSetNode has as many 
     118   TlItemSetValues as there are values that appear in itemsets. 
     119   For each value, we have the 'examples' that support it, the sum of their weights 
     120   ('support') and the branch that contains more specialized itemsets. */ 
     121class TItemSetValue { 
     122public: 
     123  int value; 
     124  TItemSetNode *branch; 
     125 
     126  float support; 
     127  TExampleSet examples; 
     128 
     129  // This constructor is called when building the 1-tree 
     130  TItemSetValue(int al); 
     131   
     132  // This constructor is called when itemsets are intersected (makePairs ets) 
     133  TItemSetValue(int al, const TExampleSet &ex, float asupp); 
     134 
     135  ~TItemSetValue(); 
     136  void sumSupport(); 
     137}; 
     138 
     139 
     140/* TItemSetNode splits itemsets according to the value of attribute 'attrIndex'; 
     141   each element of 'values' corresponds to an attribute value (not necessarily to all, 
     142   but only to those values that appear in itemsets). 
     143   Itemsets for which the value is not defined are stored in a subtree in 'nextAttribute'. 
     144   This can be seen in TItemSetTree::findSupport that finds a node that corresponds to the 
     145   given itemset */ 
     146class TItemSetNode { 
     147public: 
     148  int attrIndex; 
     149  TItemSetNode *nextAttribute; 
     150  vector<TItemSetValue> values; 
     151 
     152  // This constructor is called by 1-tree builder which initializes all values (and later reduces them) 
     153  TItemSetNode(PVariable var, int anattri); 
     154 
     155  // This constructor is called when extending the tree 
     156  TItemSetNode(int anattri); 
     157 
     158  ~TItemSetNode(); 
     159}; 
     160 
    92161class TRuleTreeNode; 
     162 
    93163 
    94164class ORANGE_API TAssociationRulesInducer : public TOrange { 
     
    101171  float support; //P required support 
    102172  bool classificationRules; //P if true, rules will have the class and only the class attribute on the right-hand side 
     173  bool storeExamples; //P if true, each rule is going to have tables with references to examples which match its left side or both sides 
    103174 
    104175public: 
     
    113184 
    114185  PAssociationRules generateClassificationRules(PDomain, TItemSetNode *tree, const int nOfExamples, const TDiscDistribution &); 
    115   void generateClassificationRules1(PDomain, TItemSetNode *root, TItemSetNode *node, TExample &left, const int nLeft, const float nAppliesLeft, PAssociationRules, const int nOfExamples, const TDiscDistribution &); 
     186  void generateClassificationRules1(PDomain, TItemSetNode *root, TItemSetNode *node, TExample &left, const int nLeft, const float nAppliesLeft, PAssociationRules, const int nOfExamples, const TDiscDistribution &, TExampleSet *leftSet); 
    116187 
    117188  PAssociationRules generateRules(PDomain, TItemSetNode *, const int depth, const int nOfExamples); 
    118189  void generateRules1(TExample &, TItemSetNode *root, TItemSetNode *node, int k, int oldk, PAssociationRules, const int nOfExamples); 
    119   void find1Rules(TExample &, TItemSetNode *, const float &support, int oldk, PAssociationRules, const int nOfExamples); 
     190  void find1Rules(TExample &, TItemSetNode *, const float &support, int oldk, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets); 
    120191  TRuleTreeNode *buildTree1FromExample(TExample &, TItemSetNode *); 
    121   int generateNext1(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, int k, TExample &whole, const float &support, PAssociationRules, const int nOfExamples); 
    122   int generatePairs(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, TExample &whole, const float &support, PAssociationRules, const int nOfExamples); 
     192  int generateNext1(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, int k, TExample &whole, const float &support, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets); 
     193  int generatePairs(TRuleTreeNode *ruleTree, TRuleTreeNode *node, TItemSetNode *itemsetsTree, TExample &right, TExample &whole, const float &support, PAssociationRules, const int nOfExamples, const TExampleSet &bothSets); 
    123194}; 
    124195 
     
    158229    TSparseItemsetNode *parent;                 //pointer to parent node 
    159230    TSparseISubNodes subNode;               //children items 
     231    vector<int> exampleIds; 
    160232     
    161233    TSparseItemsetNode(long avalue = -1);           //constructor 
     
    178250    void considerItemset(long itemset[], int iLength, float weight, int aimLength); 
    179251    void considerExamples(TSparseExamples *examples, int aimLength); 
    180     void delLeafSmall(float minSupport); 
    181     PAssociationRules genRules(int maxDepth, float minConf, float nOfExamples); 
     252  void assignExamples(TSparseItemsetNode *node, long *itemset, long *itemsetend, const int exampleId); 
     253  void assignExamples(TSparseExamples &examples); 
     254  void delLeafSmall(float minSupport); 
     255    PAssociationRules genRules(int maxDepth, float minConf, float nOfExamples, bool storeExamples); 
    182256    long getItemsetRules(long itemset[], int iLength, float minConf,  
    183                          float nAppliesBoth, float nOfExamples, PAssociationRules rules); 
     257                         float nAppliesBoth, float nOfExamples, PAssociationRules rules, bool storeExamples, TSparseItemsetNode *bothNode); 
    184258    PDomain domain; 
    185259 
     
    197271  float confidence; //P required confidence 
    198272  float support; //P required support 
     273   
     274  bool storeExamples; //P stores examples corresponding to rules 
    199275 
    200276  TAssociationRulesSparseInducer(float asupp=0.1, float aconf=0, int awei=0); 
     
    216292  int maxItemSets; //P maximal number of itemsets (increase if you want) 
    217293  float support; //P required support 
     294 
     295  bool storeExamples; //P stores examples corresponding to itemsets 
    218296 
    219297  TItemsetsSparseInducer(float asupp=0.1, int awei=0); 
  • source/orange/assoc_sparse.cpp

    r4339 r4996  
    2323#include "assoc.hpp" 
    2424#include "examplegen.hpp" 
     25#include "table.hpp" 
    2526 
    2627 
     
    8788  } 
    8889  else { 
    89     for(int i = 0, e = examples->domain->variables; i!=e; i++) 
     90    for(int i = 0, e = examples->domain->variables->size(); i!=e; i++) 
    9091      intDomain.push_back(i); 
    9192    } 
     
    274275} 
    275276 
     277void TSparseItemsetTree::assignExamples(TSparseItemsetNode *node, long *itemset, long *itemsetend, const int exampleId) 
     278{ 
     279  node->exampleIds.push_back(exampleId); 
     280  if (!node->subNode.empty()) 
     281    for(; itemset != itemsetend; itemset++) 
     282      if (node->hasNode(*itemset)) 
     283        assignExamples((*node)[*itemset], itemset+1, itemsetend, exampleId); 
     284} 
     285     
     286void TSparseItemsetTree::assignExamples(TSparseExamples &examples) 
     287{ 
     288  int exampleId = 0; 
     289  ITERATE(vector<TSparseExample*>,ei,examples.transaction) 
     290    assignExamples(root, (*ei)->itemset, (*ei)->itemset+(*ei)->length, exampleId++); 
     291 
     292 
     293 
    276294 
    277295// deletes all leaves that have weiSupp smaler than given minSupp; 
     
    300318 
    301319// generates all posible association rules from tree using given confidence 
    302 PAssociationRules TSparseItemsetTree::genRules(int maxDepth, float minConf, float nOfExamples) { 
     320PAssociationRules TSparseItemsetTree::genRules(int maxDepth, float minConf, float nOfExamples, bool storeExamples) { 
    303321    typedef pair<TSparseItemsetNode *,int> NodeDepth; //<node,depth> 
    304322 
     
    323341     
    324342        if (currDepth > 1) 
    325             count += getItemsetRules(itemset, currDepth, minConf, currNode->weiSupp, nOfExamples, rules);   //creates rules from itemsets and adds them to rules 
     343            count += getItemsetRules(itemset, currDepth, minConf, currNode->weiSupp, nOfExamples, rules, storeExamples, currNode);  //creates rules from itemsets and adds them to rules 
    326344 
    327345        RITERATE(TSparseISubNodes,sni,currNode->subNode)        //adds subnodes to list 
     
    335353long TSparseItemsetTree::getItemsetRules(long itemset[], int iLength, float minConf,  
    336354                                   float nAppliesBoth, float nOfExamples,  
    337                                    PAssociationRules rules) { 
     355                                   PAssociationRules rules, 
     356                                   bool storeExamples, TSparseItemsetNode *bothNode) { 
    338357     
    339358    float nAppliesLeft, nAppliesRight; 
     
    414433                  //add rules 
    415434                  rule = mlnew TAssociationRule(exLeftS, exRightS, nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, currDepth, iLength-currDepth); 
     435                  if (storeExamples) { 
     436                    rule->matchLeft = new TIntList(currNode->exampleIds); 
     437                    rule->matchBoth = new TIntList(bothNode->exampleIds); 
     438                  } 
     439 
    416440                  rules->push_back(rule); 
    417441                  count ++; 
     
    466490  confidence(aconf), 
    467491  support(asupp), 
    468   nOfExamples(0.0) 
     492  nOfExamples(0.0), 
     493  storeExamples(false) 
    469494{} 
    470495 
     
    498523    } 
    499524     
    500     return tree->genRules(i, confidence, sparseExm.fullWeight); 
     525    if (storeExamples) 
     526      tree->assignExamples(sparseExm); 
     527       
     528    PAssociationRules rules = tree->genRules(i, confidence, sparseExm.fullWeight, storeExamples); 
     529     
     530  if (storeExamples) { 
     531    PExampleTable xmpls = mlnew TExampleTable(examples); 
     532    PITERATE(TAssociationRules, ri, rules) 
     533      (*ri)->examples = xmpls; 
     534  } 
     535  return rules; 
    501536} 
    502537 
     
    510545: maxItemSets(15000), 
    511546  support(asupp), 
    512   nOfExamples(0.0) 
     547  nOfExamples(0.0), 
     548  storeExamples(false) 
    513549{} 
    514550 
     
    542578    } 
    543579 
     580  if (storeExamples) 
     581      tree->assignExamples(sparseExm); 
     582     
    544583    return tree; 
    545584} 
  • source/orange/lib_learner.cpp

    r4929 r4996  
    9595} 
    9696 
     97void gatherRules(TItemSetNode *node, vector<pair<int, int> > &itemsSoFar, PyObject *listOfItems, bool storeExamples) 
     98{ 
     99  for(; node; node = node->nextAttribute) { 
     100    itemsSoFar.push_back(make_pair(node->attrIndex, (int)0)); 
     101    ITERATE(vector<TItemSetValue>, isi, node->values) { 
     102      itemsSoFar.back().second = (*isi).value; 
     103 
     104      PyObject *itemset = PyTuple_New(itemsSoFar.size()); 
     105      int el = 0; 
     106      vector<pair<int, int> >::const_iterator sfi(itemsSoFar.begin()), sfe(itemsSoFar.end()); 
     107      for(; sfi != sfe; sfi++, el++) { 
     108        PyObject *vp = PyTuple_New(2); 
     109        PyTuple_SET_ITEM(vp, 0, PyInt_FromLong((*sfi).first)); 
     110        PyTuple_SET_ITEM(vp, 1, PyInt_FromLong((*sfi).second)); 
     111        PyTuple_SET_ITEM(itemset, el, vp); 
     112      } 
     113       
     114      PyObject *examples; 
     115      if (storeExamples) { 
     116        examples = PyList_New((*isi).examples.size()); 
     117        int ele = 0; 
     118        ITERATE(TExampleSet, ei, (*isi).examples) 
     119          PyList_SetItem(examples, ele++, PyInt_FromLong((*ei).example)); 
     120      } 
     121      else { 
     122        examples = Py_None; 
     123        Py_INCREF(Py_None); 
     124      } 
     125 
     126      PyObject *rr = PyTuple_New(2); 
     127      PyTuple_SET_ITEM(rr, 0, itemset); 
     128      PyTuple_SET_ITEM(rr, 1, examples); 
     129       
     130      PyList_Append(listOfItems, rr); 
     131      Py_DECREF(rr); 
     132       
     133      gatherRules((*isi).branch, itemsSoFar, listOfItems, storeExamples); 
     134    } 
     135    itemsSoFar.pop_back(); 
     136  } 
     137} 
     138 
     139PyObject *AssociationRulesInducer_getItemsets(PyObject *self, PyObject *args, PyObject *keywords) PYARGS(METH_VARARGS, "(examples[, weightID]) -> list-of-itemsets") 
     140{  
     141  PyTRY 
     142    int weightID; 
     143    PExampleGenerator egen = exampleGenFromArgs(args, weightID); 
     144    if (!egen) 
     145      return PYNULL; 
     146 
     147    if (egen->domain->hasContinuousAttributes(true)) 
     148      PYERROR(PyExc_TypeError, "cannot induce rules with non-discrete attributes", NULL); 
     149 
     150    TItemSetNode *tree = NULL; 
     151    int depth, nOfExamples; 
     152    TDiscDistribution classDist; 
     153    CAST_TO(TAssociationRulesInducer, inducer); 
     154    inducer->buildTrees(egen, weightID, tree, depth, nOfExamples, classDist); 
     155     
     156    PyObject *listOfItemsets = PyList_New(0); 
     157    vector<pair<int, int> > itemsSoFar; 
     158    gatherRules(tree, itemsSoFar, listOfItemsets, inducer->storeExamples); 
     159    delete tree; 
     160    return listOfItemsets; 
     161  PyCATCH 
     162} 
    97163 
    98164PyObject *AssociationRulesSparseInducer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples[, weightID]) -> AssociationRules") 
     
    113179public: 
    114180    const TSparseItemsetNode *node; 
     181    PSparseItemsetTree tree; 
    115182     
    116     TItemsetNodeProxy(const TSparseItemsetNode *n) 
    117     : node(n) 
     183    TItemsetNodeProxy(const TSparseItemsetNode *n, PSparseItemsetTree t) 
     184    : node(n), 
     185    tree(t) 
    118186    {} 
    119187}; 
     188 
    120189 
    121190PyObject *ItemsetsSparseInducer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples[, weightID]) -> AssociationRules") 
     
    130199 
    131200    PSparseItemsetTree tree = SELF_AS(TItemsetsSparseInducer)(egen, weightID); 
    132     return WrapOrange(POrange(new TItemsetNodeProxy(tree->root))); 
    133   PyCATCH 
     201    return WrapOrange(POrange(new TItemsetNodeProxy(tree->root, tree))); 
     202  PyCATCH 
     203} 
     204 
     205PYXTRACT_IGNORE int Orange_traverse(TPyOrange *, visitproc, void *); 
     206PYXTRACT_IGNORE int Orange_clear(TPyOrange *); 
     207 
     208int ItemsetNodeProxy_traverse(PyObject *self, visitproc visit, void *arg) 
     209{  
     210    int err = Orange_traverse((TPyOrange *)self, visit, arg); 
     211    if (err) 
     212        return err; 
     213 
     214    CAST_TO_err(TItemsetNodeProxy, node, -1); 
     215    PVISIT(node->tree); 
     216  PVISIT(node->tree->domain); 
     217    return 0; 
     218} 
     219 
     220int ItemsetNodeProxy_clear(PyObject *self) 
     221{  
     222  SELF_AS(TItemsetNodeProxy).tree = PSparseItemsetTree(); 
     223    return Orange_clear((TPyOrange *)self); 
    134224} 
    135225 
     
    137227{ 
    138228  PyTRY 
    139     const TSparseItemsetNode *me = SELF_AS(TItemsetNodeProxy).node; 
     229    CAST_TO(TItemsetNodeProxy, nodeProxy); 
     230    const TSparseItemsetNode *me = nodeProxy->node; 
    140231    PyObject *children = PyDict_New(); 
    141232    const_ITERATE(TSparseISubNodes, ci, me->subNode) 
    142       PyDict_SetItem(children, PyInt_FromLong(ci->first), WrapOrange(POrange(new TItemsetNodeProxy(ci->second)))); 
     233      PyDict_SetItem(children, PyInt_FromLong(ci->first), WrapOrange(POrange(new TItemsetNodeProxy(ci->second, nodeProxy->tree)))); 
    143234    return children; 
     235  PyCATCH 
     236} 
     237 
     238PyObject *ItemsetNodeProxy_get_examples(PyObject *self) 
     239{ 
     240  PyTRY 
     241    const TSparseItemsetNode *me = SELF_AS(TItemsetNodeProxy).node; 
     242    PyObject *examples = PyList_New(me->exampleIds.size()); 
     243    int i = 0; 
     244    const_ITERATE(vector<int>, ci, me->exampleIds) 
     245      PyList_SetItem(examples, i++, PyInt_FromLong(*ci)); 
     246    return examples; 
    144247  PyCATCH 
    145248} 
Note: See TracChangeset for help on using the changeset viewer.