source: orange/Orange/multilabel/lp.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 4.8 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
1"""
2.. index:: LabelPowerset Learner
3
4***************************************
5LabelPowerset Learner
6***************************************
7
8LabelPowerset Classification is another transformation method for multi-label classification.
9It considers each different set of labels that exists in the multi-label data as a
10single class. Thus it learns a classification problem :math:`H:X \\rightarrow \\mathbb{P}(L)`, where
11:math:`\\mathbb{P}(L)` is the power set of L.
12For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview
13<http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_.
14International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.
15
16.. index:: LabelPowerset Learner
17.. autoclass:: Orange.multilabel.LabelPowersetLearner
18   :members:
19   :show-inheritance:
20 
21   :param instances: a table of instances.
22   :type instances: :class:`Orange.data.Table`
23     
24   :param base_learner: the single-label (multi-class) learner trained on the transformed data; the default learner is BayesLearner
25   :type base_learner: :class:`Orange.classification.Learner`
26
27.. index:: LabelPowerset Classifier
28.. autoclass:: Orange.multilabel.LabelPowersetClassifier
29   :members:
30   :show-inheritance:
31   
32Examples
33========
34
35The following example demonstrates a straightforward invocation of
36this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`, uses
37:download:`emotions.tab <code/emotions.tab>`):
38
39.. literalinclude:: code/mlc-classify.py
40   :lines: 6, 19-21
41
42"""
43
44import Orange
45from Orange.core import BayesLearner as _BayesLearner
46import multibase as _multibase
47
def get_label_bitstream(e):
    """
    Build the label string of a multi-label instance.

    The values of all class variables of ``e`` (as returned by
    ``e.get_classes()``) are concatenated in order, without a separator.

    :param e: an instance exposing ``get_classes()``.
    :rtype: str
    """
    pieces = []
    for label_value in e.get_classes():
        pieces.append(label_value.value)
    return ''.join(pieces)
50
def transform_to_powerset(instances):
    """
    Convert a multi-label table into a single-label table.

    A new discrete class variable named ``"label"`` is created; its values
    are the label strings (see :func:`get_label_bitstream`) of the given
    instances. The returned table keeps the original attributes and uses the
    new variable as its class.

    :param instances: a table of multi-label instances.
    :type instances: :class:`Orange.data.Table`
    :rtype: :class:`Orange.data.Table`
    """
    attributes = instances.domain.attributes
    powerset_var = Orange.data.variable.Discrete("label")

    # First pass: register every label combination that occurs in the data
    # as a value of the new class variable.
    for inst in instances:
        powerset_var.add_value(get_label_bitstream(inst))

    powerset_domain = Orange.data.Domain(attributes, powerset_var)

    # Second pass: copy the attribute values and append the label string
    # as the class value of each new row.
    powerset_table = Orange.data.Table(powerset_domain)
    for inst in instances:
        row_values = [inst[attr].value for attr in attributes]
        row_values.append(get_label_bitstream(inst))
        powerset_table.append(Orange.data.Instance(powerset_domain, row_values))

    return powerset_table
71
class LabelPowersetLearner(_multibase.MultiLabelLearner):
    """
    Class that implements the LabelPowerset (LP) method.

    The multi-label data is transformed with :func:`transform_to_powerset`
    and a single-label learner is trained on the result.

    :param instances: a table of instances. If given, the learner is
        trained immediately and a :class:`LabelPowersetClassifier` is
        returned instead of the learner itself.
    :type instances: :class:`Orange.data.Table`

    :param base_learner: the learner trained on the transformed data;
        BayesLearner is used when none is given.
    :type base_learner: :class:`Orange.classification.Learner`

    :param weight_id: passed on to the resulting classifier.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        self = _multibase.MultiLabelLearner.__new__(cls, **argkw)

        # Remember the base learner so that it is still used when the
        # learner is constructed first and called with data later.
        if base_learner:
            self.base_learner = base_learner

        if instances:
            self.__init__(**argkw)
            return self.__call__(instances, base_learner, weight_id)
        else:
            return self

    def __call__(self, instances, base_learner = None, weight_id = 0, **kwds):
        """
        Learn from the given table of data instances.

        :param instances: a table of multi-label instances.
        :type instances: :class:`Orange.data.Table`
        :param base_learner: overrides the base learner for this call; when
            omitted, the learner given at construction time is used, and
            BayesLearner if there was none.
        :param weight_id: passed on to the resulting classifier.
        :rtype: :class:`LabelPowersetClassifier`
        :raises TypeError: if ``instances`` is not a multi-label data set.
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set.")

        self.__dict__.update(kwds)

        new_table = transform_to_powerset(instances)

        # Precedence: explicit argument, then the learner stored on this
        # object, then the default.
        if not base_learner:
            base_learner = getattr(self, "base_learner", None) or _BayesLearner

        # NOTE: weight_id is not forwarded to the base learner; it is only
        # handed to the classifier below.
        classifier = base_learner(new_table)

        return LabelPowersetClassifier(instances = instances,
                                       classifier = classifier,
                                       weight_id = weight_id)
101
class LabelPowersetClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier that maps the prediction of the wrapped single-label
    ``classifier`` back to one value per label of ``instances``.
    """
    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        predicted = self.classifier(instance)

        # Each character of the predicted class value is paired with one
        # label variable, in domain order.
        pairs = list(zip(predicted.value, self.instances.domain.class_vars))
        labels = [Orange.data.Value(label_var, bit) for bit, label_var in pairs]

        if result_type == Orange.classification.Classifier.GetValue:
            return labels

        # The distributions are degenerate: all mass is put on the
        # predicted value of each label.
        disc = []
        for bit, label_var in pairs:
            p = float(bit == '1')
            dist = Orange.statistics.distribution.Discrete([1-p, p])
            dist.variable = label_var
            disc.append(dist)

        if result_type == Orange.classification.Classifier.GetProbabilities:
            return disc
        return labels,disc
125
126#########################################################################################
127# Simple self-test: run this module as a script from a command prompt.
128# Assumes the data file emotions.tab can be found by Orange.data.Table.
129
if __name__ == "__main__":
    # Train on the emotions data set and print the predicted labels and
    # their distributions for the first ten instances.
    data = Orange.data.Table("emotions.tab")
    get_both = Orange.classification.Classifier.GetBoth

    classifier = Orange.multilabel.LabelPowersetLearner(data)
    for index in range(10):
        values, distributions = classifier(data[index], get_both)
        print("%s %s" % (values, distributions))
Note: See TracBrowser for help on using the repository browser.