source: orange/Orange/multilabel/lp.py @ 9994:1073e0304a87

Revision 9994:1073e0304a87, 4.7 KB checked in by Matija Polajnar <matija.polajnar@…>, 2 years ago (diff)

Remove links from documentation to datasets. Remove datasets reference directory.

Line 
1"""
2.. index:: LabelPowerset Learner
3
4***************************************
5LabelPowerset Learner
6***************************************
7
LabelPowerset classification is a problem-transformation method for multi-label classification.
It treats each distinct set of labels that occurs in the multi-label data as a
single class, and thus learns a single-label classifier :math:`H:X \\rightarrow \\mathbb{P}(L)`, where
:math:`\\mathbb{P}(L)` is the power set of the label set L.
12For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview
13<http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_.
14International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.
15
16.. index:: LabelPowerset Learner
17.. autoclass:: Orange.multilabel.LabelPowersetLearner
18   :members:
19   :show-inheritance:
20 
21   :param instances: a table of instances.
22   :type instances: :class:`Orange.data.Table`
23     
   :param base_learner: the single-label learner trained on the transformed data; defaults to BayesLearner
25   :type base_learner: :class:`Orange.classification.Learner`
26
27.. index:: LabelPowerset Classifier
28.. autoclass:: Orange.multilabel.LabelPowersetClassifier
29   :members:
30   :show-inheritance:
31   
32Examples
33========
34
35The following example demonstrates a straightforward invocation of
36this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`):
37
38.. literalinclude:: code/mlc-classify.py
39   :lines: 6, 19-21
40
41"""
42
43import Orange
44from Orange.core import BayesLearner as _BayesLearner
45import multibase as _multibase
46
def get_label_bitstream(e):
    """
    Encode the label set of a multi-label instance as one string.

    The ``value`` of every item returned by ``e.get_classes()`` is
    concatenated in the order the items are returned.

    :param e: an object exposing ``get_classes()``, whose items each have a
        string ``value`` attribute.
    :rtype: str

    .. note::
        :class:`LabelPowersetClassifier` decodes this string one character
        per label, so every label value is presumably a single character
        such as ``'0'`` or ``'1'`` -- confirm against the data format.
    """
    return ''.join(label.value for label in e.get_classes())
49
def transform_to_powerset(instances):
    """
    Build a single-label copy of a multi-label table.

    Each instance keeps the values of ``instances.domain.attributes`` and
    receives one discrete class named ``"label"`` whose value is the string
    produced by :func:`get_label_bitstream` for that instance.

    :param instances: a table of multi-label instances.
    :type instances: :class:`Orange.data.Table`
    :rtype: :class:`Orange.data.Table`
    """
    # Encode every label set once; the encodings are needed both to declare
    # the class values and to fill in the rows.
    bitstreams = [get_label_bitstream(e) for e in instances]

    new_class = Orange.feature.Discrete("label")
    # Declare each distinct label set once, in first-seen order, so the
    # result does not depend on how add_value treats repeated values.
    seen = set()
    for bitstream in bitstreams:
        if bitstream not in seen:
            seen.add(bitstream)
            new_class.add_value(bitstream)

    attributes = instances.domain.attributes
    new_domain = Orange.data.Domain(attributes, new_class)

    # Build the transformed instances.
    new_table = Orange.data.Table(new_domain)
    for e, bitstream in zip(instances, bitstreams):
        new_row = Orange.data.Instance(
            new_domain,
            [e[a].value for a in attributes] + [bitstream])
        new_table.append(new_row)

    return new_table
70
class LabelPowersetLearner(_multibase.MultiLabelLearner):
    """
    Class that implements the LabelPowerset (LP) method.

    The multi-label data is transformed with :func:`transform_to_powerset`
    into a single-label problem, on which a base learner is trained.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        """
        Create a learner or, when ``instances`` is given, train immediately.

        :param instances: optional table of multi-label instances; when it is
            truthy the constructor returns a trained
            :class:`LabelPowersetClassifier` instead of a learner.
        :param base_learner: learner to train on the transformed data.
        :param weight_id: forwarded to :meth:`__call__`.
        """
        self = _multibase.MultiLabelLearner.__new__(cls, **argkw)

        if instances:
            # __call__ returns a classifier, not an instance of cls, so
            # Python will not run __init__ for us; do it explicitly.
            self.__init__(**argkw)
            return self.__call__(instances, base_learner, weight_id)

        # Remember the configured base learner so a later call without an
        # explicit base_learner does not silently fall back to the default.
        if base_learner is not None:
            self.base_learner = base_learner
        return self

    def __call__(self, instances, base_learner = None, weight_id = 0, **kwds):
        """
        Learn from the given table of data instances.

        :param instances: a table of multi-label instances.
        :type instances: :class:`Orange.data.Table`
        :param base_learner: learner trained on the transformed data; when
            omitted, the learner stored on this object is used, and
            BayesLearner if there is none.
        :param weight_id: passed on to the resulting classifier; it is not
            used when training the base learner here.
        :rtype: :class:`LabelPowersetClassifier`
        :raises TypeError: if ``instances`` is not a multi-label data set.
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set.")

        self.__dict__.update(kwds)

        new_table = transform_to_powerset(instances)

        # Resolve the base learner: explicit argument, then the one stored
        # on the learner, then the default.
        base_learner = (base_learner
                        or getattr(self, "base_learner", None)
                        or _BayesLearner)
        classifier = base_learner(new_table)

        return LabelPowersetClassifier(instances = instances,
                                       classifier = classifier,
                                       weight_id = weight_id)
100
class LabelPowersetClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier produced by :class:`LabelPowersetLearner`.

    Reads ``self.classifier`` (the base classifier trained on the
    transformed data) and ``self.instances`` (the original multi-label
    table); both are presumably set by the base class from the keyword
    arguments given at construction -- confirm in ``multibase``.
    """
    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        Predict the labels of ``instance``.

        The base classifier predicts one powerset class; its value string is
        split character by character and paired, in order, with the class
        variables of the original domain. The reported distributions are
        degenerate: probability 1 for a label whose character is ``'1'``
        and 0 otherwise.

        :param instance: the instance to classify.
        :param result_type: one of ``GetValue``, ``GetProbabilities`` or
            ``GetBoth`` from :class:`Orange.classification.Classifier`.
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        label_vars = self.instances.domain.class_vars
        predicted = self.classifier(instance)

        # NOTE(review): zip stops at the shorter sequence, so a predicted
        # value with fewer characters than there are labels yields fewer
        # labels without any error.
        pairs = list(zip(label_vars, predicted.value))

        labels = [Orange.data.Value(lvar, bit) for lvar, bit in pairs]
        if result_type == Orange.classification.Classifier.GetValue:
            return labels

        # Distributions are only built when the caller asks for them.
        disc = []
        for lvar, bit in pairs:
            p = float(bit == '1')
            dist = Orange.statistics.distribution.Discrete([1-p, p])
            dist.variable = lvar
            disc.append(dist)

        if result_type == Orange.classification.Classifier.GetProbabilities:
            return disc
        return labels,disc
124
#########################################################################################
# Test the code: run this module as a script from the command line.
# Assumes the data file "emotions.tab" can be located by Orange.data.Table.

if __name__ == "__main__":
    data = Orange.data.Table("emotions.tab")

    classifier = Orange.multilabel.LabelPowersetLearner(data)
    # Show predictions for at most the first ten instances.
    for i in range(min(10, len(data))):
        c, p = classifier(data[i], Orange.classification.Classifier.GetBoth)
        # Formatted string gives the same output under Python 2 and 3.
        print("%s %s" % (c, p))
Note: See TracBrowser for help on using the repository browser.