source: orange/Orange/multilabel/lp.py @ 10502:6b593a8cd5a0

Revision 10502:6b593a8cd5a0, 4.8 KB checked in by Matija Polajnar <matija.polajnar@…>, 2 years ago (diff)

Make multi-label warning and errors more clear on what a 'multi-label dataset' is from our perspective; Miha warned me students had troubles with this.

RevLine 
[9460]1"""
2.. index:: LabelPowerset Learner
3
4***************************************
5LabelPowerset Learner
6***************************************
7
Label Powerset (LP) classification is a problem-transformation method for
multi-label classification. It treats each distinct set of labels that occurs in
the multi-label data as a single class. It thus learns a single-label
classification problem :math:`H:X \\rightarrow \\mathbb{P}(L)`, where
:math:`\\mathbb{P}(L)` is the power set of the label set :math:`L`.
[9460]12For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview
13<http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_.
14International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.
15
16.. index:: LabelPowerset Learner
17.. autoclass:: Orange.multilabel.LabelPowersetLearner
18   :members:
19   :show-inheritance:
20 
[9500]21   :param instances: a table of instances.
[9460]22   :type instances: :class:`Orange.data.Table`
23     
   :param base_learner: the learner trained on the transformed single-label
      data, in which every label combination is one class value; the default
      learner is BayesLearner
   :type base_learner: :class:`Orange.classification.Learner`
26
27.. index:: LabelPowerset Classifier
28.. autoclass:: Orange.multilabel.LabelPowersetClassifier
29   :members:
30   :show-inheritance:
31   
32Examples
33========
34
35The following example demonstrates a straightforward invocation of
[9994]36this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`):
[9460]37
38.. literalinclude:: code/mlc-classify.py
[9505]39   :lines: 6, 19-21
[9460]40
41"""
42
43import Orange
44from Orange.core import BayesLearner as _BayesLearner
45import multibase as _multibase
46
def get_label_bitstream(e):
    """
    Return the label values of instance ``e`` concatenated into one string.

    The ``value`` of every item returned by ``e.get_classes()`` is taken in
    order and the pieces are joined without a separator.
    """
    bits = [label.value for label in e.get_classes()]
    return ''.join(bits)
49
def transform_to_powerset(instances):
    """
    Build a single-label copy of ``instances`` for the LabelPowerset method.

    The returned table keeps the attributes of ``instances.domain`` and has
    one discrete class variable named "label". The class value of each row
    is the string produced by :func:`get_label_bitstream` for the
    corresponding original instance.
    """
    powerset_var = Orange.feature.Discrete("label")

    # First pass: register the label combination of every instance as a
    # value of the new class variable.
    for inst in instances:
        powerset_var.add_value(get_label_bitstream(inst))

    attributes = instances.domain.attributes
    powerset_domain = Orange.data.Domain(attributes, powerset_var)

    # Second pass: copy the attribute values and append the label
    # combination as the class value of each new row.
    powerset_table = Orange.data.Table(powerset_domain)
    for inst in instances:
        row_values = [inst[attr].value for attr in attributes]
        row_values.append(get_label_bitstream(inst))
        powerset_table.append(Orange.data.Instance(powerset_domain, row_values))

    return powerset_table
70
class LabelPowersetLearner(_multibase.MultiLabelLearner):
    """
    Class that implements the LabelPowerset (LP) method.

    The multi-label data is converted with :func:`transform_to_powerset`
    into a table with a single class variable, a base learner is trained on
    that table, and the result is wrapped in a
    :class:`LabelPowersetClassifier`.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        """
        Create a learner, or train one right away when data is given.

        :param instances: a table of instances; when given (and non-empty),
            a trained :class:`LabelPowersetClassifier` is returned instead
            of the learner.
        :param base_learner: learner to train on the transformed data; it
            is remembered on the learner so that later calls use it too.
        :param weight_id: forwarded to :meth:`__call__`.
        """
        self = _multibase.MultiLabelLearner.__new__(cls, **argkw)

        # ``base_learner`` is a named parameter, so it is not part of
        # ``argkw``. Keep it on the instance; otherwise a learner built
        # without data would silently fall back to the default when called.
        if base_learner:
            self.base_learner = base_learner

        # NOTE(review): this is a truthiness test, so a falsy ``instances``
        # argument yields the learner itself rather than a classifier.
        if instances:
            self.__init__(**argkw)
            return self.__call__(instances, base_learner, weight_id)
        else:
            return self

    def __call__(self, instances, base_learner = None, weight_id = 0, **kwds):
        """
        Train on ``instances`` and return a :class:`LabelPowersetClassifier`.

        :param instances: a multi-label table of instances.
        :param base_learner: learner to train on the transformed data. When
            omitted, the learner given at construction time is used, and
            BayesLearner if there is none.
        :param weight_id: passed on to the returned classifier; it is not
            used when training the base learner.
        :param kwds: stored as attributes of this learner.
        :raises TypeError: if ``instances`` is not a multi-label data set.
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set"
                            " with class values 0 and 1.")

        self.__dict__.update(kwds)

        new_table = transform_to_powerset(instances)

        # Pick the base learner: explicit argument first, then the one
        # stored on this learner, then the default.
        if not base_learner:
            base_learner = getattr(self, "base_learner", None) or _BayesLearner

        # Train the base learner on the single-label (powerset) table.
        classifier = base_learner(new_table)

        # The original table is kept so that predictions can be mapped back
        # to the original label variables.
        return LabelPowersetClassifier(instances = instances,
                                       classifier = classifier,
                                       weight_id = weight_id)
[9460]101
class LabelPowersetClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier produced by the LabelPowerset method.

    It reads ``self.classifier`` (the classifier trained on the transformed
    data) and ``self.instances`` (the original multi-label table).
    """
    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        predicted = self.classifier(instance)
        label_vars = self.instances.domain.class_vars
        bits = predicted.value

        # Each character of the predicted class value is the value of one
        # label, in the order of the original class variables.
        labels = [Orange.data.Value(var, bit)
                  for bit, var in zip(bits, label_vars)]

        if result_type == Orange.classification.Classifier.GetValue:
            return labels

        # The probability of a label is 1 when its bit is '1', else 0.
        disc = []
        for bit, var in zip(bits, label_vars):
            p = float(bit == '1')
            dist = Orange.statistics.distribution.Discrete([1-p, p])
            dist.variable = var
            disc.append(dist)

        if result_type == Orange.classification.Classifier.GetProbabilities:
            return disc
        return labels, disc
[9475]125
126#########################################################################################
# Test the code: run this module as a script from the command prompt.
# Assumes that the data file is in the proper directory.
129
[9475]130if __name__ == "__main__":
131    data = Orange.data.Table("emotions.tab")
132
[9476]133    classifier = Orange.multilabel.LabelPowersetLearner(data)
[9475]134    for i in range(10):
135        c,p = classifier(data[i],Orange.classification.Classifier.GetBoth)
[9922]136        print c,p
Note: See TracBrowser for help on using the repository browser.