source: orange/Orange/multilabel/br.py @ 10502:6b593a8cd5a0

Revision 10502:6b593a8cd5a0, 4.9 KB checked in by Matija Polajnar <matija.polajnar@…>, 2 years ago (diff)

Make multi-label warning and errors more clear on what a 'multi-label dataset' is from our perspective; Miha warned me students had troubles with this.

RevLine 
"""
.. index:: Binary Relevance Learner

***************************************
Binary Relevance Learner
***************************************

The most basic problem transformation method for multi-label classification
is the Binary Relevance method.
It learns :math:`|L|` binary classifiers :math:`H_l:X \\rightarrow \{l, \\neg l\}`,
one for each different label :math:`l` in :math:`L`.
It transforms the original data set into :math:`|L|` data sets :math:`D_l`
that contain all examples of the original data set, labelled as
:math:`l` if the labels of the original example contained :math:`l` and
as :math:`\\neg l` otherwise. It is the same solution used in order
to deal with a single-label multi-class problem using a binary classifier.
For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview
<http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_.
International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.

Note that a copy of the table is made in RAM for each label to enable construction of
a classifier. Due to technical limitations, that is currently unavoidable and
should be remedied in Orange 3.

.. index:: Binary Relevance Learner
.. autoclass:: Orange.multilabel.BinaryRelevanceLearner
   :members:
   :show-inheritance:

   :param instances: a table of instances.
   :type instances: :class:`Orange.data.Table`

   :param base_learner: the binary learner, the default learner is
                        :class:`Orange.classification.bayes.NaiveLearner`.
   :type base_learner: :class:`Orange.classification.Learner`

.. index:: Binary Relevance Classifier
.. autoclass:: Orange.multilabel.BinaryRelevanceClassifier
   :members:
   :show-inheritance:


Examples
========

The following example demonstrates a straightforward invocation of
this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`):

.. literalinclude:: code/mlc-classify.py
   :lines: 6, 15-17

"""
53
54import Orange
[9500]55from Orange.classification.bayes import NaiveLearner as _BayesLearner
[9445]56import multibase as _multibase
57
class BinaryRelevanceLearner(_multibase.MultiLabelLearner):
    """
    Class that implements the Binary Relevance (BR) method.

    One classifier is built per class variable (label) of the training
    data; each is trained on a copy of the data whose domain keeps the
    original attributes and has that single label as its class.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        """
        Create a learner or, if ``instances`` is given, train immediately.

        :param instances: optional training data; when given (and
            non-empty), the learner is applied to it straight away.
        :param base_learner: learner used for every per-label problem;
            falls back to the naive Bayes learner imported by this module.
        :param weight_id: forwarded to :meth:`__call__` when training
            immediately.
        :param argkw: forwarded to the base class ``__new__`` and, when
            training immediately, to ``__init__``.
        :return: the learner itself, or the trained
            :class:`BinaryRelevanceClassifier` when ``instances`` is given.
        """
        self = _multibase.MultiLabelLearner.__new__(cls, **argkw)
        self.base_learner = base_learner if base_learner else _BayesLearner

        if instances:
            # __init__ is not run automatically when __new__ returns
            # something other than an instance of cls, so call it by hand.
            self.__init__(**argkw)
            return self.__call__(instances, weight_id)
        return self

    def __call__(self, instances, weight_id = 0, **kwds):
        """
        Learn from the given table of data instances.

        :param instances: multi-label training data.
        :param weight_id: passed on to the base learner when non-zero and
            stored on the resulting classifier.
        :param kwds: stored as attributes on the learner before training.
        :raises TypeError: if ``Orange.multilabel.is_multilabel`` rejects
            ``instances``.
        :rtype: :class:`BinaryRelevanceClassifier`
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set"
                            " with class values 0 and 1.")

        self.__dict__.update(kwds)

        attributes = instances.domain.attributes
        classifiers = []
        for label in instances.domain.class_vars:
            # Single-label view of the data: same attributes, one class.
            label_domain = Orange.data.Domain(attributes, label)
            label_table = Orange.data.Table(label_domain, instances)

            # Forward the weights only when some were requested, so base
            # learners that accept just the data keep working unchanged.
            if weight_id:
                classifier = self.base_learner(label_table, weight_id)
            else:
                classifier = self.base_learner(label_table)
            classifiers.append(classifier)

        return BinaryRelevanceClassifier(instances = instances,
                                         classifiers = classifiers,
                                         weight_id = weight_id)
[9445]97
class BinaryRelevanceClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier that queries one per-label classifier for every label and
    collects their answers.
    """
    def __init__(self, **kwds):
        """Store all keyword arguments (e.g. ``classifiers``) as attributes."""
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        Classify ``instance`` with every classifier in ``self.classifiers``.

        :param instance: the instance to classify.
        :param result_type: ``GetValue`` returns the predicted values,
            ``GetProbabilities`` the distributions; anything else returns
            both.
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        get_both = Orange.classification.Classifier.GetBoth

        # Ask each per-label classifier once for (value, distribution).
        predictions = [c(instance, get_both) for c in self.classifiers]
        labels = [value for value, _ in predictions]
        dists = [dist for _, dist in predictions]

        if result_type == Orange.classification.Classifier.GetValue:
            return labels
        if result_type == Orange.classification.Classifier.GetProbabilities:
            return dists
        return labels, dists
[9475]121       
#########################################################################################
# A quick test/example: train Binary Relevance with a kNN base learner on
# "emotions.tab" and print the predictions for the first ten instances.

if __name__ == "__main__":
    emotions = Orange.data.Table("emotions.tab")

    br_classifier = Orange.multilabel.BinaryRelevanceLearner(
        emotions, Orange.classification.knn.kNNLearner)
    for row in range(10):
        values, distributions = br_classifier(
            emotions[row], Orange.classification.Classifier.GetBoth)
        # One space-separated line per instance: values, then distributions.
        print("%s %s" % (values, distributions))
Note: See TracBrowser for help on using the repository browser.