#
source:
orange/Orange/multilabel/br.py
@
11459:fc07a5c346be

Revision 11459:fc07a5c346be, 4.9 KB checked in by Ales Erjavec <ales.erjavec@…>, 12 months ago (diff) |
---|

Line | |
---|---|

1 | """ |

2 | .. index:: Binary Relevance Learner |

3 | |

4 | *************************************** |

5 | Binary Relevance Learner |

6 | *************************************** |

7 | |

8 | The most basic problem transformation method for multi-label classification |

9 | is the Binary Relevance method. |

10 | It learns :math:`|L|` binary classifiers :math:`H_l:X \\rightarrow \{l, \\neg l\}`, |

11 | one for each different label :math:`l` in :math:`L`. |

12 | It transforms the original data set into :math:`|L|` data sets :math:`D_l` |

13 | that contain all examples of the original data set, labelled as |

14 | :math:`l` if the labels of the original example contained :math:`l` and |

15 | as :math:`\\neg l` otherwise. It is the same solution used in order |

16 | to deal with a single-label multi-class problem using a binary classifier. |

17 | For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview |

18 | <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_. |

19 | International Journal of Data Warehousing and Mining, 3(3):1-13, 2007. |

20 | |

21 | Note that a copy of the table is made in RAM for each label to enable construction of |

22 | a classifier. Due to technical limitations, that is currently unavoidable and |

23 | should be remedied in Orange 3. |

24 | |

25 | .. index:: Binary Relevance Learner |

26 | .. autoclass:: Orange.multilabel.BinaryRelevanceLearner |

27 | :members: |

28 | :show-inheritance: |

29 | |

30 | :param instances: a table of instances. |

31 | :type instances: :class:`Orange.data.Table` |

32 | |

33 | :param base_learner: the binary learner, the default learner is |

34 | :class:`Orange.classification.bayes.NaiveLearner`. |

35 | :type base_learner: :class:`Orange.classification.Learner` |

36 | |

37 | .. index:: Binary Relevance Classifier |

38 | .. autoclass:: Orange.multilabel.BinaryRelevanceClassifier |

39 | :members: |

40 | :show-inheritance: |

41 | |

42 | |

43 | Examples |

44 | ======== |

45 | |

46 | The following example demonstrates a straightforward invocation of |

47 | this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`): |

48 | |

49 | .. literalinclude:: code/mlc-classify.py |

50 | :lines: 6, 15-17 |

51 | |

52 | """ |

53 | |

54 | import Orange |

55 | from Orange.classification.bayes import NaiveLearner as _BayesLearner |

56 | import multibase as _multibase |

57 | |

class BinaryRelevanceLearner(_multibase.MultiLabelLearner):
    """
    Binary Relevance (BR) learner.

    Trains one independent binary classifier per class variable (label) of
    a multi-label data set and bundles them into a
    :class:`BinaryRelevanceClassifier`.
    """
    def __new__(cls, instances=None, base_learner=None, weight_id=0, **argkw):
        """
        Build the learner and, when data is supplied, train it right away.

        :param instances: optional training data; when given, the trained
            classifier is returned instead of the learner.
        :param base_learner: learner used for every single-label problem;
            any falsy value selects the naive Bayes learner.
        :param weight_id: passed on to :meth:`__call__` when ``instances``
            is given.
        :param argkw: forwarded to the base class ``__new__`` and, when
            training immediately, to ``__init__``.
        """
        learner = _multibase.MultiLabelLearner.__new__(cls, **argkw)
        learner.base_learner = base_learner or _BayesLearner

        if instances is None:
            return learner

        # Initialise explicitly before training: the value returned from
        # here is the trained classifier rather than the learner itself.
        learner.__init__(**argkw)
        return learner.__call__(instances, weight_id)

    def __call__(self, instances, weight_id=0, **kwds):
        """
        Learn one binary classifier for each label in ``instances``.

        :param instances: a multi-label data table.
        :param weight_id: stored on the returned classifier; it is not
            passed to the base learner.
        :param kwds: extra keyword arguments, set as attributes on the
            learner before training.
        :raises TypeError: if ``Orange.multilabel.is_multilabel`` rejects
            ``instances``.
        :rtype: :class:`BinaryRelevanceClassifier`
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set"
                            " with class values 0 and 1.")

        self.__dict__.update(kwds)

        def train_for_label(label_var):
            # Rebuild the table over the same attributes with this label
            # as its only class variable, then fit the base learner on it.
            label_domain = Orange.data.Domain(instances.domain.attributes,
                                              label_var)
            label_table = Orange.data.Table(label_domain, instances)
            return self.base_learner(label_table)

        per_label = [train_for_label(label_var)
                     for label_var in instances.domain.class_vars]

        return BinaryRelevanceClassifier(instances=instances,
                                         classifiers=per_label,
                                         weight_id=weight_id)

97 | |

class BinaryRelevanceClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier built by :class:`BinaryRelevanceLearner`.

    Every keyword argument given to the constructor becomes an attribute.
    The learner passes ``instances``, ``classifiers`` (one binary
    classifier per label) and ``weight_id``.
    """
    def __init__(self, **kwds):
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        Predict all labels of ``instance``.

        Each classifier in ``self.classifiers`` is queried for both its
        value and its distribution; results keep the classifiers' order.

        :param instance: the data instance to classify.
        :param result_type: ``GetValue`` returns only the values,
            ``GetProbabilities`` only the distributions, anything else
            returns both.
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        result_kinds = Orange.classification.Classifier

        # Always ask for both parts; the caller's result_type only selects
        # which of them is returned.
        predictions = [classifier(instance, result_kinds.GetBoth)
                       for classifier in self.classifiers]
        labels = [value for value, _ in predictions]
        dists = [dist for _, dist in predictions]

        if result_type == result_kinds.GetValue:
            return labels
        if result_type == result_kinds.GetProbabilities:
            return dists
        return labels, dists

121 | |

122 | ######################################################################################### |

123 | # A quick test/example. |

124 | |

if __name__ == "__main__":
    # Train on the "emotions.tab" data set with kNN as the base learner,
    # then print the predicted labels and distributions of the first ten
    # instances.
    data = Orange.data.Table("emotions.tab")

    classifier = Orange.multilabel.BinaryRelevanceLearner(
        data, Orange.classification.knn.kNNLearner)
    for row in range(10):
        labels, dists = classifier(data[row],
                                   Orange.classification.Classifier.GetBoth)
        # One parenthesised string: same output as printing the two
        # values separated by a comma.
        print("%s %s" % (labels, dists))

**Note:** See TracBrowser for help on using the repository browser.