| Metric | Value |
| --- | --- |
| Conditions | 8 |
| Total Lines | 138 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 1 |
| Bugs | 0 |
| Features | 0 |
Small methods make your code easier to understand, especially when combined with a good name. And if a method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment as the starting point for that method's name.
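A minimal before/after sketch of that move (the order-processing names here are invented for illustration and do not come from the reviewed module):

```python
# Hypothetical example: the inline comment is a hint that the block wants to be
# its own method, named after the comment.

class Order:
    def __init__(self, address, zip_code):
        self.address = address
        self.zip_code = zip_code


def process_order_before(order):
    # Validate the shipping address.
    if not order.address or not order.zip_code:
        raise ValueError("incomplete shipping address")
    return "processed"


# After Extract Method: the comment has become a descriptive method name.
def validate_shipping_address(order):
    if not order.address or not order.zip_code:
        raise ValueError("incomplete shipping address")


def process_order_after(order):
    validate_shipping_address(order)
    return "processed"
```

The new method's name now carries the information the comment used to, and the calling method reads as a sequence of named steps.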
Commonly applied refactorings include:
- Extract Method, which on its own is usually enough to shorten a long method.
- If many parameters or temporary variables get in the way of extracting, Replace Temp with Query, Introduce Parameter Object, or (as a last resort) Replace Method with Method Object help to remove them first.
```python
import logging

import numpy as np

logger = logging.getLogger(__name__)

# Column layout of the returned arrays. The original module defines these lists
# on lines not shown in this excerpt; the entries below are reconstructed from
# the docstring and the .index() lookups in the function body, so the names of
# entries that are never looked up are placeholders.
per_class_performance_index = [
    'true_positive', 'true_negative', 'false_positive', 'false_negative',
    'accuracy', 'misclassification', 'recall', 'false_positive_rate',
    'specificity', 'precision', 'prevalence', 'fscore', 'gscore',
]
overall_performance_index = [
    'average_accuracy', 'weighted_accuracy', 'precision_micro', 'recall_micro',
    'fscore_micro', 'precision_macro', 'recall_macro', 'fscore_macro',
    'exact_matching_ratio',
]


def get_performance_array(confusion_matrix):
    """Calculate performance measures based on the given confusion matrix.

    Gets a performance array for each class:
    0 - True_Positive: number of samples that belong to the class and were classified correctly
    1 - True_Negative: number of samples correctly classified as not belonging to the class
    2 - False_Positive: number of samples that do not belong to the class but were classified as the class
    3 - False_Negative: number of samples that belong to the class but were not classified as the class
    4 - Accuracy: Overall, how often is the classifier correct? (TP + TN) / (TP + TN + FP + FN)
    5 - Misclassification: Overall, how often is it wrong? (FP + FN) / (TP + TN + FP + FN)
    6 - Recall: When it's actually yes, how often does it predict yes? TP / (TP + FN)
    7 - False Positive Rate: When it's actually no, how often does it predict yes? FP / (FP + TN)
    8 - Specificity: When it's actually no, how often does it predict no? TN / (FP + TN)
    9 - Precision: When it predicts yes, how often is it correct? TP / (TP + FP)
    10 - Prevalence: How often does the yes condition actually occur in our sample? Total(class) / Total(samples)
    11 - F(1) Measure: 2 * (precision * recall) / (precision + recall)
    12 - G Measure: sqrt(precision * recall)

    Gets the overall performance for the classifier:
    0 - Average Accuracy: The average per-class effectiveness of the classifier
    1 - Weighted Accuracy: The average effectiveness of the classifier, weighted by the prevalence of each class
    2 - Precision (micro): Agreement of the data's class labels with those of the classifier, calculated from sums of per-text decisions
    3 - Recall (micro): Effectiveness of the classifier at identifying class labels, calculated from sums of per-text decisions
    4 - F-Score (micro): Relationship between the data's positive labels and those given by the classifier, based on sums of per-text decisions
    5 - Precision (macro): The average per-class agreement of the data's class labels with those of the classifier
    6 - Recall (macro): The average per-class effectiveness of the classifier at identifying class labels
    7 - F-Score (macro): Relationship between the data's positive labels and those given by the classifier, based on a per-class average
    8 - Exact Matching Ratio: The average per-text exact classification

    Note: In multi-class classification (each input is classified into one and only one class),
    Micro-Precision == Micro-Recall == Micro-FScore == Exact Matching Ratio.

    Reference:
    Sokolova, M., & Lapalme, G. (2009). A systematic analysis of performance measures for
    classification tasks. Information Processing and Management, 45, 427-437.

    Args:
        confusion_matrix (:class:`numpy.ndarray`): Square confusion matrix of shape
            (num_classes, num_classes); rows are actual classes, columns are predicted classes.

    Returns:
        :obj:`tuple` of :class:`numpy.ndarray`: Tuple of overall performance and per-class
        performance, or ``(None, None)`` if the confusion matrix is not square.
    """
    if confusion_matrix.shape[0] != confusion_matrix.shape[1]:
        logger.error("confusion matrix with shape %s is not square.", confusion_matrix.shape)
        return None, None

    num_classes = confusion_matrix.shape[0]

    per_class = np.zeros((num_classes, len(per_class_performance_index)), dtype=float)
    overall = np.zeros((len(overall_performance_index),), dtype=float)

    for i in range(num_classes):
        true_positive = confusion_matrix[i][i]
        true_negative = np.sum(confusion_matrix)\
            - np.sum(confusion_matrix[i, :])\
            - np.sum(confusion_matrix[:, i])\
            + confusion_matrix[i][i]
        false_positive = np.sum(confusion_matrix[:, i]) - confusion_matrix[i][i]
        false_negative = np.sum(confusion_matrix[i, :]) - confusion_matrix[i][i]
        # Accuracy: (TP + TN) / (TP + TN + FP + FN)
        per_class_accuracy = (true_positive + true_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # Misclassification: (FP + FN) / (TP + TN + FP + FN)
        per_class_misclassification = (false_positive + false_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # Recall: TP / (TP + FN)
        if true_positive + false_negative == 0:
            per_class_recall = 0.
        else:
            per_class_recall = true_positive / (true_positive + false_negative)
        # False Positive Rate: FP / (FP + TN)
        if false_positive + true_negative == 0:
            per_class_fpr = 0.
        else:
            per_class_fpr = false_positive / (false_positive + true_negative)
        # Specificity: TN / (FP + TN)
        if false_positive + true_negative == 0:
            per_class_specificity = 0.
        else:
            per_class_specificity = true_negative / (false_positive + true_negative)
        # Precision: TP / (TP + FP)
        if true_positive + false_positive == 0:
            per_class_precision = 0.
        else:
            per_class_precision = true_positive / (true_positive + false_positive)
        # Prevalence: (TP + FN) / (TP + TN + FP + FN)
        per_class_prevalence = (true_positive + false_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # F(1) Measure: 2 * (precision * recall) / (precision + recall)
        if per_class_precision + per_class_recall == 0:
            per_class_fscore = 0.
        else:
            per_class_fscore = 2 * (per_class_precision * per_class_recall) / (per_class_precision + per_class_recall)
        # G Measure: sqrt(precision * recall)
        per_class_gscore = np.sqrt(per_class_precision * per_class_recall)
        per_class[i][0] = true_positive
        per_class[i][1] = true_negative
        per_class[i][2] = false_positive
        per_class[i][3] = false_negative
        per_class[i][4] = per_class_accuracy
        per_class[i][5] = per_class_misclassification
        per_class[i][6] = per_class_recall
        per_class[i][7] = per_class_fpr
        per_class[i][8] = per_class_specificity
        per_class[i][9] = per_class_precision
        per_class[i][10] = per_class_prevalence
        per_class[i][11] = per_class_fscore
        per_class[i][12] = per_class_gscore

    # Average Accuracy: Sum{i}{Accuracy_i} / num_classes
    overall[0] = np.sum(per_class[:, per_class_performance_index.index('accuracy')]) / num_classes
    # Weighted Accuracy: Sum{i}{Accuracy_i * Prevalence_i} (the prevalences sum to 1)
    overall[1] = np.dot(per_class[:, per_class_performance_index.index('accuracy')],
                        per_class[:, per_class_performance_index.index('prevalence')])
    # Precision (micro): Sum{i}{TP_i} / Sum{i}{TP_i + FP_i}
    overall[2] = np.sum(per_class[:, per_class_performance_index.index('true_positive')]) / \
        np.sum(per_class[:, per_class_performance_index.index('true_positive')] +
               per_class[:, per_class_performance_index.index('false_positive')])
    # Recall (micro): Sum{i}{TP_i} / Sum{i}{TP_i + FN_i}
    overall[3] = np.sum(per_class[:, per_class_performance_index.index('true_positive')]) / \
        np.sum(per_class[:, per_class_performance_index.index('true_positive')] +
               per_class[:, per_class_performance_index.index('false_negative')])
    # F-Score (micro): 2 * Precision_micro * Recall_micro / (Precision_micro + Recall_micro)
    overall[4] = 2 * overall[2] * overall[3] / (overall[2] + overall[3])
    # Precision (macro): Sum{i}{Precision_i} / num_classes
    overall[5] = np.sum(per_class[:, per_class_performance_index.index('precision')]) / num_classes
    # Recall (macro): Sum{i}{Recall_i} / num_classes
    overall[6] = np.sum(per_class[:, per_class_performance_index.index('recall')]) / num_classes
    # F-Score (macro): 2 * Precision_macro * Recall_macro / (Precision_macro + Recall_macro)
    overall[7] = 2 * overall[5] * overall[6] / (overall[5] + overall[6])
    # Exact Matching Ratio: Sum{i}{TP_i} / Total(samples)
    overall[8] = np.trace(confusion_matrix) / np.sum(confusion_matrix)
    return overall, per_class
```
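Applied to `get_performance_array` above, one possible first step (a sketch only; the helper name and the exact split are not part of the reviewed code) is to extract the per-class count computation from the loop body:

```python
import numpy as np


def get_confusion_counts(confusion_matrix, class_index):
    """Return (TP, TN, FP, FN) for one class of a square confusion matrix.

    Assumes rows are actual classes and columns are predicted classes, the
    convention implied by the prevalence calculation in get_performance_array.
    """
    # Diagonal entry: samples of this class that were predicted as this class.
    true_positive = confusion_matrix[class_index, class_index]
    # Column sum minus the diagonal: predicted as this class but actually another.
    false_positive = np.sum(confusion_matrix[:, class_index]) - true_positive
    # Row sum minus the diagonal: actually this class but predicted as another.
    false_negative = np.sum(confusion_matrix[class_index, :]) - true_positive
    # Everything that involves neither the row nor the column of this class.
    true_negative = np.sum(confusion_matrix) - true_positive - false_positive - false_negative
    return true_positive, true_negative, false_positive, false_negative
```

The loop body would then start with `true_positive, true_negative, false_positive, false_negative = get_confusion_counts(confusion_matrix, i)`, and the same move can be repeated for each ratio, with method names taken from the existing comments (accuracy, recall, precision, and so on).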