| Metric | Value |
| --- | --- |
| Conditions | 8 |
| Total Lines | 138 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 1 |
| Bugs | 0 |
| Features | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Moreover, when a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name, as sketched below.
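As a minimal sketch of that heuristic (the functions and names below are invented for illustration and do not come from the listing further down):

```python
def report_average_score(scores):
    # Before: the comment explains what the next lines do, which is a hint
    # that they want to become their own, well-named method.
    # drop scores outside the valid 0..100 range
    valid = [s for s in scores if 0 <= s <= 100]
    return sum(valid) / len(valid) if valid else 0.0


# After: the comment has become the method name, and the caller reads like prose.
def drop_invalid_scores(scores):
    """Keep only scores inside the valid 0..100 range."""
    return [s for s in scores if 0 <= s <= 100]


def report_average_score_refactored(scores):
    valid = drop_invalid_scores(scores)
    return sum(valid) / len(valid) if valid else 0.0
```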
Commonly applied refactorings include Extract Method.
If many parameters or temporary variables make extraction awkward, Replace Temp with Query, Introduce Parameter Object, or Replace Method with Method Object are the usual follow-ups; a small sketch of the parameter-object variant follows.
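Introduce Parameter Object can be sketched in the same hypothetical spirit; the `ClassCounts` type and its fields are invented here to show the shape of the refactoring, not taken from the code below:

```python
from dataclasses import dataclass


# Before: four related counts travel together through every call.
def f1_score(true_positive, false_positive, false_negative, true_negative):
    precision = true_positive / (true_positive + false_positive) if true_positive + false_positive else 0.0
    recall = true_positive / (true_positive + false_negative) if true_positive + false_negative else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0


# After: the counts become one parameter object with small, well-named methods.
@dataclass
class ClassCounts:
    true_positive: int
    false_positive: int
    false_negative: int
    true_negative: int

    def precision(self):
        denominator = self.true_positive + self.false_positive
        return self.true_positive / denominator if denominator else 0.0

    def recall(self):
        denominator = self.true_positive + self.false_negative
        return self.true_positive / denominator if denominator else 0.0

    def f1_score(self):
        p, r = self.precision(), self.recall()
        return 2 * p * r / (p + r) if p + r else 0.0
```

Bundling the related counts also gives any later extracted helpers a natural home, so further extractions need fewer parameters.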
```python
import logging

import numpy as np

# ... (module-level definitions elided here: the logger and the
#      per_class_performance_index / overall_performance_index lists)


def get_performance_array(confusion_matrix):
    """Calculate performance measures based on the given confusion matrix.

    Gets the performance array for each class:
    0 - True_Positive: number of samples that belong to the class and are classified correctly
    1 - True_Negative: number of samples correctly classified as not belonging to the class
    2 - False_Positive: number of samples that do not belong to the class but are classified as the class
    3 - False_Negative: number of samples that belong to the class but are not classified as the class
    4 - Accuracy: Overall, how often is the classifier correct? (TP + TN) / (TP + TN + FP + FN)
    5 - Misclassification: Overall, how often is it wrong? (FP + FN) / (TP + TN + FP + FN)
    6 - Recall: When it's actually yes, how often does it predict yes? TP / (TP + FN)
    7 - False Positive Rate: When it's actually no, how often does it predict yes? FP / (FP + TN)
    8 - Specificity: When it's actually no, how often does it predict no? TN / (FP + TN)
    9 - Precision: When it predicts yes, how often is it correct? TP / (TP + FP)
    10 - Prevalence: How often does the yes condition actually occur in our sample? Total(class) / Total(samples)
    11 - F(1) Measure: 2 * (precision * recall) / (precision + recall)
    12 - G Measure: sqrt(precision * recall)

    Gets the overall performance for the classifier:
    0 - Average Accuracy: The average per-class effectiveness of a classifier
    1 - Weighted Accuracy: The average effectiveness of a classifier weighted by the prevalence of each class
    2 - Precision (micro): Agreement of the data class labels with those of the classifier, calculated from sums of
        per-text decisions
    3 - Recall (micro): Effectiveness of the classifier at identifying class labels, calculated from sums of per-text
        decisions
    4 - F-Score (micro): Relationship between the data's positive labels and those given by the classifier, based on
        sums of per-text decisions
    5 - Precision (macro): An average per-class agreement of the data class labels with those of the classifier
    6 - Recall (macro): An average per-class effectiveness of the classifier at identifying class labels
    7 - F-Score (macro): Relationship between the data's positive labels and those given by the classifier, based on a
        per-class average
    8 - Exact Matching Ratio: The average per-text exact classification

    Note: In multi-class classification, Micro-Precision == Micro-Recall == Micro-FScore == Exact Matching Ratio
    (multi-class classification: each input is to be classified into one and only one class).

    Reference:
        Sokolova, M., & Lapalme, G. (2009). A systematic analysis of performance measures for classification tasks.
        Information Processing and Management, 45, 427-437.

    Args:
        confusion_matrix (:class:`numpy.array`): Confusion matrix (numpy array of num_classes by num_classes)

    Returns:
        :obj:`tuple` of :class:`numpy.array`: Tuple of overall performance and per-class performance
    """
    if confusion_matrix.shape[0] != confusion_matrix.shape[1]:
        logger.error("confusion matrix with shape " + str(confusion_matrix.shape) + " is not square.")
        return None, None

    num_classes = confusion_matrix.shape[0]

    per_class = np.zeros((num_classes, len(per_class_performance_index)), dtype=float)
    overall = np.zeros((len(overall_performance_index),), dtype=float)

    for i in range(num_classes):
        true_positive = confusion_matrix[i][i]
        true_negative = np.sum(confusion_matrix)\
            - np.sum(confusion_matrix[i, :])\
            - np.sum(confusion_matrix[:, i])\
            + confusion_matrix[i][i]
        false_positive = np.sum(confusion_matrix[:, i]) - confusion_matrix[i][i]
        false_negative = np.sum(confusion_matrix[i, :]) - confusion_matrix[i][i]
        # Accuracy: (TP + TN) / (TP + TN + FP + FN)
        per_class_accuracy = (true_positive + true_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # Misclassification: (FP + FN) / (TP + TN + FP + FN)
        per_class_misclassification = (false_positive + false_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # Recall: TP / (TP + FN)
        if true_positive + false_negative == 0:
            per_class_recall = 0.
        else:
            per_class_recall = true_positive / (true_positive + false_negative)
        # False Positive Rate: FP / (FP + TN)
        if false_positive + true_negative == 0:
            per_class_fpr = 0.
        else:
            per_class_fpr = false_positive / (false_positive + true_negative)
        # Specificity: TN / (FP + TN)
        if false_positive + true_negative == 0:
            per_class_specificity = 0.
        else:
            per_class_specificity = true_negative / (false_positive + true_negative)
        # Precision: TP / (TP + FP)
        if true_positive + false_positive == 0:
            per_class_precision = 0.
        else:
            per_class_precision = true_positive / (true_positive + false_positive)
        # Prevalence: (TP + FN) / (TP + TN + FP + FN)
        per_class_prevalence = (true_positive + false_negative)\
            / (true_positive + true_negative + false_positive + false_negative)
        # F-1 Measure: 2 * (precision * recall) / (precision + recall)
        if per_class_precision + per_class_recall == 0:
            per_class_fscore = 0.
        else:
            per_class_fscore = 2 * (per_class_precision * per_class_recall) / (per_class_precision + per_class_recall)
        # G Measure: sqrt(precision * recall)
        per_class_gscore = np.sqrt(per_class_precision * per_class_recall)
        per_class[i][0] = true_positive
        per_class[i][1] = true_negative
        per_class[i][2] = false_positive
        per_class[i][3] = false_negative
        per_class[i][4] = per_class_accuracy
        per_class[i][5] = per_class_misclassification
        per_class[i][6] = per_class_recall
        per_class[i][7] = per_class_fpr
        per_class[i][8] = per_class_specificity
        per_class[i][9] = per_class_precision
        per_class[i][10] = per_class_prevalence
        per_class[i][11] = per_class_fscore
        per_class[i][12] = per_class_gscore

    # Average Accuracy: Sum{i}{Accuracy_i} / num_classes
    overall[0] = np.sum(per_class[:, per_class_performance_index.index('accuracy')]) / num_classes
    # Weighted Accuracy: Sum{i}{Accuracy_i * Prevalence_i}
    overall[1] = np.dot(per_class[:, per_class_performance_index.index('accuracy')],
                        per_class[:, per_class_performance_index.index('prevalence')])
    # Precision (micro): Sum{i}{TP_i} / Sum{i}{TP_i + FP_i}
    overall[2] = np.sum(per_class[:, per_class_performance_index.index('true_positive')]) / \
        np.sum(per_class[:, per_class_performance_index.index('true_positive')] +
               per_class[:, per_class_performance_index.index('false_positive')])
    # Recall (micro): Sum{i}{TP_i} / Sum{i}{TP_i + FN_i}
    overall[3] = np.sum(per_class[:, per_class_performance_index.index('true_positive')]) / \
        np.sum(per_class[:, per_class_performance_index.index('true_positive')] +
               per_class[:, per_class_performance_index.index('false_negative')])
    # F-Score (micro): 2 * Precision_micro * Recall_micro / (Precision_micro + Recall_micro)
    overall[4] = 2 * overall[2] * overall[3] / (overall[2] + overall[3])
    # Precision (macro): Sum{i}{Precision_i} / num_classes
    overall[5] = np.sum(per_class[:, per_class_performance_index.index('precision')]) / num_classes
    # Recall (macro): Sum{i}{Recall_i} / num_classes
    overall[6] = np.sum(per_class[:, per_class_performance_index.index('recall')]) / num_classes
    # F-Score (macro): 2 * Precision_macro * Recall_macro / (Precision_macro + Recall_macro)
    overall[7] = 2 * overall[5] * overall[6] / (overall[5] + overall[6])
    # Exact Matching Ratio: correctly classified samples / all samples
    overall[8] = np.trace(confusion_matrix) / np.sum(confusion_matrix)
    return overall, per_class
```
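A brief usage sketch of `get_performance_array`, assuming the surrounding module also defines the logger and the `per_class_performance_index` / `overall_performance_index` lists that the body references (those definitions are elided from the listing above); the matrix values below are made up for illustration:

```python
import numpy as np

# Rows are the true classes, columns the predicted classes.
confusion = np.array([[13, 2, 0],
                      [1, 10, 4],
                      [0, 3, 17]], dtype=float)

overall, per_class = get_performance_array(confusion)

# Following the docstring's ordering: index 4 is accuracy, 9 precision, 6 recall.
for class_id in range(confusion.shape[0]):
    print("class", class_id,
          "accuracy", round(per_class[class_id][4], 3),
          "precision", round(per_class[class_id][9], 3),
          "recall", round(per_class[class_id][6], 3))

# Micro-precision, micro-recall, micro-F and the exact matching ratio should
# all coincide here, as the docstring's multi-class note predicts.
print("micro precision", overall[2], "exact matching ratio", overall[8])
```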