| Metric | Value |
| --- | --- |
| Conditions | 3 |
| Total Lines | 115 |
| Code Lines | 24 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.
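As a minimal sketch of this (all names are hypothetical, chosen only for illustration): the comment names a concept that can become a method of its own.

```python
# Before: the comment labels a block buried inside a longer method.
def print_invoice(order):
    # compute the total including tax
    total = sum(item.price * item.quantity for item in order.items)
    total *= 1.2
    print(f"Total due: {total:.2f}")


# After: the comment has become the method name.
def total_including_tax(order, tax_rate=0.2):
    return sum(item.price * item.quantity for item in order.items) * (1 + tax_rate)


def print_invoice(order):
    print(f"Total due: {total_including_tax(order):.2f}")
```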
Commonly applied refactorings include:

- Extract Method: pull a coherent fragment of the body into its own, well-named method.
- If many parameters/temporary variables are present: Replace Method with Method Object (sketched below), which turns the temporaries into fields so the body can be split without long parameter lists.
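A minimal sketch of Replace Method with Method Object, again with hypothetical names: the long method's temporaries become fields, so helper methods need no parameter passing.

```python
class PriceCalculator:
    """Method object: the original method's temporaries become fields."""

    def __init__(self, order):
        self._order = order
        self._base_price = 0.0
        self._discount = 0.0

    def compute(self):
        self._compute_base_price()
        self._apply_discount()
        return self._base_price - self._discount

    def _compute_base_price(self):
        self._base_price = sum(
            item.price * item.quantity for item in self._order.items
        )

    def _apply_discount(self):
        # Reads and writes fields instead of threading temporaries through
        # parameter lists.
        self._discount = 0.1 * self._base_price if self._base_price > 100 else 0.0
```

Callers then replace the original long method call with `PriceCalculator(order).compute()`.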
Here is the method these metrics refer to:

```python
from functools import partial

# ... (intervening file lines elided in this excerpt; the function below uses
# numpy and scikit-learn's private helper, so imports along these lines are
# assumed to be present)
import numpy as np

from sklearn.metrics._ranking import _binary_clf_curve


def precision_recall_gain_curve(y_true, probas_pred, pos_label=1, sample_weight=None):
    """Compute precision-gain/recall-gain pairs for different probability thresholds.

    Note: this implementation is restricted to the binary classification task.

    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
    true positives and ``fp`` the number of false positives. The precision is
    intuitively the ability of the classifier not to label as positive a sample
    that is negative.

    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
    true positives and ``fn`` the number of false negatives. The recall is
    intuitively the ability of the classifier to find all the positive samples.

    The last precision and recall values are 1. and 0. respectively and do not
    have a corresponding threshold. This ensures that the graph starts on the
    y axis.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : ndarray of shape (n_samples,)
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.

    probas_pred : ndarray of shape (n_samples,)
        Estimated probabilities or output of a decision function.

    pos_label : int, default=1
        The label of the positive class. Only ``pos_label=1`` is currently
        supported.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. Not yet supported; must be None.

    Returns
    -------
    precision_gains : ndarray of shape (n_thresholds + 1,)
        Precision-gain values, one per precision/recall pair.

    recall_gains : ndarray of shape (n_thresholds + 1,)
        Recall-gain values, one per precision/recall pair.

    See Also
    --------
    plot_precision_recall_curve : Plot Precision Recall Curve for binary
        classifiers.
    PrecisionRecallDisplay : Precision Recall visualization.
    average_precision_score : Compute average precision from prediction scores.
    det_curve : Compute error rates for different probability thresholds.
    roc_curve : Compute Receiver operating characteristic (ROC) curve.

    Examples
    --------
    The underlying precision-recall computation matches scikit-learn's
    ``precision_recall_curve``:

    >>> import numpy as np
    >>> from sklearn.metrics import precision_recall_curve
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> precision, recall, thresholds = precision_recall_curve(
    ...     y_true, y_scores)
    >>> precision
    array([0.66666667, 0.5       , 1.        , 1.        ])
    >>> recall
    array([1. , 0.5, 0.5, 0. ])
    >>> thresholds
    array([0.35, 0.4 , 0.8 ])
    """
    if pos_label != 1:
        raise NotImplementedError("Have not implemented non-binary targets")
    if sample_weight is not None:
        raise NotImplementedError("Have not implemented sample weights")

    # Count true and false positives per binary classification threshold.
    fps, tps, thresholds = _binary_clf_curve(
        y_true, probas_pred, pos_label=pos_label, sample_weight=sample_weight
    )

    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]

    # Stop when full recall is attained, and reverse the outputs so that
    # recall is decreasing.
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)  # reversed slice, equivalent to [last_ind::-1]
    precision, recall, thresholds = (
        np.r_[precision[sl], 1],
        np.r_[recall[sl], 0],
        thresholds[sl],
    )

    # Everything above is taken from
    # sklearn.metrics._ranking.precision_recall_curve.

    # Counting logic taken from sklearn.metrics._ranking.det_curve.
    p_count = tps[-1]
    n_count = fps[-1]
    # pi, the proportion of positives: P / (P + N).
    proportion_of_positives = p_count / (p_count + n_count)

    precision_gains, recall_gains = precision_recall_gain(
        precisions=precision,
        recalls=recall,
        proportion_of_positives=proportion_of_positives,
    )

    return precision_gains, recall_gains
```
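The helper `precision_recall_gain` is not shown in this excerpt. A minimal sketch of what it might compute, assuming the keyword signature from the call site above and following the precision-gain/recall-gain definitions of Flach and Kull (2015):

```python
import numpy as np


def precision_recall_gain(precisions, recalls, proportion_of_positives):
    """Sketch only: map precision/recall onto precision-gain/recall-gain.

    Following Flach & Kull (2015), with pi the proportion of positives:
        prec_gain = (prec - pi) / ((1 - pi) * prec)
        rec_gain  = (rec  - pi) / ((1 - pi) * rec)
    """
    pi = proportion_of_positives
    # Suppress warnings where precision or recall is 0 (e.g. the final
    # appended recall value); those entries come out as inf/nan.
    with np.errstate(divide="ignore", invalid="ignore"):
        precision_gains = (precisions - pi) / ((1 - pi) * precisions)
        recall_gains = (recalls - pi) / ((1 - pi) * recalls)
    return precision_gains, recall_gains
```

Gains are typically truncated to the [0, 1] range before plotting, since operating points with precision or recall below pi fall outside the unit square of the precision-recall-gain plot.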