# Source code for cleanlab.multilabel_classification

# Copyright (C) 2017-2022  Cleanlab Inc.
# This file is part of cleanlab.
#
# cleanlab is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# cleanlab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with cleanlab.  If not, see <https://www.gnu.org/licenses/>.

"""
Methods to rank the severity of label issues in multi-label classification datasets.
Here each example can belong to one or more classes, or none of the classes at all.
Unlike in standard multi-class classification, predicted class probabilities from model need not sum to 1 for each row in multi-label classification.
"""

import numpy as np
from typing import List

from cleanlab.internal.validation import assert_valid_inputs
from cleanlab.internal.util import get_num_classes
from cleanlab.internal.multilabel_scorer import MultilabelScorer, ClassLabelScorer, Aggregator
from cleanlab.internal.multilabel_utils import int2onehot

[docs]def get_label_quality_scores(
labels: List,
pred_probs: np.ndarray,
*,
method: str = "self_confidence",
aggregator_kwargs: dict = {"method": "exponential_moving_average", "alpha": 0.8}
) -> np.ndarray:
"""Computes a label quality score each example in a multi-label classification dataset.

Scores are between 0 and 1 with lower scores indicating examples whose label more likely contains an error.
For each example, this method internally computes a separate score for each individual class
and then aggregates these per-class scores into an overall label quality score for the example.

To estimate exactly which examples are mislabeled in a multi-label classification dataset,
you can also use :py:func:filter.find_label_issues <cleanlab.filter.find_label_issues> with argument multi_label=True.

Parameters
----------
labels : List[List[int]]
Multi-label classification labels for each example, which is allowed to belong to multiple classes.
The i-th element of labels corresponds to list of classes that i-th example belongs to (e.g. labels = [[1,2],[1],[0],..]).

Important
---------
*Format requirements*: For dataset with K classes, individual class labels must be integers in 0, 1, ..., K-1.

pred_probs : np.ndarray
An array of shape (N, K) of model-predicted probabilities,
P(label=k|x). Each row of this matrix corresponds
to an example x and contains the model-predicted probabilities that
x belongs to each possible class, for each of the K classes. The
columns must be ordered such that these probabilities correspond to
class 0, 1, ..., K-1. In multi-label classification, the rows of pred_probs need not sum to 1.

Note
----
Estimated label quality scores are most accurate when they are computed based on out-of-sample pred_probs from your model.
To obtain out-of-sample predicted probabilities for every example in your dataset, you can use :ref:cross-validation <pred_probs_cross_val>.
This is encouraged to get better results.

method : {"self_confidence", "normalized_margin", "confidence_weighted_entropy"}, default = "self_confidence"
Method to calculate separate per class annotation scores that are subsequently aggregated to form an overall label quality score.
These scores are separately calculated for each class based on the corresponding column of pred_probs in a one-vs-rest manner,
and are standard label quality scores for multi-class classification.

--------
:py:func:rank.get_label_quality_scores <cleanlab.rank.get_label_quality_scores> function for details about each option.

adjust_pred_probs : bool, default = False
Account for class imbalance in the label-quality scoring by adjusting predicted probabilities
via subtraction of class confident thresholds and renormalization.
Set this to True if you prefer to account for class-imbalance.
See Northcutt et al., 2021 <https://jair.org/index.php/jair/article/view/12125>_.

aggregator_kwargs : dict, default = {"method": "exponential_moving_average", "alpha": 0.8}
A dictionary of hyperparameter values for aggregating per class scores into an overall label quality score for each example.
Options for "method" include: "exponential_moving_average" or "softmin" or your own callable function.
See :py:class:internal.multilabel_scorer.Aggregator <cleanlab.internal.multilabel_scorer.Aggregator> for details about each option and other possible hyperparameters.

Returns
-------
label_quality_scores : np.ndarray
A 1D array of shape (N,) with a label quality score (between 0 and 1) for each example in the dataset.
Lower scores indicate examples whose label is more likely to contain annotation errors.

Examples
--------
>>> from cleanlab.multilabel_classification import get_label_quality_scores
>>> import numpy as np
>>> labels = [[1], [0,2]]
>>> pred_probs = np.array([[0.1, 0.9, 0.1], [0.4, 0.1, 0.9]])
>>> scores = get_label_quality_scores(labels, pred_probs)
>>> scores
array([0.9, 0.5])
"""

assert_valid_inputs(
X=None, y=labels, pred_probs=pred_probs, multi_label=True, allow_one_class=True
)
num_classes = get_num_classes(labels=labels, pred_probs=pred_probs, multi_label=True)
binary_labels = int2onehot(labels, K=num_classes)
base_scorer = ClassLabelScorer.from_str(method)