# Copyright (C) 2017-2023  Cleanlab Inc.
# This file is part of cleanlab.
#
# cleanlab is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# cleanlab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with cleanlab.  If not, see <https://www.gnu.org/licenses/>.
"""The factory module provides a factory class for constructing concrete issue managers
and a decorator for registering new issue managers.
This module provides the :py:meth:`register` decorator for users to register new subclasses of
:py:class:`IssueManager <cleanlab.datalab.internal.issue_manager.issue_manager.IssueManager>`
in the registry. Each IssueManager detects some particular type of issue in a dataset.
Note
----
The :class:`REGISTRY` variable is used by the factory class to keep track
of registered issue managers.
The factory class is used as an implementation detail by
:py:class:`Datalab <cleanlab.datalab.datalab.Datalab>`,
which provides a simplified API for constructing concrete issue managers.
:py:class:`Datalab <cleanlab.datalab.datalab.Datalab>` is intended to be used by users
and provides detailed documentation on how to use the API.
Warning
-------
Neither the :class:`REGISTRY` variable nor the factory class should be used directly by users.
"""
from __future__ import annotations
from typing import Dict, List, Type
from cleanlab.datalab.internal.issue_manager import (
    ClassImbalanceIssueManager,
    DataValuationIssueManager,
    IssueManager,
    LabelIssueManager,
    NearDuplicateIssueManager,
    NonIIDIssueManager,
    ClassImbalanceIssueManager,
    UnderperformingGroupIssueManager,
    DataValuationIssueManager,
    OutlierIssueManager,
    NullIssueManager,
)
from cleanlab.datalab.internal.issue_manager.regression import RegressionLabelIssueManager
from cleanlab.datalab.internal.issue_manager.multilabel.label import MultilabelIssueManager
from cleanlab.datalab.internal.task import Task
# Two-level lookup table: task -> issue-type name -> IssueManager subclass.
# `register` mutates the inner dicts in place, and `_IssueManagerFactory.from_str`
# reads from them. The key order of each inner dict is the order returned by
# `list_possible_issue_types`, so reordering entries is an observable change.
REGISTRY: Dict[Task, Dict[str, Type[IssueManager]]] = {
    # Classification is the only task with "class_imbalance" and
    # "underperforming_group" registered.
    Task.CLASSIFICATION: {
        "outlier": OutlierIssueManager,
        "label": LabelIssueManager,
        "near_duplicate": NearDuplicateIssueManager,
        "non_iid": NonIIDIssueManager,
        "class_imbalance": ClassImbalanceIssueManager,
        "underperforming_group": UnderperformingGroupIssueManager,
        "data_valuation": DataValuationIssueManager,
        "null": NullIssueManager,
    },
    # Regression uses its own label issue manager; the rest are shared.
    Task.REGRESSION: {
        "label": RegressionLabelIssueManager,
        "outlier": OutlierIssueManager,
        "near_duplicate": NearDuplicateIssueManager,
        "non_iid": NonIIDIssueManager,
        "data_valuation": DataValuationIssueManager,
        "null": NullIssueManager,
    },
    # Multilabel uses its own label issue manager; the rest are shared.
    Task.MULTILABEL: {
        "label": MultilabelIssueManager,
        "outlier": OutlierIssueManager,
        "near_duplicate": NearDuplicateIssueManager,
        "non_iid": NonIIDIssueManager,
        "data_valuation": DataValuationIssueManager,
        "null": NullIssueManager,
    },
}
"""Registry of issue managers that can be constructed from a task and issue type
and used in the Datalab class.
:meta hide-value:
Currently, the following issue managers are registered by default for a given task:
- Classification:
    - ``"outlier"``: :py:class:`OutlierIssueManager <cleanlab.datalab.internal.issue_manager.outlier.OutlierIssueManager>`
    - ``"label"``: :py:class:`LabelIssueManager <cleanlab.datalab.internal.issue_manager.label.LabelIssueManager>`
    - ``"near_duplicate"``: :py:class:`NearDuplicateIssueManager <cleanlab.datalab.internal.issue_manager.duplicate.NearDuplicateIssueManager>`
    - ``"non_iid"``: :py:class:`NonIIDIssueManager <cleanlab.datalab.internal.issue_manager.noniid.NonIIDIssueManager>`
    - ``"class_imbalance"``: :py:class:`ClassImbalanceIssueManager <cleanlab.datalab.internal.issue_manager.imbalance.ClassImbalanceIssueManager>`
    - ``"underperforming_group"``: :py:class:`UnderperformingGroupIssueManager <cleanlab.datalab.internal.issue_manager.underperforming_group.UnderperformingGroupIssueManager>`
    - ``"data_valuation"``: :py:class:`DataValuationIssueManager <cleanlab.datalab.internal.issue_manager.data_valuation.DataValuationIssueManager>`
    - ``"null"``: :py:class:`NullIssueManager <cleanlab.datalab.internal.issue_manager.null.NullIssueManager>`
    
- Regression:
    - ``"label"``: :py:class:`RegressionLabelIssueManager <cleanlab.datalab.internal.issue_manager.regression.label.RegressionLabelIssueManager>`
    - ``"outlier"``: :py:class:`OutlierIssueManager <cleanlab.datalab.internal.issue_manager.outlier.OutlierIssueManager>`
    - ``"near_duplicate"``: :py:class:`NearDuplicateIssueManager <cleanlab.datalab.internal.issue_manager.duplicate.NearDuplicateIssueManager>`
    - ``"non_iid"``: :py:class:`NonIIDIssueManager <cleanlab.datalab.internal.issue_manager.noniid.NonIIDIssueManager>`
    - ``"data_valuation"``: :py:class:`DataValuationIssueManager <cleanlab.datalab.internal.issue_manager.data_valuation.DataValuationIssueManager>`
    - ``"null"``: :py:class:`NullIssueManager <cleanlab.datalab.internal.issue_manager.null.NullIssueManager>`
- Multilabel:
    - ``"label"``: :py:class:`MultilabelIssueManager <cleanlab.datalab.internal.issue_manager.multilabel.label.MultilabelIssueManager>`
    - ``"outlier"``: :py:class:`OutlierIssueManager <cleanlab.datalab.internal.issue_manager.outlier.OutlierIssueManager>`
    - ``"near_duplicate"``: :py:class:`NearDuplicateIssueManager <cleanlab.datalab.internal.issue_manager.duplicate.NearDuplicateIssueManager>`
    - ``"non_iid"``: :py:class:`NonIIDIssueManager <cleanlab.datalab.internal.issue_manager.noniid.NonIIDIssueManager>`
    - ``"data_valuation"``: :py:class:`DataValuationIssueManager <cleanlab.datalab.internal.issue_manager.data_valuation.DataValuationIssueManager>`
    - ``"null"``: :py:class:`NullIssueManager <cleanlab.datalab.internal.issue_manager.null.NullIssueManager>`
Warning
-------
This variable should not be used directly by users.
"""
# Construct concrete issue manager with a from_str method
class _IssueManagerFactory:
    """Looks up issue manager classes in ``REGISTRY`` by issue-type name and task."""

    @classmethod
    def from_str(cls, issue_type: str, task: Task) -> Type[IssueManager]:
        """Return the issue manager class registered under ``issue_type`` for ``task``.

        Raises
        ------
        ValueError
            If ``issue_type`` is a list, if ``task`` is not a key of ``REGISTRY``,
            or if ``issue_type`` is not registered for ``task``.
        """
        # A list here almost certainly means the caller wanted `from_list`.
        if isinstance(issue_type, list):
            raise ValueError(
                "issue_type must be a string, not a list. Try using from_list instead."
            )

        if task not in REGISTRY:
            raise ValueError(f"Invalid task type: {task}, must be in {list(REGISTRY.keys())}")

        managers_for_task = REGISTRY[task]
        if issue_type in managers_for_task:
            return managers_for_task[issue_type]

        raise ValueError(f"Invalid issue type: {issue_type} for task {task}")

    @classmethod
    def from_list(cls, issue_types: List[str], task: Task) -> List[Type[IssueManager]]:
        """Return the issue manager classes for each name in ``issue_types``, in order.

        Each name is resolved through :py:meth:`from_str`, so the first invalid
        entry raises ``ValueError``.
        """
        resolved: List[Type[IssueManager]] = []
        for requested_type in issue_types:
            resolved.append(cls.from_str(requested_type, task))
        return resolved
def register(cls: Type[IssueManager], task: str = str(Task.CLASSIFICATION)) -> Type[IssueManager]:
    """Registers an issue manager class in the registry for the given task.

    The class is stored in ``REGISTRY`` under the key ``str(cls.issue_name)``.
    If an issue manager is already registered under that name for the task,
    a warning is printed and the existing entry is overwritten.

    Parameters
    ----------
    cls :
        A subclass of
        :py:class:`IssueManager <cleanlab.datalab.internal.issue_manager.issue_manager.IssueManager>`.

    task :
        Specific machine learning task like classification or regression.
        Defaults to ``str(Task.CLASSIFICATION)``.
        See :py:meth:`Task.from_str <cleanlab.datalab.internal.task.Task.from_str>` for more details,
        to see which task type corresponds to which string.

    Returns
    -------
    cls :
        The same class that was passed in.

    Raises
    ------
    ValueError
        If ``cls`` is not a subclass of ``IssueManager``, if ``Task.from_str``
        raises ``KeyError`` for ``task``, or if the resolved task is not a key
        of ``REGISTRY``.

    Example
    -------

    When defining a new subclass of
    :py:class:`IssueManager <cleanlab.datalab.internal.issue_manager.issue_manager.IssueManager>`,
    you can register it like so:

    .. code-block:: python

        from cleanlab import IssueManager
        from cleanlab.datalab.internal.issue_manager_factory import register

        @register
        class MyIssueManager(IssueManager):
            issue_name: str = "my_issue"
            def find_issues(self, **kwargs):
                # Some logic to find issues
                pass

    or in a function call:

    .. code-block:: python

        from cleanlab import IssueManager
        from cleanlab.datalab.internal.issue_manager_factory import register

        class MyIssueManager(IssueManager):
            issue_name: str = "my_issue"
            def find_issues(self, **kwargs):
                # Some logic to find issues
                pass

        register(MyIssueManager, task="classification")
    """
    if not issubclass(cls, IssueManager):
        raise ValueError(f"Class {cls} must be a subclass of IssueManager")

    name: str = str(cls.issue_name)

    # Only the lookup itself sits inside the try, so the KeyError translation
    # cannot mask errors from unrelated code. Any other exception raised by
    # Task.from_str propagates unchanged.
    try:
        _task = Task.from_str(task)
    except KeyError as err:
        raise ValueError(
            f"Invalid task type: {task}, must be in {list(REGISTRY.keys())}"
        ) from err
    if _task not in REGISTRY:
        raise ValueError(f"Invalid task type: {_task}, must be in {list(REGISTRY.keys())}")

    if name in REGISTRY[_task]:
        # The trailing space after the first sentence matters: adjacent string
        # literals are concatenated verbatim.
        print(
            f"Warning: Overwriting existing issue manager {name} with {cls} for task {_task}. "
            "This may cause unexpected behavior."
        )
    REGISTRY[_task][name] = cls
    return cls
def list_possible_issue_types(task: Task) -> List[str]:
    """Returns a list of all issue types registered for the given task.

    Any issue type that is not in this list cannot be used in the :py:meth:`find_issues` method.

    Parameters
    ----------
    task :
        The task whose registered issue types should be listed.

    Returns
    -------
    issue_types :
        The issue-type names registered for ``task`` in ``REGISTRY``, in
        registration order. An empty list if ``task`` has no entry in ``REGISTRY``.

    See Also
    --------
    :py:class:`REGISTRY <cleanlab.datalab.internal.issue_manager_factory.REGISTRY>` : All available issue types and their corresponding issue managers can be found here.
    """
    # Iterating a dict yields its keys; an unknown task falls back to an empty mapping.
    return list(REGISTRY.get(task, {}))
def list_default_issue_types(task: Task) -> List[str]:
    """Returns a list of the issue types that are run by default
    when :py:meth:`find_issues` is called without specifying `issue_types`.

    Parameters
    ----------
    task :
        Specific machine learning task supported by Datalab.
        A task without its own entry below falls back to the classification defaults.

    Returns
    -------
    default_issue_types :
        The default issue-type names for ``task``. A new list is built on every
        call, so callers may modify the result freely.

    See Also
    --------
    :py:class:`REGISTRY <cleanlab.datalab.internal.issue_manager_factory.REGISTRY>` : All available issue types and their corresponding issue managers can be found here.
    """
    # Built inside the function so each call hands out fresh lists.
    # "data_valuation" is registered in REGISTRY but is not part of any default set.
    default_issue_types_dict = {
        Task.CLASSIFICATION: [
            "null",
            "label",
            "outlier",
            "near_duplicate",
            "non_iid",
            "class_imbalance",
            "underperforming_group",
        ],
        Task.REGRESSION: [
            "null",
            "label",
            "outlier",
            "near_duplicate",
            "non_iid",
        ],
        Task.MULTILABEL: [
            "null",
            "label",
            "outlier",
            "near_duplicate",
            "non_iid",
        ],
    }
    # Unknown tasks deliberately use the classification defaults instead of raising.
    return default_issue_types_dict.get(task, default_issue_types_dict[Task.CLASSIFICATION])