a
    hN                     @   s   d Z ddlZddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ d	d
lmZmZ d	dlmZmZ d#ddZd$ddZeeedZdd Zdd Zdd Zdd Zd%ddZd&ddZd'dd Zd!d" ZdS )(zAUtilities to handle multiclass/multioutput target in classifiers.    N)Sequence)chain)issparse   )get_namespace)VisibleDeprecationWarning   )attach_uniquecached_unique)_assert_all_finitecheck_arrayc                 C   s<   t | |d\}}t| ds|r0t|| |dS t| S d S )Nxp	__array__)r   hasattrr
   asarraysetyr   is_array_api_compliant r   T/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py_unique_multiclass   s    r   c                 C   s.   t | |d\}}|t| dg ddjd S )Nr   r   ZcsrZcscZcoo)
input_nameaccept_sparser   )r   Zaranger   shape)r   r   _r   r   r   _unique_indicator   s    r   )binary
multiclassmultilabel-indicatorc                     s4  t | ddi} t|  \}t| dkr.tdtdd | D }|ddhkrRdh}t|d	krjtd
| | }|dkrttdd | D d	krtdt|d  stdt|  |r	 fdd| D }
|S tt fdd| D }ttdd |D d	kr&tdt|S )a  Extract an ordered array of unique labels.

    We don't allow:
        - mix of multilabel and multiclass (single label) targets
        - mix of label indicator matrix and anything else,
          because there are no explicit labels)
        - mix of label indicator matrices of different sizes
        - mix of string and integer labels

    At the moment, we also don't allow "multiclass-multioutput" input type.

    Parameters
    ----------
    *ys : array-likes
        Label values.

    Returns
    -------
    out : ndarray of shape (n_unique_labels,)
        An ordered array of unique labels.

    Examples
    --------
    >>> from sklearn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7])
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
    array([1, 2, 3, 4])
    >>> unique_labels([1, 2, 10], [5, 11])
    array([ 1,  2,  5, 10, 11])
    Zreturn_tupleTr   zNo argument has been passed.c                 s   s   | ]}t |V  qd S N)type_of_target).0xr   r   r   	<genexpr>O       z unique_labels.<locals>.<genexpr>r   r    r   z'Mix type of y not allowed, got types %sr!   c                 s   s$   | ]}t |g d djd V  qdS )r   )r   r   N)r   r   r$   r   r   r   r   r&   \   s   zCMulti-label binary indicator input with different numbers of labelsNzUnknown label type: %sc                    s   g | ]} |d qS )r   r   r(   Z_unique_labelsr   r   r   
<listcomp>m   r'   z!unique_labels.<locals>.<listcomp>c                 3   s$   | ]}d d  |dD V  qdS )c                 s   s   | ]
}|V  qd S r"   r   )r$   ir   r   r   r&   q   r'   z*unique_labels.<locals>.<genexpr>.<genexpr>r   Nr   r(   r)   r   r   r&   q   r'   c                 s   s   | ]}t |tV  qd S r"   )
isinstancestr)r$   labelr   r   r   r&   t   r'   z,Mix of label input types (string and number))r	   r   len
ValueErrorr   pop_FN_UNIQUE_LABELSgetreprconcatunique_valuesr   from_iterabler   sorted)Zysr   Zys_typesZ
label_typeZ	unique_ysZ	ys_labelsr   r)   r   unique_labels)   sF     	
r9   c              	   C   s@   t | \}}|| jdo>t|||| |j| j| kS )Nreal floating)r   isdtypedtypeboolallastypeZint64r   r   r   r   _is_integral_floatz   s     r@   c                 C   s  t | \}}t| ds$t| ts$|rtddddddd}t  tdt zt	| fddi|} W nL tt
fy } z0t|d	r t	| fdti|} W Y d}~n
d}~0 0 W d   n1 s0    Y  t| d
r| jdkr| jd dksdS t| rj| jdv r|  } || j}t| jdkph|jdksT|jdkohd|v oh| jjdv pht|S t| |d}|jd dk o|| jdpt|S dS )a~  Check if ``y`` is in a multilabel format.

    Parameters
    ----------
    y : ndarray of shape (n_samples,)
        Target values.

    Returns
    -------
    out : bool
        Return ``True``, if ``y`` is in a multilabel format, else ```False``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.multiclass import is_multilabel
    >>> is_multilabel([0, 1, 0, 1])
    False
    >>> is_multilabel([[1], [0, 2], []])
    False
    >>> is_multilabel(np.array([[1, 0], [0, 0]]))
    True
    >>> is_multilabel(np.array([[1], [0], [0]]))
    False
    >>> is_multilabel(np.array([[1, 0, 0]]))
    True
    r   TFr   r   Zallow_ndZensure_all_finiteZ	ensure_2dZensure_min_samplesZensure_min_featureserrorr<   NComplex data not supportedr   r   r   )ZdokZlilZbiur      )r=   zsigned integerzunsigned integer)r   r   r,   r   dictwarningscatch_warningssimplefilterr   r   r0   r-   
startswithobjectndimr   r   formatZtocsrr6   datar/   sizer<   kindr@   r
   r;   )r   r   r   check_y_kwargselabelsr   r   r   is_multilabel   sB    
H"
"rS   c                 C   s(   t | dd}|dvr$td| ddS )aA  Ensure that target y is of a non-regression type.

    Only the following target types (as defined in type_of_target) are allowed:
        'binary', 'multiclass', 'multiclass-multioutput',
        'multilabel-indicator', 'multilabel-sequences'

    Parameters
    ----------
    y : array-like
        Target values.
    r   r   )r   r    zmulticlass-multioutputr!   zmultilabel-sequenceszUnknown label type: zy. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.N)r#   r0   )r   Zy_typer   r   r   check_classification_targets   s
    
rU    Fc                    s  t \}} fdd}tts8ts8tdrDtt pF|}|sXtd jjdv }|rptdt	r|dS t
ddd	d	d
d
d}t  tdt tsztfddi|W nN ttfy }	 z0t|	dr tfdti|W Y d}	~	n
d}	~	0 0 W d   n1 s.0    Y  zltrVd
gddf nd
 }
t|
trvtdt t|
dst|
trt|
tstdW n ty   Y n0 jdvr| S tjsjdkrdS | S tsjtkrtjd
 ts| S jdkr>jd dkr>d}nd}|jdrtrbjn}||||tkrt | d d| S t|
r|
j}
t!jd
 dksЈjdkrt"|
dkrd| S dS dS )a
  Determine the type of data indicated by the target.

    Note that this type is the most specific type that can be inferred.
    For example:

    * ``binary`` is more specific but compatible with ``multiclass``.
    * ``multiclass`` of integers is more specific but compatible with ``continuous``.
    * ``multilabel-indicator`` is more specific but compatible with
      ``multiclass-multioutput``.

    Parameters
    ----------
    y : {array-like, sparse matrix}
        Target values. If a sparse matrix, `y` is expected to be a
        CSR/CSC matrix.

    input_name : str, default=""
        The data name used to construct the error message.

        .. versionadded:: 1.1.0

    raise_unknown : bool, default=False
        If `True`, raise an error when the type of target returned by
        :func:`~sklearn.utils.multiclass.type_of_target` is `"unknown"`.

        .. versionadded:: 1.6

    Returns
    -------
    target_type : str
        One of:

        * 'continuous': `y` is an array-like of floats that are not all
          integers, and is 1d or a column vector.
        * 'continuous-multioutput': `y` is a 2d array of floats that are
          not all integers, and both dimensions are of size > 1.
        * 'binary': `y` contains <= 2 discrete values and is 1d or a column
          vector.
        * 'multiclass': `y` contains more than two discrete values, is not a
          sequence of sequences, and is 1d or a column vector.
        * 'multiclass-multioutput': `y` is a 2d array that contains more
          than two discrete values, is not a sequence of sequences, and both
          dimensions are of size > 1.
        * 'multilabel-indicator': `y` is a label indicator matrix, an array
          of two dimensions with at least two columns, and at most 2 unique
          values.
        * 'unknown': `y` is array-like but none of the above, such as a 3d
          array, sequence of sequences, or an array of non-sequence objects.

    Examples
    --------
    >>> from sklearn.utils.multiclass import type_of_target
    >>> import numpy as np
    >>> type_of_target([0.1, 0.6])
    'continuous'
    >>> type_of_target([1, -1, -1, 1])
    'binary'
    >>> type_of_target(['a', 'b', 'a'])
    'binary'
    >>> type_of_target([1.0, 2.0])
    'binary'
    >>> type_of_target([1, 0, 2])
    'multiclass'
    >>> type_of_target([1.0, 0.0, 3.0])
    'multiclass'
    >>> type_of_target(['a', 'b', 'c'])
    'multiclass'
    >>> type_of_target(np.array([[1, 2], [3, 1]]))
    'multiclass-multioutput'
    >>> type_of_target([[1, 2]])
    'multilabel-indicator'
    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
    'continuous-multioutput'
    >>> type_of_target(np.array([[0, 1], [1, 1]]))
    'multilabel-indicator'
    c                     s.   r& r nd} t d|  dndS dS )zdDepending on the value of raise_unknown, either raise an error or return
        'unknown'.
        rM   zUnknown label type for z: unknownN)r0   )inputr   raise_unknownr   r   r   _raise_or_return4  s    z(type_of_target.<locals>._raise_or_returnr   z:Expected array-like (array or non-string sequence), got %r)ZSparseSeriesZSparseArrayz1y cannot be class 'SparseSeries' or 'SparseArray'r!   TFr   rA   rB   r<   NrC   zSupport for labels represented as bytes is deprecated in v1.5 and will error in v1.7. Convert the labels to a string or integer format.zYou appear to be using a legacy multi-label data representation. Sequence of sequences are no longer supported; use a binary array or sparse matrix instead - the MultiLabelBinarizer transformer can convert to this format.)r   r   r   r   r   z-multioutputrV   r:   rT   Z
continuousr    )#r   r,   r   r   r   r-   r0   	__class____name__rS   rE   rF   rG   rH   r   r   rI   rJ   byteswarnFutureWarning
IndexErrorrK   minr   r<   Zflatr;   rM   anyr?   intr   r
   r/   )r   r   rZ   r   r   r[   ZvalidZsparse_pandasrP   rQ   Zfirst_row_or_valsuffixrM   r   rY   r   r#      s    M
	

J$
(
.r#   c                 C   sr   t | dddu r"|du r"tdnL|durnt | dddur`t| jt|sntd|| jf nt|| _dS dS )a"  Private helper function for factorizing common classes param logic.

    Estimators that implement the ``partial_fit`` API need to be provided with
    the list of possible classes at the first call to partial_fit.

    Subsequent calls to partial_fit should check that ``classes`` is still
    consistent with a previous value of ``clf.classes_`` when provided.

    This function returns True if it detects that this was the first call to
    ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
    set on ``clf``.

    classes_Nz8classes must be passed on the first call to partial_fit.zD`classes=%r` is not the same as on last call to partial_fit, was: %rTF)getattrr0   npZarray_equalrf   r9   )Zclfclassesr   r   r   _check_partial_fit_first_call  s    

rj   c                 C   s  g }g }g }| j \}}|dur(t|}t| rp|  } t| j}t|D ]}| j| j| | j|d   }	|dur||	 }
t	|t	|
 }nd}
| j d ||  }tj
| j| j| | j|d   dd\}}tj||
d}d|v r||dk  |7  < d|vr@|| | j d k r@t|dd}t|d|}|| ||j d  |||	   qNnht|D ]^}tj
| dd|f dd\}}|| ||j d  tj||d}|||	   qx|||fS )az  Compute class priors from multioutput-multiclass target data.

    Parameters
    ----------
    y : {array-like, sparse matrix} of size (n_samples, n_outputs)
        The labels for each example.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    classes : list of size n_outputs of ndarray of size (n_classes,)
        List of classes for each column.

    n_classes : list of int of size n_outputs
        Number of classes in each column.

    class_prior : list of size n_outputs of ndarray of size (n_classes,)
        Class distribution of each column.
    Nr   r   T)Zreturn_inverse)weights)r   rh   r   r   ZtocscdiffZindptrrangeindicessumuniquerM   Zbincountinsertappend)r   Zsample_weightri   	n_classesZclass_prior	n_samplesZ	n_outputsZy_nnzkZcol_nonzeroZnz_samp_weightZzeros_samp_weight_sumZ	classes_kZy_kZclass_prior_kr   r   r   class_distribution  sF    






rv   c           
      C   s  | j d }t||f}t||f}d}t|D ]}t|d |D ]}|dd|f  |dd|f 8  < |dd|f  |dd|f 7  < || dd|f dk|f  d7  < || dd|f dk|f  d7  < |d7 }qDq2|dt|d   }	||	 S )ay  Compute a continuous, tie-breaking OvR decision function from OvO.

    It is important to include a continuous value, not only votes,
    to make computing AUC or calibration meaningful.

    Parameters
    ----------
    predictions : array-like of shape (n_samples, n_classifiers)
        Predicted classes for each binary classifier.

    confidences : array-like of shape (n_samples, n_classifiers)
        Decision functions or predicted probabilities for positive class
        for each binary classifier.

    n_classes : int
        Number of classes. n_classifiers must be
        ``n_classes * (n_classes - 1 ) / 2``.
    r   r   NrD   )r   rh   Zzerosrm   abs)
ZpredictionsZconfidencesrs   rt   ZvotesZsum_of_confidencesru   r+   jZtransformed_confidencesr   r   r   _ovr_decision_function  s    
$$$$	ry   )N)N)rV   F)N)N)__doc__rF   collections.abcr   	itertoolsr   numpyrh   Zscipy.sparser   Zutils._array_apir   Zutils.fixesr   _uniquer	   r
   Z
validationr   r   r   r   r2   r9   r@   rS   rU   r#   rj   rv   ry   r   r   r   r   <module>   s0   

QI
 J
#
J