"""
Nearest Centroid Classification
"""

import warnings
from numbers import Real

import numpy as np
from scipy import sparse as sp

from ..base import BaseEstimator, ClassifierMixin, _fit_context
from ..discriminant_analysis import DiscriminantAnalysisPredictionMixin
from ..metrics.pairwise import pairwise_distances, pairwise_distances_argmin
from ..preprocessing import LabelEncoder
from ..utils import get_tags
from ..utils._available_if import available_if
from ..utils._param_validation import Interval, StrOptions
from ..utils.multiclass import check_classification_targets
from ..utils.sparsefuncs import csc_median_axis_0
from ..utils.validation import check_is_fitted, validate_data


class NearestCentroid(
    DiscriminantAnalysisPredictionMixin, ClassifierMixin, BaseEstimator
):
    """Nearest centroid classifier.

    Each class is represented by its centroid, with test samples classified to
    the class with the nearest centroid.

    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.

    Parameters
    ----------
    metric : {"euclidean", "manhattan"}, default="euclidean"
        Metric to use for distance computation.

        If `metric="euclidean"`, the centroid for the samples corresponding to each
        class is the arithmetic mean, which minimizes the sum of squared L2 distances.
        If `metric="manhattan"`, the centroid is the feature-wise median, which
        minimizes the sum of L1 distances.

        .. versionchanged:: 1.5
            All metrics but `"euclidean"` and `"manhattan"` were deprecated and
            now raise an error.

        .. versionchanged:: 0.19
            `metric='precomputed'` was deprecated and now raises an error.

    shrink_threshold : float, default=None
        Threshold for shrinking centroids to remove features.

    priors : {"uniform", "empirical"} or array-like of shape (n_classes,),         default="uniform"
        The class prior probabilities. By default, the class proportions are
        inferred from the training data.

        .. versionadded:: 1.6

    Attributes
    ----------
    centroids_ : array-like of shape (n_classes, n_features)
        Centroid of each class.

    classes_ : array of shape (n_classes,)
        The unique class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    deviations_ : ndarray of shape (n_classes, n_features)
        Deviations (or shrinkages) of the centroids of each class from the
        overall centroid. Equal to eq. (18.4) of [2] (p. 653) if
        `shrink_threshold=None`, else to eq. (18.5). Can be used to identify
        the features used for classification.

        .. versionadded:: 1.6

    within_class_std_dev_ : ndarray of shape (n_features,)
        Pooled or within-class standard deviation of input data.

        .. versionadded:: 1.6

    class_prior_ : ndarray of shape (n_classes,)
        The class prior probabilities.

        .. versionadded:: 1.6

    See Also
    --------
    KNeighborsClassifier : Nearest neighbors classifier.

    Notes
    -----
    When used for text classification with tf-idf vectors, this classifier is
    also known as the Rocchio classifier.

    References
    ----------
    [1] Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of
    multiple cancer types by shrunken centroids of gene expression. Proceedings
    of the National Academy of Sciences of the United States of America,
    99(10), 6567-6572. The National Academy of Sciences.

    [2] Hastie, T., Tibshirani, R., Friedman, J. (2009). The Elements of Statistical
    Learning: Data Mining, Inference, and Prediction. 2nd Edition. New York, Springer.

    Examples
    --------
    >>> from sklearn.neighbors import NearestCentroid
    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> clf = NearestCentroid()
    >>> clf.fit(X, y)
    NearestCentroid()
    >>> print(clf.predict([[-0.8, -1]]))
    [1]
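
    Centroid shrinkage can be enabled through `shrink_threshold`; the threshold
    used below is purely illustrative:

    >>> clf_shrunk = NearestCentroid(shrink_threshold=0.1).fit(X, y)
    >>> print(clf_shrunk.predict([[-0.8, -1]]))
    [1]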
    """

    _parameter_constraints: dict = {
        "metric": [StrOptions({"manhattan", "euclidean"})],
        "shrink_threshold": [Interval(Real, 0, None, closed="neither"), None],
        "priors": [StrOptions({"empirical", "uniform"}), "array-like"],
    }

    def __init__(
        self, metric="euclidean", *, shrink_threshold=None, priors="uniform"
    ):
        self.metric = metric
        self.shrink_threshold = shrink_threshold
        self.priors = priors

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y):
        """Fit the NearestCentroid model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
            Note that centroid shrinking cannot be used with sparse matrices.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # If X is sparse and the metric is "manhattan", store it in csc format
        # so that the feature-wise median is cheap to compute.
        if self.metric == "manhattan":
            X, y = validate_data(self, X, y, accept_sparse=["csc"])
        else:
            ensure_all_finite = (
                "allow-nan" if get_tags(self).input_tags.allow_nan else True
            )
            X, y = validate_data(
                self,
                X,
                y,
                ensure_all_finite=ensure_all_finite,
                accept_sparse=["csr", "csc"],
            )
        is_X_sparse = sp.issparse(X)
        check_classification_targets(y)

        n_samples, n_features = X.shape
        le = LabelEncoder()
        y_ind = le.fit_transform(y)
        self.classes_ = classes = le.classes_
        n_classes = classes.size
        if n_classes < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d class"
                % (n_classes)
            )

        if self.priors == "empirical":  # estimate priors from the sample
            _, class_counts = np.unique(y, return_inverse=True)
            self.class_prior_ = np.bincount(class_counts) / float(len(y))
        elif self.priors == "uniform":
            self.class_prior_ = np.asarray([1 / n_classes] * n_classes)
        else:
            self.class_prior_ = np.asarray(self.priors)

        if (self.class_prior_ < 0).any():
            raise ValueError("priors must be non-negative")
        if not np.isclose(self.class_prior_.sum(), 1.0):
            warnings.warn(
                "The priors do not sum to 1. Normalizing such that it sums to one.",
                UserWarning,
            )
            self.class_prior_ = self.class_prior_ / self.class_prior_.sum()

        # Centroid of each class and number of samples per class.
        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)
        nk = np.zeros(n_classes)

        for cur_class in range(n_classes):
            center_mask = y_ind == cur_class
            nk[cur_class] = np.sum(center_mask)
            if is_X_sparse:
                center_mask = np.where(center_mask)[0]

            if self.metric == "manhattan":
                # NumPy does not calculate the median of sparse matrices.
                if not is_X_sparse:
                    self.centroids_[cur_class] = np.median(X[center_mask], axis=0)
                else:
                    self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])
            else:  # metric == "euclidean"
                self.centroids_[cur_class] = X[center_mask].mean(axis=0)

        # Pooled (within-class) standard deviation, computed from the
        # unshrunken centroids.
        variance = np.asarray(X - self.centroids_[y_ind])
        variance = (variance**2).sum(axis=0)
        self.within_class_std_dev_ = np.sqrt(variance / (n_samples - n_classes))
        if np.any(self.within_class_std_dev_ == 0):
            warnings.warn(
                "self.within_class_std_dev_ has at least 1 zero standard deviation."
                "Inputs within the same classes for at least 1 feature are identical."
            )

        err_msg = "All features have zero variance. Division by zero."
        if is_X_sparse and np.all((X.max(axis=0) - X.min(axis=0)).toarray() == 0):
            raise ValueError(err_msg)
        elif not is_X_sparse and np.all(np.ptp(X, axis=0) == 0):
            raise ValueError(err_msg)

        dataset_centroid_ = X.mean(axis=0)

        # m parameter for determining deviation.
        m = np.sqrt((1.0 / nk) - (1.0 / n_samples))

        # Calculate deviation using the standard deviation of centroids;
        # adding the median keeps outlying features from dominating.
        s = self.within_class_std_dev_ + np.median(self.within_class_std_dev_)
        mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.
        ms = mm * s
        self.deviations_ = np.asarray((self.centroids_ - dataset_centroid_) / ms)

        # Soft thresholding: if the deviation crosses 0 during shrinking,
        # it becomes zero.
        if self.shrink_threshold:
            signs = np.sign(self.deviations_)
            self.deviations_ = np.abs(self.deviations_) - self.shrink_threshold
            np.clip(self.deviations_, 0, None, out=self.deviations_)
            self.deviations_ *= signs
            # Now adjust the centroids using the deviations.
            msd = ms * self.deviations_
            self.centroids_ = np.asarray(dataset_centroid_ + msd)
        return self

    def predict(self, X):
        """Perform classification on an array of test vectors `X`.

        The predicted class `C` for each sample in `X` is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            The predicted classes.
        """
        check_is_fitted(self)
        if np.isclose(self.class_prior_, 1 / len(self.classes_)).all():
            # Uniform priors: predict the class of the nearest centroid.
            ensure_all_finite = (
                "allow-nan" if get_tags(self).input_tags.allow_nan else True
            )
            X = validate_data(
                self,
                X,
                accept_sparse="csr",
                reset=False,
                ensure_all_finite=ensure_all_finite,
            )
            return self.classes_[
                pairwise_distances_argmin(X, self.centroids_, metric=self.metric)
            ]
        else:
            # Non-uniform priors: use the discriminant-based prediction.
            return super().predict(X)

    def _decision_function(self, X):
        check_is_fitted(self, "centroids_")

        X_normalized = validate_data(
            self, X, copy=True, reset=False, accept_sparse="csr", dtype=np.float64
        )
        discriminant_score = np.empty(
            (X_normalized.shape[0], self.classes_.size), dtype=np.float64
        )

        # Scale samples and centroids by the pooled standard deviation,
        # skipping features whose deviation is zero.
        mask = self.within_class_std_dev_ != 0
        X_normalized[:, mask] /= self.within_class_std_dev_[mask]
        centroids_normalized = self.centroids_.copy()
        centroids_normalized[:, mask] /= self.within_class_std_dev_[mask]

        for class_idx in range(self.classes_.size):
            distances = pairwise_distances(
                X_normalized, centroids_normalized[[class_idx]], metric=self.metric
            ).ravel()
            distances **= 2
            discriminant_score[:, class_idx] = np.squeeze(
                -distances + 2.0 * np.log(self.class_prior_[class_idx])
            )

        return discriminant_score

    def _check_euclidean_metric(self):
        return self.metric == "euclidean"

    decision_function = available_if(_check_euclidean_metric)(
        DiscriminantAnalysisPredictionMixin.decision_function
    )
    predict_proba = available_if(_check_euclidean_metric)(
        DiscriminantAnalysisPredictionMixin.predict_proba
    )
    predict_log_proba = available_if(_check_euclidean_metric)(
        DiscriminantAnalysisPredictionMixin.predict_log_proba
    )

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.input_tags.allow_nan = self.metric == "nan_euclidean"
        tags.input_tags.sparse = True
        return tags