a
    hH                  
   @   sr  d dl Z d dlZd dlZd dlmZmZmZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" eeeeeeegZ#d	d
 Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dSddZ,dd Z-dd  Z.d!d" Z/d#d$ Z0d%d& Z1ej23d'e d(d) Z4d*d+ Z5d,d- Z6d.d/ Z7dTd1d2Z8d3d4 Z9d5d6 Z:ej23d7d8gd9 g d:fd;gd9 g d:fg d:d8gd9 fg d:d;gd9 fd8gd9 d8gd9 fgd<d= Z;d>d? Z<d@dA Z=dBdC Z>dDdE Z?ej23dFe@eAdGe@eAdGfeBdHeBdHfgdIdJ ZCdKdL ZDdMdN ZEej23dOg dPdQdR ZFdS )U    N)assert_allcloseassert_array_almost_equalassert_array_equal)config_context)adjusted_mutual_info_scoreadjusted_rand_scorecompleteness_scorecontingency_matrixentropyexpected_mutual_informationfowlkes_mallows_score"homogeneity_completeness_v_measurehomogeneity_scoremutual_info_scorenormalized_mutual_info_scorepair_confusion_matrix
rand_scorev_measure_score)_generalized_averagecheck_clusterings)assert_all_finite))yield_namespace_device_dtype_combinations)_array_api_for_testsassert_almost_equalc               	   C   s   t D ]} d}tjt|d" | ddgg d W d    n1 sB0    Y  d}tjt|d* | ddgddggg d W d    n1 s0    Y  d}tjt|d* | g dddgddgg W d    q1 s0    Y  qd S )	NzDFound input variables with inconsistent numbers of samples: \[2, 3\]matchr      )r   r   r   z$labels_true must be 1D: shape is \(2z$labels_pred must be 1D: shape is \(2r   r   r   )score_funcspytestraises
ValueError)
score_funcexpected r$   i/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/metrics/cluster/tests/test_supervised.py"test_error_messages_on_wrong_input(   s    08r&   c                     s   d\ g d}  fdd| D }|d |d   krR|d   krR|d ksXn J d	\fd
d| D }|d |d   kr|d   kr|d ksn J d S )N)r      min	geometric
arithmeticmaxc                    s   g | ]}t  |qS r$   r   .0method)abr$   r%   
<listcomp><       z,test_generalized_average.<locals>.<listcomp>r   r   r'      )   r6   c                    s   g | ]}t  |qS r$   r-   r.   )cdr$   r%   r3   ?   r4   r$   )methodsmeansr$   )r1   r2   r7   r8   r%   test_generalized_average9   s    4r;   c                  C   s  t D ]} | g g tdks J | dgdgtdks<J | g dg dtdks\J | g dg dtdks|J | g dg dtdksJ | g d	g d
tdksJ | g dg dtdksJ qttg}h d}|D ]} |D ] }| g g |dtdks J | dgdg|dtdksBJ | g dg d|dtdkshJ | g dg d|dtdksJ | g dg d|dtdksJ | g d	g d
|dtdksJ | g dg d|dtdksJ qqd S )N      ?r   r   )r   r   r   r   )*      r=   )        r<   r?   )      E@      @r@   )r?   r<          @)r@   rA   rB   )r   r   r'   )r=   r>   r'   >   r)   r*   r+   r,   average_method)r   r   approxr   r   )r"   Zscore_funcs_with_changing_meansr:   meanr$   r$   r%   test_perfect_matchesC   sR        "

"



rG   c                  C   s@   t g dg d\} }}t| dd t|dd t|dd d S )Nr   r   r   r   r   r   )r   r   r   r   r'   r'   r<   r'   gGz?gQ?r   r   hr7   vr$   r$   r%   *test_homogeneous_but_not_complete_labelingf   s    rM   c                  C   s@   t g dg d\} }}t| dd t|dd t|dd d S )Nr   r   r   r   r'   r'   )r   r   r   r   r   r   g(\?r'   r<   g\(\?rI   rJ   r$   r$   r%   *test_complete_but_not_homogeneous_labelingn   s    rO   c                  C   s@   t g dg d\} }}t| dd t|dd t|dd d S )NrH   r   r   r   r   r'   r'   q=
ףp?r'   zG?p=
ף?rI   rJ   r$   r$   r%   .test_not_complete_and_not_homogeneous_labelingv   s    rT   c                  C   s   d} d}d}d|  | | | | |  }t g dg d| d\}}}t||d t||d t||d tg dg d| d}t||d d S )	Ng?rQ   rR   r   rH   rP   )betar'   )r   r   r   )Z	beta_testZh_testZc_testZv_testrK   r7   rL   r$   r$   r%   test_beta_parameter~   s    rV   c                  C   s   t g dg d\} }}t| dd t|dd t|dd t g dg d\} }}t| dd t|dd t|dd tg dg d}tg dg d}t|d	d t|d	d tg dg d}tg dg d}t|d
d t|d
d d S )N)r   r   r   r'   r'   r'   rP   rQ   r'   rR   rS   rH   )r      r   rW   r'   r'   gQ?gQ?)r   r   r   r   )rK   r7   rL   Zari_1Zari_2Zri_1Zri_2r$   r$   r%   test_non_consecutive_labels   s     rX   
   r=   c                 C   sr   t j|j}t t||f}t|D ]D\}}t|D ]2}	|d||d}
|d||d}| |
||||	f< q8q(|S )Nr   )lowhighsize)nprandomRandomStaterandintzeroslen	enumeraterange)r"   	n_samplesZk_rangen_runsseedZrandom_labelsscoresikjlabels_alabels_br$   r$   r%   uniform_labelings_scores   s    rn   c                  C   sD   g d} d}d}t t|| |}t|jdd}t|g dd d S )N)r'   rY   2   Z   d   rY   r   )Zaxis){Gz?Q?rs   rr   r'   )rn   r   r]   absr,   r   )Zn_clusters_rangere   rf   rh   Zmax_abs_scoresr$   r$   r%   test_adjustment_for_chance   s    ru   c            	      C   s   t g d} t g d}t| |}t|dd t| |dd}t| ||d}t|dd t| |}t| ||d}t|dd | }t||}t|dd t| |}t|d	d tg d
g d}|t	dksJ t t
| d g }t t
|d g }t||}t|dd d S )Nr   r   r   r   r   r   r'   r'   r'   r'   r'   r'   r5   r5   r5   r5   r5   r   r   r   r   r'   r   r'   r'   r'   r'   r5   r   r5   r5   r5   r'   r'   gS
cA?   Tsparse)ZcontingencygpUj@?gP1?)r   r   r'   r'   )r'   r'   r5   r5   r<   n   gRQ?r'   )r]   arrayr   r   r	   sumr   r   r   rE   listflatten)	rl   rm   miCre   ZemiZamiZa110Zb110r$   r$   r%   test_adjusted_mutual_info_score   s*    




r   c                   C   s    t tdggddksJ d S )Nip r   )r   r]   r|   r$   r$   r$   r%   "test_expected_mutual_info_overflow   s    r   c                  C   s   t dgd dgd  dgd  dgd  d	gd
  } t dgd dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  }tt| | tt| | d S )Nr   iy  r'   i]<  r5   i  rW   iU  rx   iP  r   i  i	  iD9  i  i     i.  '   i<     )r]   r|   r   r   r   )xyr$   r$   r%   3test_int_overflow_mutual_info_fowlkes_mallows_score   sD    	r   c                   C   s:   t tg ddd t tg d tg ddks6J d S )Nr   r   r@   ,^R^?rx   r   r   r   r   r   r   )r   r
   r$   r$   r$   r%   test_entropy  s    r   z#array_namespace, device, dtype_namec                 C   s   t | |}|jtjg d|d|d}|jg |j|d}|jg d|d}tddJ t|tjdd	d
kspJ t|dksJ t|dksJ W d    n1 s0    Y  d S )Nr   dtype)device)r   r   r   T)Zarray_api_dispatchr   gh㈵>)rt   r   r   )r   Zasarrayr]   Zint32r   r
   r   rE   )Zarray_namespacer   Z
dtype_nameZxpZfloat_labelsZempty_int32_labelsZ
int_labelsr$   r$   r%   test_entropy_array_api  s    
r   c                  C   sx   t g d} t g d}t| |}t j| |t ddt ddfdd }t|| t| |dd}t||d  d S )	Nrv   rw   r   rx   )Zbinsr   g?)eps)r]   r|   r	   Zhistogram2daranger   )rl   rm   r   ZC2r$   r$   r%   test_contingency_matrix  s    
(
r   c                  C   s   t g d} t g d}t| |}t| |dd }t|| tjtdd  t| |ddd W d    n1 sv0    Y  d S )	Nrv   rw   Try   z!Cannot set 'eps' when sparse=Truer   g|=)r   rz   )r]   r|   r	   Ztoarrayr   r   r    r!   )rl   rm   r   ZC_sparser$   r$   r%   test_contingency_matrix_sparse   s    

r   c                  C   s   t dddtD ]} t j| tdt j| td }}t||tdksNJ t	||tdksfJ t
||tdks~J t||tdksJ dD ]<}t
|||dtdksJ t|||dtdksJ qqd S )Nr   rW   r   r?   r(   rC   )r]   logspaceastypeintZonesr   r   r   rE   r   r   )ri   rl   rm   r0   r$   r$   r%   test_exactly_zero_info_score*  s"    r   $   c                 C   s   t dddtD ]x}t j| }|dd||dd| }}tt||dt	|| t
|t
|  d d}tt||t|||d qd S )Nr   rW   r   rY   rB   r+   rC   )r]   r   r   r   r^   r_   r`   r   r   r   r
   r   )rg   ri   Zrandom_staterl   rm   Zavgr$   r$   r%   %test_v_measure_and_mutual_information;  s&    r   c                  C   sb   t g dg d} t| dtd  t g dg d}t|d t g dg d}t|d	 d S )
NrH   rN   g      @g      R@)r   r   r   r   r   r   r<   )r   r   r   r   r   r   )r   r   r'   r5   rW   rx   r?   )r   r   r]   sqrt)ZscoreZperfect_scoreZworst_scorer$   r$   r%   test_fowlkes_mallows_scoreQ  s    
r   c                  C   s   t g d} t g d}dt d }t| |}t|| t|| }t|| t| d d |}t|| t|| d d }t|| d S )N)r   r   r   r   r   r'   )r   r   r'   r'   r   r   r<   g      (@r   r5   r'   )r]   r|   r   r   r   )rl   rm   r#   Zscore_originalZscore_symmetricZscore_permutedZ
score_bothr$   r$   r%   %test_fowlkes_mallows_score_properties_  s    




r   zlabels_true, labels_predr1      )r   r   r   r   r   r   r   c                 C   s   t | |dksJ d S )Nr   )r   )Zlabels_trueZlabels_predr$   r$   r%   .test_mutual_info_score_positive_constant_labelv  s    r   c                  C   sh   t jd} | d}t dddd }d}tjt|d t|| W d    n1 sZ0    Y  d S )Nr=   i  g{Gz?r   gư>zuClustering metrics expects discrete values but received continuous values for label, and continuous values for targetr   )	r]   r^   r_   ZrandZlinspacer   ZwarnsUserWarningr   )rngnoiseZ
wavelengthmsgr$   r$   r%   test_check_clustering_error  s    
r   c                  C   sF   d} t t| }|}t| | d  dgddgg}tt||| d S )Nrq   r   r   )r~   rd   r]   r|   r   r   Nclustering1clustering2r#   r$   r$   r%   *test_pair_confusion_matrix_fully_dispersed  s
    r   c                  C   sF   d} t | f}|}t ddgd| | d  gg}tt||| d S )Nrq   r   r   )r]   ra   r|   r   r   r   r$   r$   r%   )test_pair_confusion_matrix_single_cluster  s
    r   c                     s   d  d } t  fddt D }t  fddt D d |  }t jdt jd}tt|D ]Z}tt|D ]H}||krxt|| || k}t|| || k}|||f  d7  < qxqhtt||| d S )	NrY   r'   c                    s   g | ]}|d  g  qS r   r$   r/   ri   nr$   r%   r3     r4   z.test_pair_confusion_matrix.<locals>.<listcomp>c                    s   g | ]}|d  g d   qS r   r$   r   r   r$   r%   r3     r4   )r'   r'   )shaper   r   )	r]   Zhstackrd   ra   Zint64rb   r   r   r   )r   r   r   r#   ri   rk   Zsame_cluster_1Zsame_cluster_2r$   r   r%   test_pair_confusion_matrix  s    $r   zclustering1, clustering2rq   )rq   c                 C   s   t t| |d d S )Nr<   r   r   )r   r   r$   r$   r%   test_rand_score_edge_cases  s    r   c            	      C   s`   g d} g d}d}d}d}d| | | }|| }|| | | }|| }t t| || d S )NrH   rP   rW      r'      r   )	r   r   ZD11ZD10ZD01ZD00Zexpected_numeratorZexpected_denominatorr#   r$   r$   r%   test_rand_score  s    r   c                  C   sv   t jd} | jdddt jd}| jdddt jd}t & tdt t	|| W d   n1 sh0    Y  dS )zCheck that large amount of data will not lead to overflow in
    `adjusted_rand_score`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20305
    r   r'   i r   errorN)
r]   r^   r_   r`   Zint8warningscatch_warningssimplefilterRuntimeWarningr   )r   Zy_trueZy_predr$   r$   r%   !test_adjusted_rand_score_overflow  s    
r   rD   )r)   r+   r*   r,   c                 C   sv   dgd }dg|dd  }ddg|dd  }t ||| d}|dksJJ t ||| d}d|  krldk srn J dS )zCheck that nmi returns a score between 0 (included) and 1 (excluded
    for non-perfect match)

    Non-regression test for issue #13836
    r   i  r   Nr'   rC   )r   )rD   Zlabels1Zlabels2Zlabels3Znmir$   r$   r%   )test_normalized_mutual_info_score_bounded  s    
r   )rY   r=   )r   )Gr   numpyr]   r   Znumpy.testingr   r   r   Zsklearn.baser   Zsklearn.metrics.clusterr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   Z#sklearn.metrics.cluster._supervisedr   r   Zsklearn.utilsr   Zsklearn.utils._array_apir   Zsklearn.utils._testingr   r   r   r&   r;   rG   rM   rO   rT   rV   rX   rn   ru   r   r   r   r   markZparametrizer   r   r   r   r   r   r   r   r   r   r   r   r~   rd   ra   r   r   r   r   r$   r$   r$   r%   <module>   s   @
#






		*
