a
    hr                     @   sZ	  d Z ddlZddlZddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 e2g dg dg dgZ3dZ4e3j5\Z6Z7ee4e3ddd\Z8Z9dd e/D Z:ej2ge/ Z;e<e:dkrzg dnddgZ=ej>j?de;e=d ej>?d!d"d#gej>?d$ej@ejAgd%d& ZBej>j?de;e=d ej>?d!d"d#gd'd( ZCej>j?de;e=d d)d* ZDej>?d+d,d-gej>j?de;e=d ej>?d.g d/d0d1 ZEej>?d2d"d#gd3d4 ZFej>?d5e:d6d7 ZGd8d9 ZHej>j?d:e8ge: e=d ej>j?d;d<d=e3d>d? gg d@d ej>?dAeegdBdC ZIej>j?d;d<d=e3dDd? gg d@d dEdF ZJej>?dGdHdIdJd? dKfdLgej>?dAeegdMdN ZKej>?dAeegdOdP ZLdQdR ZMej>?d2d"d#gej>?d.dSdgdTdU ZNdVdW ZOej>?dAeegdXdY ZPdZd[ ZQej>j?d:e8ge: e=d d\d] ZRd^d_ ZSd`da ZTej>?dbdcddgdedf ZUdgdh ZVdidj ZWej>?dAeegdkdl ZXej>j?de;e=d ej>?dmed"fed#fedfgej>?dnddgdodp ZYej>?d5e:ej>?dAeegdqdr ZZej>?d5e:ej>j?d;d<d=e3gg dsd ej>?dAeegdtdu Z[ej>j?de;e=d ej>?d$ej\ej]gej>?d;d=dvgej>?dAeegdwdx Z^ej>?dAeegdydz Z_ej>?dAeegd{d| Z`d}d~ Zadd Zbej>j?d:e8ge: e=d ej>?dAeegdd Zcej>?d$ej\ej]ej@ejAgej>?dAeegdd Zdej>j?d:e8ge: e=d dd Zedd Zfdd Zgdd Zhej>j?d:e8ge: e=d ej>?dAeegdd Ziej>j?d:e8ge: e=d ej>?dAeegdd Zjdd Zkej>j?de;e=d dd Zlej>?dAeegdd Zmdd Znej>j?de;e=d ej>?d!d"d#gdd Zoej>?d$ej@ejAgej>?dddgdd Zpej>?d$ej@ejAgdd Zqej>?dedfedfgdd Zrej>?dAeegdd Zsej>?dAeegej>?dde4d idfd;e8dd idfd;dd? idfd;e8ddddf idfd;dd? idfgdd Ztej>?dde8dd idfgdd Zuej>?d:e8ge: ej>?d$ejAej@gdd Zvej>?de-e8dddgdd Zwdd Zxdd Zyej>?dd;ezd=iddgddggddfddĄ Z{ej>?dedfedfedfgddɄ Z|ej>?de/dg dd̄ Z}ej>?d;d=d<gdd΄ Z~ej>?d;d=d<gddЄ Zej>j?de;e=d ej>?d2d"d#gdd҄ ZdS )zTesting for K-means    N)StringIO)sparse)clone)KMeansMiniBatchKMeansk_meanskmeans_plusplus)_euclidean_dense_dense_wrapper_euclidean_sparse_dense_wrapper_inertia_dense_inertia_sparse_is_same_clustering_relocate_empty_clusters_dense_relocate_empty_clusters_sparse)_labels_inertia_mini_batch_step)
make_blobs)ConvergenceWarning)pairwise_distancespairwise_distances_argmin)v_measure_score)euclidean_distances)assert_allcloseassert_array_equalcreate_memmap_backed_data)	row_norms)CSR_CONTAINERS)_get_threadpool_controller)              @r   r   r   )      ?r    g      @r   r   )r    r   r   r   r    d   r    *   )	n_samplescentersZcluster_stdrandom_statec                 C   s   g | ]}|t qS  )X).0	containerr&   r&   ^/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/cluster/tests/test_k_means.py
<listcomp>2       r+      )densesparse_matrixZsparse_arrayr.   r/   array_constr)ZidsalgoZlloydelkandtypec                 C   s   | ddgddgddgddgg|d}g d}t jddgddgg|d}g d}d}t jddgd	dgg|d}d
}	td
d||d}
|
j||d t|
j| t|
j| t|
j| |
j	|	ksJ d S )Nr         ?   r3   )   r5   r5   r7   r   r   r5   r5   g      ?g      ?g      ?r-   
n_clustersn_initinit	algorithmsample_weight)
nparrayr   fitr   labels_r   inertia_cluster_centers_n_iter_)r0   r1   r3   r'   r?   init_centersexpected_labelsexpected_inertiaexpected_centersexpected_n_iterkmeansr&   r&   r*   test_kmeans_results;   s    $rM   c           	      C   s   | ddgddgddgddgg}t ddgddgg}tdd||d}|| d}d}t|j| |j|ksrJ z4g d}ddgd	dgg}t|j| t|j	| W nB t
y   g d
}d	dgddgg}t|j| t|j	| Y n0 d S )Nr   r4   r5   r7   r-   r9   g      ?r8   g      ?)r5   r5   r   r   r    r   )r@   rA   r   rB   r   rD   rF   r   rC   rE   AssertionError)	r0   r1   r'   rG   rL   rK   rI   rH   rJ   r&   r&   r*   test_kmeans_relocated_clustersS   s$     
rO   c              	   C   s   t g ddd}| |}t d}t g ddd}t g ddd}t g d}t jdt jd}| t ju rt|||||| nt|j|j	|j
||||| t|g d	 t|d
gdgdgg d S )N)
      $g      #ig      !ir5   	         #@
   rQ   r5   rT   )rP   rU   )g     0rU   rU   )g      $@r   r   r6   )   r5   r5   irS   )r@   rA   reshapeoneszerosint32r   r   dataindicesZindptrr   r   )r0   r'   r?   centers_oldcenters_newZweight_in_clusterslabelsr&   r&   r*   test_relocate_empty_clustersr   s.    

r`   distributionnormalZblobstol){Gz?g:0yE>g0.++r   c           	      C   s   t j|}| dkr"|jdd}nt|d\}}d||dk < ||}td|d|d}td	d|d|d
}|| || t|j|j t	|j
|j
 |j|jksJ |jtj|jddksJ d S )Nrb   i  rT   sizer%   r      r5   )r:   r%   r;   rc   r2   )r=   r:   r%   r;   rc   ư>)rel)r@   randomRandomStaterb   r   r   rB   r   rE   r   rC   rF   rD   pytestZapprox)	ra   r0   rc   global_random_seedrndr'   _Zkm_lloydZkm_elkanr&   r&   r*   test_kmeans_elkan_results   s(    

rr   r=   c                 C   sH   t j|}|jdd}d}t| d|dd|d|}|j|k sDJ d S )Nre   rf   i,  ri   r5   r   )r=   r:   r%   r;   rc   max_iter)r@   rl   rm   rb   r   rB   rF   )r=   ro   rp   r'   rs   kmr&   r&   r*   test_kmeans_convergence   s    	ru   X_csrc              	   C   sv  t j|}t|jtjd }| }t |}t |}t j|jd t	j
d}t j|jd t	j
d}t jt	jd t	j
d}	t	d d }
| d d }|	d d }t|
||||t j|dd}|dksJ t|
||\}}|dksJ ||k sJ t|||||t j|dd}|dksJ t|||\}}|dks<J ||k sJJ t|| t|| t|| t|| d S )Nrf   r   r6   rT   F)random_reassignr   )r@   rl   rm   r$   rb   shapecopyZ
zeros_likerY   r'   r3   rX   r   r   r   r   )rv   ro   rngr]   Zcenters_old_csrr^   Zcenters_new_csrZweight_sumsZweight_sums_csrr?   ZX_mbZX_mb_csrZsample_weight_mbZold_inertiar_   Znew_inertiaZold_inertia_csrZ
labels_csrZnew_inertia_csrr&   r&   r*   !test_minibatch_update_consistency   sV    


	
	


r{   c                 C   sX   | j }|jttfksJ | j}t|jd tks6J ttt	|d | j
dksTJ d S )Nr   r    r   )rE   rx   r:   
n_featuresrC   r@   uniquer   r   true_labelsrD   )rt   r$   r_   r&   r&   r*   _check_fitted_model  s    r   
input_datar<   rl   	k-means++c                 C   s   t S Nr$   r'   kr%   r&   r&   r*   <lambda>"  r,   r   )rl   r   ndarraycallable	Estimatorc                 C   s4   t |trdnd}| |td|d|}t| d S )NrT   r5   r"   r<   r:   r%   r;   )
isinstancestrr:   rB   r   )r   r   r<   r;   rt   r&   r&   r*   test_all_init  s    r   c                 C   s   t S r   r   r   r&   r&   r*   r   1  r,   c                 C   sF   t | trdnd}t| td|d}tdD ]}|t q*t| d S )NrT   r5   r   r   r!   )r   r   r   r:   rangepartial_fitr'   r   )r<   r;   rt   ir&   r&   r*   &test_minibatch_kmeans_partial_fit_init/  s    r   zinit, expected_n_init)r   r5   )rl   defaultc                 C   s   |j || jd fdS )Nr5   rf   )uniformrx   )r'   r:   r%   r&   r&   r*   r   F  s   r   )
array-liker5   c                 C   sl   d\}}}t j||}|dkr.t j||}|dkrF| tu rBdnd}| ||dd|}|j|kshJ dS )	zCheck that `n_init="auto"` chooses the right number of initializations.
    Non-regression test for #26657:
    https://github.com/scikit-learn/scikit-learn/pull/26657
    )r!   rT   ri   r   r   r7   rT   autor:   r<   r;   N)r@   rl   randnr   rB   _n_init)r   r<   Zexpected_n_initZn_sampler|   r:   r'   rL   r&   r&   r*   ,test_kmeans_init_auto_with_initial_centroids@  s    
r   c                 C   s`   t t}t t}| ttd|dt}| t|d|d|}t|j|j t|j	|j	 d S )Nr5   r:   r<   r;   r%   )
r@   asfortranarrayr'   r$   r:   rB   r   rE   r   rC   )r   ro   	X_fortrancenters_fortranZkm_cZkm_fr&   r&   r*   test_fortran_aligned_data_  s"    

r   c                  C   s>   t tddd} tj}t t_z| t W |t_n|t_0 d S )Nr"   r5   )r:   r%   verbose)r   r:   sysstdoutr   rB   r'   )rt   Z
old_stdoutr&   r&   r*   test_minibatch_kmeans_verboser  s    r   rd   c              	   C   s   t jdjdd}t| tddd|dd| | }t	d|j
sJJ t	d	|j
s\J |dkrxt	d
|j
sJ nt	d|j
sJ d S )Nr   re   rf   r"   rl   r5   )r=   r:   r%   r<   r;   rc   r   zInitialization completezIteration [0-9]+, inertiazstrict convergencez center shift .* within tolerance)r@   rl   rm   rb   r   r:   rB   
readouterrresearchout)r=   rc   capsysr'   capturedr&   r&   r*   test_kmeans_verbose}  s$    
r   c                   C   sD   t jtdd" tdddt W d    n1 s60    Y  d S )Nz,init_size.* should be larger than n_clustersmatchrT      )	init_sizer:   )rn   warnsRuntimeWarningr   rB   r'   r&   r&   r&   r*   'test_minibatch_kmeans_warning_init_size  s    r   c                 C   sF   t jtdd$ | ttddt W d    n1 s80    Y  d S )NzAExplicit initial center position passed: performing only one initr   rT   r<   r:   r;   )rn   r   r   r$   r:   rB   r'   )r   r&   r&   r*   'test_warning_n_init_precomputed_centers  s
    r   c                 C   s   t dd| d\}}d|d d dd d f< tdd| dd	|}|jjd
d }|dkshJ d|dtdd| dd	|}|jjd
d }|dksJ d|dtd| dd}tdD ]}|| q|jjd
d }|dksJ d|dd S )Nr!   ri   )r#   r$   r%   r   r-   r   rT   rl   )r:   
batch_sizer%   r<   r5   ZaxisrR   znum_non_zero_clusters=z is too small   )r:   r%   r<   )r   r   rB   rE   anysumr   r   )ro   Zzeroed_Xr~   rt   Znum_non_zero_clustersr   r&   r&   r*    test_minibatch_sensible_reassign  s.    
r   c              
   C   s   t ttf}ttD ]}tt|k jdd||< qt t	}t 
|}t| ||dd  }t| |||t tt j|ddd t| ||dd  }||ksJ t| |||t tt j|ddd t|| d S )Nr   r   r5   T)rw   Zreassignment_ratiogV瞯<)r@   emptyr:   r|   r   r'   r~   meanrX   r#   Z
empty_liker   r   rY   rl   rm   r   )r   ro   Zperfect_centersr   r?   r^   Zscore_beforeZscore_afterr&   r&   r*   test_minibatch_reassign  s:    



r   c                   C   s   t ddtdddt d S )Nr!   rT   r"   T)r:   r   r   r%   r   )r   r#   rB   r'   r&   r&   r&   r*   &test_minibatch_with_many_reassignments  s    r   c                  C   sp   t ddddt} | jdks"J t ddddt} | jdksDJ t dddtd dt} | jtkslJ d S )NrT   ri   r5   )r:   r   r;         )r:   r   r;   r   )r   rB   r'   Z
_init_sizer#   rt   r&   r&   r*   test_minibatch_kmeans_init_size  s    r   ztol, max_no_improvement)-C6?N)r   rT   c                 C   s   t dddd\}}}td|d|dddd|d	}|| d|j  k rNdk sTn J |  }|d u rrd	|jv srJ |dkrd
|jv sJ d S )Nr7   r   T)r$   r%   Zreturn_centersr   rT   r5   )	r:   r<   r   rc   r%   rs   r;   r   max_no_improvementz Converged (small centers change)z*Converged (lack of improvement in inertia))r   r   rB   rF   r   r   )r   rc   r   r'   rq   r$   rt   r   r&   r&   r*   #test_minibatch_declared_convergence  s&    
r   c                  C   s   d} t jd }td| ddt }|jt|j|  | ks@J t|jt	sPJ td| ddd ddt }|jdksxJ |jd| |  ksJ t|jt	sJ d S )Nr   r   r7   )r:   r   r%   rT   )r:   r   r%   rc   r   rs   )
r'   rx   r   rB   rF   r@   ceilZn_steps_r   int)r   r#   rt   r&   r&   r*   test_minibatch_iter_steps;  s$    
	r   c                  C   s6   t  } tdtdd}||  t| t| t  d S )NFr"   )Zcopy_xr:   r%   )r'   ry   r   r:   rB   r   r   )Zmy_Xrt   r&   r&   r*   test_kmeans_copyxT  s
    
r   c                 C   s`   t j|dd}| d|dd}|||}| d|dd}|||}||ks\J d S )Nr!   rT   r5   )r;   r%   rs   )r@   rl   rm   r   rB   Zscore)r   ro   r'   km1s1km2s2r&   r&   r*   test_score_max_iter_  s    r   zEstimator, algorithmrs   c                 C   s   t ddd|d\}}|||d}| ddd||d}|d urF|j|d || |j}	||}
t|
|	 ||}
t|
|	 ||j}
t|
t	d d S )Nr   rT   r#   r|   r$   r%   r6   rl   )r:   r<   r;   rs   r%   )r=   )
r   
set_paramsrB   rC   predictr   fit_predictrE   r@   Zarange)r   r=   r0   rs   Zglobal_dtypero   r'   rq   rt   r_   predr&   r&   r*   test_kmeans_predictl  s*    





r   c                 C   sl   t j|tf}| t|dd}|jt|d | t|dd}|j||d t|j	|j	 t
|j|j d S Nr5   r:   r%   r;   r>   )r@   rl   rm   Zrandom_sampler#   r:   rB   r'   r   rC   r   rE   )r   rv   ro   r?   Zkm_denseZ	km_sparser&   r&   r*   test_dense_sparse  s    r   )rl   r   r   c                 C   s^   t |trdnd}| t||dd}|| t|t|j |t t|||j d S )NrT   r5   r   r   )r   r   r:   rB   r   r   r'   rC   )r   r<   rv   r;   rt   r&   r&   r*   test_predict_dense_sparse  s    

r   r   c           
   	   C   s   t ddgddgddgddgddgddgg}|||d	}|d
krFdnd}|d
kr^|d d n|}| d|||d}| tu r|jdd || |jjt jksJ g d}	tt	|j
|	d | tu rt||}|jjt jksJ d S )Nr   rT      rR   rQ   r5   r-   rV   r6   r   r   )r   )r   r5   r5   r   r   r5   r    )r@   rA   r   r   rB   rE   r3   float64r   r   rC   r   r   )
r   r0   r3   r<   ro   X_denser'   r;   rt   rH   r&   r&   r*   test_integer_input  s     .
r   c                 C   sb   | t |dt}||j}t|t|j t| t	
t  |t}t|tt|j d S )Nr:   r%   )r:   rB   r'   	transformrE   r   r   r   Zdiagonalr@   rY   )r   ro   rt   Xtr&   r&   r*   test_transform  s    
r   c                 C   s8   | |dd tt}| |ddt}t|| d S )Nr5   )r%   r;   )rB   r'   r   Zfit_transformr   )r   ro   ZX1ZX2r&   r&   r*   test_fit_transform  s    r   c                 C   s:   t j}dD ]*}ttd|| ddt}|j|ks
J q
d S )N)r5   ri   rT   rl   r5   )r:   r<   r;   r%   rs   )r@   infr   r:   rB   r'   rD   )ro   Zprevious_inertiar;   rt   r&   r&   r*   test_n_init  s    r   c                 C   s`   t ttd | d\}}}|jttfks(J t|jd tks@J ttt	|d |dks\J d S )N)r:   r?   r%   r   r    r   )
r   r'   r:   rx   r|   r@   r}   r   r   r~   )ro   Zcluster_centersr_   inertiar&   r&   r*   test_k_means_function  s    r   c           
      C   s0  | d|d}i }i }i }i }t jt jfD ]}|j|dd}	||	 |j||< ||	||< |j||< |j||< |jj	|ksJ | t
u r(||	dd  |jj	|ks(J q(t|t j |t j dd t|t j |t j |t j  d d	 t|t j |t j |t j  d d	 t|t j |t j  d S )
Nr5   )r;   r%   Fry   r   r7   r   rtol)Zatol)r@   r   float32astyperB   rD   r   rE   rC   r3   r   r   r   maxr   )
r   r   ro   rt   r   r   r$   r_   r3   r'   r&   r&   r*   test_float_precision  s,    



( r   c                 C   sJ   t j|dd}tj|dd}| |tdd}|| t|j|rFJ d S )NFr   r5   r   )r'   r   r$   r:   rB   r@   Zmay_share_memoryrE   )r   r3   Z
X_new_typeZcenters_new_typert   r&   r&   r*   test_centers_not_mutated=  s
    
r   c                 C   s8   t td| }t t|jdd| }t|j|j d S )N)r:   r5   r   )r   r:   rB   rE   r   )r   r   r   r&   r&   r*   test_kmeans_init_fitted_centersL  s
    r   c                 C   s   t ddgddgddgddgg}td| d}d}tjt|d4 || t|jtt	dksfJ W d    n1 sz0    Y  d S )Nr   r5      r   zmNumber of distinct clusters \(3\) found smaller than n_clusters \(4\). Possibly due to duplicate points in X.r   r7   )
r@   asarrayr   rn   r   r   rB   setrC   r   )ro   r'   rt   msgr&   r&   r*   1test_kmeans_warns_less_centers_than_unique_points\  s    "
r   c                 C   s   t j| ddS Nr   r   )r@   sortr   r&   r&   r*   _sort_centersm  s    r   c                 C   s   t j| jddtd}t jt|dd}ttdt	| d}t
|jt|d}t |j|}t
||}t|j| t|j|j tt|jt|j d S )Nr5   ri   rf   r   r   )r<   r;   r:   r%   r>   )r@   rl   rm   randintr#   repeatr'   r   r$   r:   r   rB   rC   r   r   rD   r   rE   )ro   r?   ZX_repeatrt   Zkm_weightedZrepeated_labelsZkm_repeatedr&   r&   r*   test_weighted_vs_repeatedq  s     r   c                 C   s\   t t}| t|dd}t|j|d d}t|j||d}t|j|j t|j	|j	 d S r   )
r@   rX   r#   r:   r   rB   r   rC   r   rE   )r   r   ro   r?   rt   Zkm_noneZkm_onesr&   r&   r*   test_unit_weights_vs_no_weights  s    	
r   c                 C   sj   t j|jtd}| t|dd}t|j||d}t|j|d| d}t|j	|j	 t
|j|j d S )Nrf   r5   r   r>   r4   )r@   rl   rm   r   r#   r:   r   rB   r   rC   r   rE   )r   r   ro   r?   rt   Zkm_origZ	km_scaledr&   r&   r*   test_scaled_weights  s    	r   c                  C   s$   t dddt} | jdks J d S )Nr2   r5   )r=   rs   )r   rB   r'   rF   r   r&   r&   r*    test_kmeans_elkan_iter_attribute  s    r   c                 C   st   | dgdgg}ddg}t dgdgg}td|dd}|j||d tt|jdks\J t|jdgdgg d S )	NrQ   r5   gffffff?g?rT   r-   r   r>   )	r@   rA   r   rB   lenr   rC   r   rE   )r0   r'   r?   r<   rt   r&   r&   r*   #test_kmeans_empty_cluster_relocated  s    r   c                 C   s   t j|}|jdd}t jddd$ | t|d|j}W d    n1 sR0    Y  t jddd$ | t|d|j}W d    n1 s0    Y  t	|| d S )N)2   rT   rf   r5   Zopenmp)ZlimitsZuser_apir   r-   )
r@   rl   rm   rb   r   limitr:   rB   rC   r   )r   ro   rp   r'   Zresult_1Zresult_2r&   r&   r*   #test_result_equal_in_diff_n_threads  s      r   c                   C   sD   t jtdd" tdddt W d    n1 s60    Y  d S )Nz9algorithm='elkan' doesn't make sense for a single clusterr   r5   r2   )r:   r=   )rn   r   r   r   rB   r'   r&   r&   r&   r*   test_warning_elkan_1_cluster  s
    r   c                 C   sz   t j|jdd}|d d }| |}dd }|||\}}tdd||dd|}|j}	|j}
t||	 t	||
 d S )N)r!   ri   rf   ri   c                 S   sP   |  }t| |}t|jd D ]}| ||k jdd||< q t| |}||fS r   )ry   r   r   rx   r   )r'   r<   Znew_centersr_   labelr&   r&   r*   	py_kmeans  s    

z+test_k_means_1_iteration.<locals>.py_kmeansr5   )r:   r;   r<   r=   rs   )
r@   rl   rm   r   r   rB   rC   rE   r   r   )r0   r1   ro   r'   rG   r  Z	py_labelsZ
py_centersZ	cy_kmeansZ	cy_labelsZ
cy_centersr&   r&   r*   test_k_means_1_iteration  s    

r  squaredTFc                 C   s   t j|}tjdddd|| d}| d}|dj| dd}|d	  }|| d	  }|rh|nt 	|}t
|||}	t|j|j|||}
| t jkrd
nd}t|	|
|d t|	||d t|
||d d S )Nr5   r!   r4   csrZdensityformatr%   r3   rQ   Fr   r-   r   gHz>r   )r@   rl   rm   sptoarrayrW   r   r   r   sqrtr	   r
   r[   r\   r   r   )r3   r  ro   rz   Za_sparseZa_densebZb_squared_normexpectedZdistance_dense_denseZdistance_sparse_denser   r&   r&   r*   test_euclidean_distance  s"    r  c                 C   s|  t j|}tjdddd|| d}| }|dj| dd}|ddj| dd}|jddt jd	}|||  d
 j	dd}t 	|| }	t
||||dd}
t||||dd}| t jkrdnd}t|
||d t|
|	|d t||	|d d}||k}|| ||  d
 j	dd}t 	|||  }	t
||||d|d}
t||||d|d}t|
||d t|
|	|d t||	|d d S )Nr!   rT   r4   r  r  Fr   ri   )rg   r3   r-   r5   r   )	n_threadsr   rj   r   )r  Zsingle_label)r@   rl   rm   r  r	  r   r   r   rZ   r   r   r   r   r   )r3   ro   rz   ZX_sparser   r?   r$   r_   Z	distancesr  Zinertia_denseZinertia_sparser   r  maskr&   r&   r*   test_inertia  s>    
r  zKlass, default_n_initrT   r7   c                 C   s\   | ddd}| t |jdks$J | ddd}| t | jdkrP|jdksXndsXJ d S )	Nr   r   )r;   r<   r5   rl   r   rT   r7   )rB   r'   r   __name__)KlassZdefault_n_initZestr&   r&   r*   test_n_init_autoI  s    

r  c                 C   sR   t dgdgdgg}t g d}| dddj||d t|t g d d S )Nr5   r-   r   )r4   g?g333333?r   r   r>   )r@   rA   rB   r   )r   r'   r?   r&   r&   r*   test_sample_weight_unchangedT  s    r  zparam, matchr:   r5   z#n_samples.* should be >= n_clusterszIThe shape of the initial centers .* does not match the number of clustersc                 C   s   | d d S )Nr-   r&   ZX_r   r%   r&   r&   r*   r   i  r,   rV   zUThe shape of the initial centers .* does not match the number of features of the datac                 C   s   | d dd df S )NrV   r-   r&   r  r&   r&   r*   r   s  r,   c                 C   sR   | dd}t jt|d& |jf i |t W d    n1 sD0    Y  d S )Nr5   )r;   r   )rn   raises
ValueErrorr   rB   r'   )r   paramr   rt   r&   r&   r*   test_wrong_params^  s    
r  x_squared_normszKThe length of x_squared_norms .* should be equal to the length of n_samplesc                 C   sD   t jt|d" tttfi |  W d    n1 s60    Y  d S )Nr   )rn   r  r  r   r'   r:   )r  r   r&   r&   r*   !test_kmeans_plusplus_wrong_params  s    r  c                 C   s   |  |}t|t|d\}}|jd tks.J |dk s>J ||jd k sTJ |jd tksfJ |jdd|jddk sJ |jdd|jddk sJ tt|  || d S )Nrh   r   r   )	r   r   r:   rx   allr   minr   r'   )r   r3   ro   r[   r$   r\   r&   r&   r*   test_kmeans_plusplus_output  s    

  r  r  c                 C   s$   t tt| d\}}tt| | d S )N)r  )r   r'   r:   r   )r  r$   r\   r&   r&   r*   test_kmeans_plusplus_norms  s    r   c                 C   s<   t tt| d\}}tt}t |t| d\}}t|| d S )Nrh   )r   r'   r:   r@   r   r   )ro   Z	centers_crq   r   r   r&   r&   r*   test_kmeans_plusplus_dataorder  s    

r!  c                  C   sp   t jg dt jd} t| | ds$J t jg dt jd}t| |dsHJ t jg dt jd}t| |drlJ d S )N)r5   r   r   r5   r-   r   r-   r5   r6   r7   )r   r-   r-   r   r5   r-   r5   r   )r5   r   r   r-   r-   r   r-   r5   )r@   rA   rZ   r   )Zlabels1Zlabels2Zlabels3r&   r&   r*   test_is_same_clustering  s    r"  kwargs)r<   r;   c                 C   sH   t jddgddgddgddggt jd}tf ddi| }|| dS )zZCheck that init works with numpy scalar strings.

    Non-regression test for #21964.
    r   r4   r5   r6   r:   r-   N)r@   r   r   r   rB   )r#  r'   Z
clusteringr&   r&   r*   -test_kmeans_with_array_like_or_np_scalar_init  s    (r$  zKlass, methodrB   r   c                    sR   | j   |  }t||t |jjd }| }t fddt|D | dS )z=Check `feature_names_out` for `KMeans` and `MiniBatchKMeans`.r   c                    s   g | ]}  | qS r&   r&   )r(   r   
class_namer&   r*   r+     r,   z*test_feature_names_out.<locals>.<listcomp>N)	r  lowergetattrr'   rE   rx   Zget_feature_names_outr   r   )r  methodrL   r:   Z	names_outr&   r%  r*   test_feature_names_out  s    
r*  csr_containerc                 C   sd   t ddddd\}}| dur$| |}t }||}t|j|_t|j|_||}t|| dS )z_Check that predict does not change cluster centers.

    Non-regression test for gh-24253.
    r   rT   r   r   N)r   r   r   r   rE   rC   r   r   )r+  r'   rq   rL   Zy_pred1Zy_pred2r&   r&   r*   ,test_predict_does_not_change_cluster_centers  s    

r,  c           	   	   C   s   t j|}tddd|d\}}t|dd}t }|j||| |j|jd ddt j|d	}|j||| t 	|jd dt j|d	}t
t t|| W d
   n1 s0    Y  d
S )zCheck that sample weight is used during init.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r   rT   r   Tr  r   rf   ri   r'   r  r<   r?   Zn_centroidsr%   N)r@   rl   rm   r   r   r   _init_centroidsr   rx   rX   rn   r  rN   r   )	r<   ro   rz   r'   rq   r  rL   clusters_weightedZclustersr&   r&   r*   test_sample_weight_init  s0    


r0  c           
   	   C   s   t j|}tddd|d\}}|j|jd d}d|ddd< t|dd	}t }|j||| |d
t j|d}t	|ddd |}	t 
t |	drJ dS )zCheck that if sample weight is 0, this sample won't be chosen.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r!   ri   r   r   rf   Nr-   Tr  rT   r-  )r@   rl   rm   r   r   rx   r   r   r.  r   r   isclose)
r<   ro   rz   r'   rq   r?   r  rL   r/  dr&   r&   r*   test_sample_weight_zero'  s$    


r3  c                 C   s   t ddgddgddgddgddgg}td|| d}d}tjt|d ||| W d   n1 sl0    Y  |jdksJ dS )zCheck that kmeans stops when there are more centers than non-duplicate samples

    Non-regression test for issue:
    https://github.com/scikit-learn/scikit-learn/issues/28055
    r   r5   ri   )r:   r<   r=   zENumber of distinct clusters \(4\) found smaller than n_clusters \(5\)r   N)r@   rA   r   rn   r   r   rB   rF   )r=   r0   r'   rt   r   r&   r&   r*   test_relocating_with_duplicatesE  s    (,r4  )__doc__r   r   ior   numpyr@   rn   Zscipyr   r  Zsklearn.baser   Zsklearn.clusterr   r   r   r   Zsklearn.cluster._k_means_commonr	   r
   r   r   r   r   r   Zsklearn.cluster._kmeansr   r   Zsklearn.datasetsr   Zsklearn.exceptionsr   Zsklearn.metricsr   r   Zsklearn.metrics.clusterr   Zsklearn.metrics.pairwiser   Zsklearn.utils._testingr   r   r   Zsklearn.utils.extmathr   Zsklearn.utils.fixesr   Zsklearn.utils.parallelr   rA   r$   r#   rx   r:   r|   r'   r~   ZX_as_any_csrZdata_containersr   Zdata_containers_idsmarkZparametrizer   r   rM   rO   r`   rr   ru   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rZ   Zint64r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r   r!  r"  Zstr_r$  r*  r,  r0  r3  r4  r&   r&   r&   r*   <module>   s  $	



)

?	



!
.

#

$


	
*


	

	


$



!
