a
    h(                    @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2m3Z3 dd Z4dd Z5dd Z6ej78dg dej78de2dd Z9ej78dg ddd Z:ej78dg dd d! Z;ej78dg dej78d"e1d#d$ Z<d%d& Z=d'd( Z>ej78d"e1d)d* Z?ej78d"e1d+d, Z@ej78dd-d.gej78d/deAeBgd0d1 ZCej78dd-d.gej78d2d3d4gd5d6 ZDej78dd7d8gej78d/eBeEd9eEd:gd;d< ZFej78d"e1d=d> ZGej78d?dejHd@dAd gdBdC ZIej78d/eAdDgdEdF ZJej78dGdHdIejHfgdJdK ZKdLdM ZLej78dNe2ejMg dOdP ZNej78d?dejHd@dAd gdQdR ZOej78d/eAdDgdSdT ZPej78dUdVgdWggdVgejHgggdXdY ZQdZd[ ZRd\d] ZSd^d_ ZTd`da ZUdbdc ZVej78ddg dedfdg ZWej78dhde e e e  gdidj ZXdkdl ZYdmdn ZZdodp Z[ej78dg ddqdr Z\dsdt Z]dudv Z^dwdx Z_ej78dydzd{gd|d} Z`d~d Zadd Zbdd Zcej7j8dd dedd gdz dgdz gfddedeje gdz ejegdz gfeje ejeedeje gdz ejegdz gfg dg dedg dg dgfdeje dgddejegeddeje dgddejeggfgg dddd Zfej78ddejeeje dfdd{gg ddfg dddgdfgdd Zgej7j8dddgeje ejegfddgdgd dgd gfgddgddd Zhej78dddgdd Ziej78dddVejjjkdVdgej78dddVejjjkdVdgdd Zlej78dedddVgdVdWggedddVgdVdggddddfejdddgddggeBdejdddgddggeBdi dfgdd Zmdd Znej78den ej78dddzedg dfddzedg dfgdd Zoej78de1e2 e0 e3 e/ dd Zpej78dg dej78dejdd fgeqee1e2 e0 e3 e/ ejHg ddĄ ZrddƄ Zsej78dejdddgddggeAddejdg dȢg dɢgeAdfedejHdIgdIejHggejHedg dʢg dˢgfejdejHdgdejHggeAdejHejdg dȢg dɢgeAdfejdddgddggeAddejdg dȢg dɢgeAdfgdd̈́ Ztej78deegej78ddejHdfdgddԄ Zuddք Zvej78de2dd؄ Zwej78deegddڄ Zxej78de1e2 e0 e3 e/ dd܄ Zyej78dddgdd Zzej78ddg dfdg dfgdd Z{ej78ddejHgdd Z|ej78ddejHgdd Z}ej78ddg deAddWfdg deAddVfdddgeAddWfdg deAddWfdg de~ddWfdVg de~ddVfdg de~ddWfdVg de~ddWfgdd Zej78dg ddd Zd d Zdd Zej78dddgdd Zdd Zd	d
 Zdd Zej78dejejgdd Zej78dddgej78dddgdd Zej78dddgej78dg dej78dddgdd Zej78d"e1dd Zdd Zej78dg ddd Zej78dg dej78dedg dg dgedejHdWdzdgejHd d!d"ggeddVdWdzejHgd{d d!ejHgggd#d$ ZdS (%      N)productsparse)kstest)tree)load_diabetes)DummyRegressor)ConvergenceWarning)enable_iterative_imputer)IterativeImputer
KNNImputerMissingIndicatorSimpleImputer)_most_frequent)ARDRegressionBayesianRidgeRidgeCV)GridSearchCV)Pipeline
make_union)_sparse_random_matrix)_convert_containerassert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal)BSR_CONTAINERSCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSLIL_CONTAINERSc                 C   s   t | | | j|jksJ d S N)r   dtypexy r&   \/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/impute/tests/test_impute.py"_assert_array_equal_and_same_dtype(   s    
r(   c                 C   s   t | | | j|jksJ d S r!   )r   r"   r#   r&   r&   r'   _assert_allclose_and_same_dtype-   s    
r)   c           
      C   s   d||f }t }| jjdks(|jjdkr,t}t||d}|| |  }	||j||	dd ||	||	dd t||d}|||  |||  }	t
|	r|	 }	||j||	dd ||	||	dd dS )zUtility function for testing imputation for a given strategy.

    Test with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlyz<Parameters: strategy = %s, missing_values = %s, sparse = {0}fmissing_valuesstrategyF)err_msgTN)r   r"   kindr   r   fit	transformcopystatistics_formatr   issparsetoarray)
XX_truer-   
statisticsr,   Zsparse_containerr.   Z	assert_aeimputerX_transr&   r&   r'   _check_statistics2   s$    
r<   r-   )meanmedianmost_frequentconstantcsr_containerc                 C   s   t jdd}t j|d d d< t| d}|||}|jdksDJ ||}|jdks\J t| d}||}|jdks~J d S )N
      r-   )rB   rC   )initial_strategy)nprandomrandnnanr   fit_transformshaper   )r-   rA   r7   r:   	X_imputedZiterative_imputerr&   r&   r'   test_imputation_shapeY   s    



rM   r=   r>   r?   c                 C   sh   t d}t j|d d df< t| d|}tjtdd || W d    n1 sZ0    Y  d S )N      r   rD   ZSkippingmatch)	rF   onesrI   r   r0   pytestwarnsUserWarningr1   )r-   r7   r:   r&   r&   r'    test_imputation_deletion_warningk   s
    
rX   c                 C   s   t d}tj}tjg dtd}|j||d|gd|ddgg|d}t| d	|}t	|j
| t jtd
d || W d    n1 s0    Y  d S )Npandasabcdr"         rC   rB   columnsrD   z6Skipping features without any observed values: \['b'\]rR   )rU   importorskiprF   rI   arrayobject	DataFramer   r0   r   Zfeature_names_in_rV   rW   r1   )r-   pdr,   feature_namesr7   r:   r&   r&   r'   .test_imputation_deletion_warning_feature_namesu   s    


rj   csc_containerc                 C   s   t d}d|d< ||}t| dd}tjtdd || W d    n1 sT0    Y  ||  tjtdd || W d    n1 s0    Y  d S )NrO   r   )r-   r,   zProvide a dense arrayrR   )	rF   rT   r   rU   raises
ValueErrorr0   r6   r1   )r-   rk   r7   r:   r&   r&   r'   test_imputation_error_sparse_0   s    
(rn   c                 O   s>   t | dr| jnt| }|dkr&tjS tj| g|R i |S Nsizer   )hasattrrp   lenrF   rI   r>   Zarrargskwargslengthr&   r&   r'   safe_median   s    rw   c                 O   s>   t | dr| jnt| }|dkr&tjS tj| g|R i |S ro   )rq   rp   rr   rF   rI   r=   rs   r&   r&   r'   	safe_mean   s    rx   c              
   C   sx  t jd}d}d}|| || f}t |d }t d|d d }|dd d  |dd d< dt jdd fd	t jd
d fg}|D ]\}}	}
t |}t |}t |d }t|d D ]Z}|| d dk|| d  || d  }t|d ||  || ||   d}|d | | }|d | }t 	|	|}||
t|d |  }|
|||||< t |||f|d d |f< d|	krt |t 	|| || f|d d |f< n(t ||t 	|| |f|d d |f< t j||d d |f  t j||d d |f  q|d	kr<t |jdd }nt |jdd }|d d |f }t|||||	|  qd S )Nr   rB   r`   ra   rC   r=   c                 S   s   t t| |fS r!   )rx   rF   hstackzvpr&   r&   r'   <lambda>       z-test_imputation_mean_median.<locals>.<lambda>r>   c                 S   s   t t| |fS r!   )rw   rF   ry   rz   r&   r&   r'   r~      r   Zaxis)rF   rG   RandomStatezerosarangerI   emptyrangemaxrepeatZpermutationrr   ry   shuffleisnananyallr<   )rk   rngdimdecrK   r   valuesZtestsr-   Ztest_missing_valuesZtrue_value_funr7   r8   Ztrue_statisticsjZnb_zerosZnb_missing_valuesZ	nb_valuesr{   r}   r|   Zcols_to_keepr&   r&   r'   test_imputation_mean_median   sN    

(&
 
r   c                 C   s   t dt jt jgdt jt jgddt jgddt jgddt jgddt jgddt jgddt jgg }t g dg d	g dg d
g dg dg dg dg }g d}t||d|t j|  d S )Nr   rQ   ra   rC   r   r   r   )rQ   rQ   rQ   )r   r         )r   rQ         @)ra   rQ         @)r   r         )r   rC         ?)r   rQ   r   r   r   r   r   r   r>   )rF   re   rI   Z	transposer<   )rk   r7   ZX_imputed_medianZstatistics_medianr&   r&   r'   $test_imputation_median_special_cases   s4    





r   r=   r>   r"   c                 C   sj   t jg dg dg dg|d}d}tjt|d$ t| d}|| W d    n1 s\0    Y  d S )Nr[   r\   rP   ra   e   gh	   r_   4non-numeric data:
could not convert string to float:rR   rD   )rF   re   rU   rl   rm   r   rJ   )r-   r"   r7   msgr:   r&   r&   r'   .test_imputation_mean_median_error_invalid_type  s
     
r   typelist	dataframec                 C   s|   g dg dg dg}|dkr2t d}||}d}t jt|d$ t| d}|| W d    n1 sn0    Y  d S )	Nr   r   r   r   rY   r   rR   rD   )rU   rd   rg   rl   rm   r   rJ   )r-   r   r7   rh   r   r:   r&   r&   r'   :test_imputation_mean_median_error_invalid_type_list_pandas  s    


r   r@   r?   USc                 C   s   t jt jt jddgt jdt jdgt jddt jgt jdddgg|d}d}tjt|d	* t| d
}||| W d    n1 s0    Y  d S )Nr[   r*   r]   r^   r\   r   r_   z#SimpleImputer does not support datarR   rD   )	rF   re   rI   rU   rl   rm   r   r0   r1   )r-   r"   r7   r.   r:   r&   r&   r'   /test_imputation_const_mostf_error_invalid_types,  s    

r   c                 C   sd   t g dg dg dg dg}t g dg dg dg dg}t||d	t jd
ddgd|  d S )N)r   r   r   rQ   )r   rC   r   rP   )r   r`   rP   r   )r   rC   rP      )rC   r   rQ   )rC   rP   rP   )r`   rP   rP   )rC   rP   r   r?   rC   rP   r   )rF   re   r<   rI   )rk   r7   r8   r&   r&   r'   test_imputation_most_frequentA  s    	r   markerZNAN c                 C   s   t j| | ddg| d| dg| dd| g| dddggtd}t jg dg d	g d
g dgtd}t| dd}|||}t|| d S )Nr[   r*   r]   r^   r\   r   r_   )r]   r[   r*   )r]   r^   r^   )r\   r^   r^   )r]   r^   r   r?   r+   )rF   re   rf   r   r0   r1   r   r   r7   r8   r:   r;   r&   r&   r'   %test_imputation_most_frequent_objects]  s&    





r   categoryc                 C   sj   t d}td}|j|| d}tjg dg dg dg dgtd}tdd	}|	|}t
|| d S )
NrY   ,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,r_   )r[   ir$   )r[   r   r%   )r[   r   r$   )r\   r   r$   r?   rD   rU   rd   ioStringIOZread_csvrF   re   rf   r   rJ   r   r"   rh   r*   dfr8   r:   r;   r&   r&   r'   $test_imputation_most_frequent_pandasz  s    



r   zX_data, missing_value)r`   r         ?c                 C   s   t jd| td}||d< d}d|dt|d}tjtt|d( t	|d	|d
}|
| W d    n1 st0    Y  d S )NrO   r_   r   r   r$   fill_value=
 (of type ) cannot be castrR   r@   r,   r-   
fill_value)rF   fullfloatr   rU   rl   rm   reescaper   rJ   )ZX_datamissing_valuer7   r   r.   r:   r&   r&   r'   +test_imputation_constant_error_invalid_type  s    r   c                  C   sl   t g dg dg dg dg} t g dg dg dg dg}td	d
ddd}|| }t|| d S )Nr   rC   rP   r   ra   r   rQ   r   r   r   r   r      r   r   r   )r   rC   rP   r   )ra   r   rQ   r   )r   r   r   r   )r   r   r   r   r   r@   r   Tr,   r-   r   keep_empty_features)rF   re   r   rJ   r   )r7   r8   r:   r;   r&   r&   r'    test_imputation_constant_integer  s    ""
r   array_constructorc              	   C   s   t t jddt jgdt jdt jgddt jt jgdddt jgg}t g dg dg d	g d
g}| |}| |}tdddd}||}t|| d S )N皙?r   333333??ffffff?      ?)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   r@   r   Tr-   r   r   )rF   re   rI   r   rJ   r   )r   r7   r8   r:   r;   r&   r&   r'   test_imputation_constant_float  s"    	
r   c                 C   s   t j| dd| gd| d| gdd| | gddd	| ggtd
}t jg dg dg dg dgtd
}t| dddd}||}t|| d S )Nr[   r\   r]   r^   r   r*   r   r   r   r_   )missingr[   r\   r   )r]   r   r^   r   )r   r*   r   r   )r   r   r   r   r@   r   Tr   )rF   re   rf   r   rJ   r   r   r&   r&   r'   test_imputation_constant_object  s0    






r   c                 C   sl   t d}td}|j|| d}tjg dg dg dg dgtd}tdd	d
}|	|}t
|| d S )NrY   r   r_   )r   r   r$   r   )r[   r   r%   r   )r[   r   r   r   )r\   r   r$   r   r@   Tr-   r   r   r   r&   r&   r'   test_imputation_constant_pandas  s    



r   r7   r`   rC   c                 C   sf   t  | }|jdksJ t  }|dgdgg |jdks@J |dgtjgg |jdksbJ d S )Nr   r`   rC   )r   r0   n_iter_rF   rI   r7   r:   r&   r&   r'   "test_iterative_imputer_one_feature
  s    r   c                  C   sr   t dddd} | jd }tdt|dfdtjddfg}d	g d
i}t dddd }t||}|| | d S )Nd   皙?)densityr   r:   r,   r   random_stateZimputer__strategyrN   r`   )	r   datar   r   r   ZDecisionTreeRegressorr6   r   r0   )r7   r,   Zpipeline
parametersYgsr&   r&   r'   $test_imputation_pipeline_grid_search  s    

r   c                  C   sv  t ddddd} |   }tdddd}|||}d|d	< t||krTJ |  }t|jd ddd}|||}d|jd< t|j|jkrJ |   }tddd
d}|||}d|d	< t	|| |  
 }t|jd dd
d}|||}d|jd< t	|j|j |  }t|jd dd
d}|||}d|jd< t|j|jkrrJ d S )NrQ   g      ?r   r   r   r=   T)r,   r-   r2   r   r   F)r   r2   r6   r   r0   r1   rF   r   r   r   Ztocsc)ZX_origr7   r:   Xtr&   r&   r'   test_imputation_copy)  s4    



r   c                  C   s   t jd} d}d}t||d| d }|dk}t j||< tdd}||}t||j	
| tdd|}t |
||j	
|krJ d|_t|
||j	
| d S )Nr   r   rB   r   r   )max_iterrQ   )rF   rG   r   r   r6   rI   r   rJ   r   initial_imputer_r1   r0   r   r   )r   nr^   r7   Zmissing_flagr:   rL   r&   r&   r'   !test_iterative_imputer_zero_itersT  s    


 r   c                  C   sp   t jd} d}d}t||d| d }tdddd}|| || tdddd}|| || d S )	Nr   r   rP   r   r   r`   )r,   r   verboserC   )rF   rG   r   r   r6   r   r0   r1   )r   r   r^   r7   r:   r&   r&   r'   test_iterative_imputer_verbosel  s    


r   c                  C   sB   d} d}t | |f}tddd}||}t||j| d S )Nr   rP   r   r`   )r,   r   )rF   r   r   rJ   r   r   r1   )r   r^   r7   r:   rL   r&   r&   r'   "test_iterative_imputer_all_missingz  s    
r   imputation_order)rG   roman	ascending
descendingarabicc           
      C   sR  t jd}d}d}d}t||d|d }d|d d df< td|dd	d
ddd| |d
}|| dd |jD }t||j	 |j
ksJ | dkrt |d |d  t d|ksJ n| dkrt |d |d  t |d ddksJ n^| dkr*|d |d  }||d d  }	||	ksNJ n$d| v rNt|||d  ksNJ d S )Nr   r   rB   rC   r   r   r`   rQ   FT)
r,   r   n_nearest_featuressample_posteriorskip_complete	min_value	max_valuer   r   r   c                 S   s   g | ]
}|j qS r&   Zfeat_idx).0r   r&   r&   r'   
<listcomp>  r   z;test_iterative_imputer_imputation_order.<locals>.<listcomp>r   r   r   rG   ending)rF   rG   r   r   r6   r   rJ   imputation_sequence_rr   r   Zn_features_with_missing_r   r   )
r   r   r   r^   r   r7   r:   Zordered_idxZordered_idx_round_1Zordered_idx_round_2r&   r&   r'   'test_iterative_imputer_imputation_order  s>    
(.

r  	estimatorc           	      C   s   t jd}d}d}t||d|d }tdd| |d}|| g }|jD ]>}| d ur`t| ntt	 }t
|j|szJ |t|j qLtt|t|ksJ d S )Nr   r   rB   r   r   r`   )r,   r   r  r   )rF   rG   r   r   r6   r   rJ   r  r   r   
isinstancer  appendidrr   set)	r  r   r   r^   r7   r:   hashestripletexpected_typer&   r&   r'   !test_iterative_imputer_estimators  s    

r  c                  C   s   t jd} d}d}t||d| d }tdddd| d}||}tt ||dk d tt 	||dk d t||dk ||dk  d S )	Nr   r   rB   r   r   r`   皙?)r,   r   r   r   r   
rF   rG   r   r   r6   r   rJ   r   minr   r   r   r^   r7   r:   r   r&   r&   r'   test_iterative_imputer_clip  s    

r  c                  C   s   t jd} d}d}t||d| d }d|d d df< tdddd	dd
dd| d	}||}tt ||dk d tt 	||dk d
 t||dk ||dk  d S )Nr   r   rB   r   r   r`   rC   rQ   Tr  rG   )	r,   r   r   r   r   r   r   r   r   r  r  r&   r&   r'   %test_iterative_imputer_clip_truncnorm  s(    
r  c                     s   t jd} | jdd t j d d< tddd| d  t  fdd	td
D }t	|dksnJ t	|dks~J |
 |  }}t|| | d\}}|dkr|d7 }t|| | d\}}|dk s|dksJ dd S )N*   )rQ   rQ   )rp   r   r   T)r   r   r   r   c                    s   g | ]}  d  d  qS r   )r1   )r  _r   r&   r'   r    r   zEtest_iterative_imputer_truncated_normal_posterior.<locals>.<listcomp>r   Znormg-q=r  r   z&The posterior does appear to be normal)rF   rG   r   normalrI   r   rJ   re   r   r   r=   Zstdr   )r   ZimputationsmusigmaZks_statisticZp_valuer&   r   r'   1test_iterative_imputer_truncated_normal_posterior  s     
r  c                 C   s   t jd}d}d}|jdd||fd}|jdd||fd}d|d d df< d|d< tdd| |d|}td| d	|}t||d d df ||d d df  d S )
Nr   r   rB   rP   )lowhighrp   r`   r   )r,   r   rE   r   r+   )	rF   rG   r   randintr   r0   r   r   r1   )r-   r   r   r^   X_trainX_testr:   Zinitial_imputerr&   r&   r'   +test_iterative_imputer_missing_at_transform  s     (r!  c                  C   s   t jd} t jd}d}d}t||d| d }tddd| d}|| ||}||}t |t	
t |ksJ tddd	d d
| d}tddd	d d
|d}	|| |	| ||}
||}|	|}t|
| t|
| d S )Nr   r`   r   rB   r   r   T)r,   r   r   r   Fr   )r,   r   r   r   r   r   )rF   rG   r   r   r6   r   r0   r1   r=   rU   approxr   )Zrng1Zrng2r   r^   r7   r:   Z
X_fitted_1Z
X_fitted_2imputer1imputer2ZX_fitted_1aZX_fitted_1br&   r&   r'   .test_iterative_imputer_transform_stochasticity'  sF    


	





r%  c                  C   s   t jd} | dd}t j|d d df< td| d}td| d}|||}||}t	|d d dd f | t	|| d S )Nr   r   rB   )r   r   r`   )
rF   rG   r   randrI   r   r0   r1   rJ   r   )r   r7   m1m2Zpred1Zpred2r&   r&   r'   !test_iterative_imputer_no_missingY  s    
r)  c            	      C   s   t jd} d}| |d}| d|}t ||}| ||dk }| }t j||< tdd| d}||}t	||dd d S )	Nr   2   r`   r   rQ   r   r   r   g{Gz?atol)
rF   rG   r   r&  dotr2   rI   r   rJ   r   )	r   r^   ABr7   nan_mask	X_missingr:   X_filledr&   r&   r'   test_iterative_imputer_rank_oneg  s    

r4  rankrP   rQ   c                 C   s   t jd}d}d}||| }|| |}t ||}|||dk }| }t j||< |d }|d | }	||d  }
||d  }tddd|d|	}|	|}t
|
|d	d
 d S )Nr   F   r   rC   rQ   r   r`   )r   r   r   r   r   r,  )rF   rG   r   r&  r.  r2   rI   r   r0   r1   r   )r5  r   r   r^   r/  r0  r3  r1  r2  r  X_test_filledr   r:   
X_test_estr&   r&   r'   )test_iterative_imputer_transform_recoveryv  s(    

r9  c               	   C   s  t jd} d}d}| ||}| ||}t |j}t|D ]R}t|D ]D}|d d || | f  |d d |f |d d |f  d 7  < qLq@| ||dk }| }	t j	|	|< |d }|	d | }
||d  }|	|d  }t
dd| d|
}||}t||dd	d
 d S )Nr   r   rB   rC   g      ?r`   r+  gMbP?{Gz?)rtolr-  )rF   rG   r   rH   r   rK   r   r&  r2   rI   r   r0   r1   r   )r   r   r^   r/  r0  r3  r   r   r1  r2  r  r7  r   r:   r8  r&   r&   r'   &test_iterative_imputer_additive_matrix  s&    D

r<  c                  C   s   t jd} d}d}| |d}| d|}t ||}| ||dk }| }t j||< tdddd| d	}||}	t	|j
||j ksJ t|jdd| d
}||}
t|	|
dd tdddd| d	}|| |j|jksJ d S )Nr   r*  rQ   r`   r   r   r:  F)r   Ztolr   r   r   )r   r   r   r   gHz>r,  )rF   rG   r   r&  r.  r2   rI   r   rJ   rr   r  r   r   r0   r   )r   r   r^   r/  r0  r7   r1  r2  r:   ZX_filled_100ZX_filled_earlyr&   r&   r'   %test_iterative_imputer_early_stopping  s0    






r=  c            
      C   s   t dd\} }| j\}}d| d d df< tjd}d}t|D ]0}|jt|t|| dd}tj	| ||f< q@t
d	dd
}t ( tdt || |}	W d    n1 s0    Y  tt|	rJ d S )NT)Z
return_X_yr`   rP   r   g333333?F)rp   replacerQ   )r   r   error)r   rK   rF   rG   r   r   choicer   intrI   r   warningscatch_warningssimplefilterRuntimeWarningrJ   r   r   )
r7   r%   Z	n_samples
n_featuresr   Zmissing_rateZfeatZ
sample_idxr:   ZX_fillr&   r&   r'   $test_iterative_imputer_catch_warning  s    

*rG  z$min_value, max_value, correct_outputr   )r   rQ   rB   )r      i,  r   rB   rH  )ZscalarszNone-defaultinflistszlists-with-inf)Zidsc                 C   s   t jddd}t| |d}|| t|jt jrFt|j	t jsJJ |jj
d |j
d krv|j	j
d |j
d kszJ t|dd d f |j t|dd d f |j	 d S )Nr   rB   rP   r   r   r`   )rF   rG   r   rH   r   r0   r  Z
_min_valuendarrayZ
_max_valuerK   r   )r   r   Zcorrect_outputr7   r:   r&   r&   r'   )test_iterative_imputer_min_max_array_like  s    
rM  zmin_value, max_value, err_msg)r   r   min_value >= max_value.rN  )r   rH  r   z_value' should be of shape)r   rQ   rQ   c                 C   sT   t jd}t| |d}tjt|d || W d    n1 sF0    Y  d S )NrB   rP   rK  rR   )rF   rG   r   rU   rl   rm   r0   )r   r   r.   r7   r:   r&   r&   r'   *test_iterative_imputer_catch_min_max_error  s    rP  zmin_max_1, min_max_2ira   zNone-vs-infzScalar-vs-vectorc              	   C   s   t t jdddgdt jt jdgddt jdgt jddt jgg}t t jdt jdgddt jt jgt jdddgg}t| d | d dd	}t|d |d dd	}|||}|||}t|d d df |d d df  d S )
NrC   r`   rB   r   rP   ra   rQ   r   )r   r   r   )rF   re   rI   r   r0   r1   r   )Z	min_max_1Z	min_max_2r  r   r#  r$  ZX_test_imputed1ZX_test_imputed2r&   r&   r'   4test_iterative_imputer_min_max_array_like_imputation  s&    *rQ  r   TFc                 C   s   t jd}t g dg dg dg dg}t t jdddgt jdd	dgt jd	d
d	gg}td| |d}|||}| rt|d d df t 	|d d df  nt|d d df g ddd d S )Nr   )rQ   rC   rC   r`   )rB   r`   rC   r   )rP   r`   r`   r`   )r   ra   rC   rC   rC   ra   rQ   r`   rB   r=   )rE   r   r   )   r      g-C6?)r;  )
rF   rG   r   re   rI   r   r0   r1   r   r=   )r   r   r  r   r:   r8  r&   r&   r'   'test_iterative_imputer_skip_non_missing0  s    ".*rT  
rs_imputer)seedrs_estimatorc                 C   sH   G dd d}||d}t | d}td}|| |j|ksDJ d S )Nc                   @   s$   e Zd Zdd Zdd Zdd ZdS )zCtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimatorc                 S   s
   || _ d S r!   r   )selfr   r&   r&   r'   __init__G  s    zLtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.__init__c                 _   s   | S r!   r&   )rX  rt   Zkgardsr&   r&   r'   r0   J  s    zGtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.fitc                 S   s   t |jd S )Nr   )rF   r   rK   )rX  r7   r&   r&   r'   predictM  s    zKtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.predictN)__name__
__module____qualname__rY  r0   rZ  r&   r&   r&   r'   ZeroEstimatorF  s   r^  r   rO  )r   rF   r   r0   r   )rU  rW  r^  r  r:   r  r&   r&   r'   ,test_iterative_imputer_dont_set_random_stateC  s    




r_  zX_fit, X_trans, params, msg_errr   missing-onlyauto)featuresr   zBhave missing values in transform but have no missing values in fitr[   r\   r]   r_   z1MissingIndicator does not support data with dtypec                 C   s\   t dd}|jf i | tjt|d  || | W d    n1 sN0    Y  d S )Nr   r   rR   )r   
set_paramsrU   rl   rm   r0   r1   )X_fitr;   paramsZmsg_err	indicatorr&   r&   r'   test_missing_indicator_errorW  s    
rg  c                  C   sN   dt jft jt jfdt jfg} t jgt t t t t	 }dd t
|| D S )Nr   r   c                 S   s0   g | ](\}\}}|d kr"|t jus|||fqS r  )rF   re   )r  arr_typer,   r"   r&   r&   r'   r  y  s   
z5_generate_missing_indicator_cases.<locals>.<listcomp>)rF   Zint32rI   float64re   r   r   r   r    r   r   )Zmissing_values_dtypesZ	arr_typesr&   r&   r'   !_generate_missing_indicator_caseso  s$    rj  zarr_type, missing_values, dtypez,param_features, n_features, features_indicesr   r`   rC   r   c                 C   s  t | | dgdd| gg}t | | dgg dg}t g dg dg}t g dg dg}	|||}|||}||}|	|}	t| |dd	}
|
|}|
|}|jd |ksJ |jd |ksJ t|
j| t	||d d |f  t	||	d d |f  |j
tksJ |j
tks,J t|t js>J t|t jsPJ |
jd
d |
|}|
|}|j
tksJ |j
tksJ |jdksJ |jdksJ t	| | t	| | d S )Nr`   ra   rC   ra   rS  rB   )r`   r`   r   )r   r   r`   r   F)r,   rb  r   Tr   csc)rF   re   astyper   rJ   r1   rK   r   Z	features_r   r"   boolr  rL  rc  r4   r6   )r,   rh  r"   Zparam_featuresrF  Zfeatures_indicesrd  r;   ZX_fit_expectedZX_trans_expectedrf  
X_fit_maskX_trans_maskZX_fit_mask_sparseZX_trans_mask_sparser&   r&   r'   test_missing_indicator_new  s>    






rr  rh  c                 C   s   d}t ||dgd|dgg}t ||dgg dg}| |}| |}t|d}tjtdd || W d    n1 s~0    Y  || tjtdd || W d    n1 s0    Y  d S )	Nr   r`   ra   rC   rl  r   z"Sparse input with missing_values=0rR   )rF   re   r   rU   rl   rm   rJ   r1   )rh  r,   rd  r;   ZX_fit_sparseZX_trans_sparserf  r&   r&   r'   5test_missing_indicator_raise_on_sparse_with_missing_0  s    
(
rs  param_sparse)TFra  zarr_type, missing_valuesc                 C   sJ  t ||dgd|dgg}t ||dgg dg}| |t j}| |t j}t||d}||}||}|du r|jdksJ |jdksJ n|dkr|d	krt|t j	sJ t|t j	sJ n||d
u rt|t j	sJ t|t j	sJ nRt
|r"|jdksJ |jdksFJ n$t|t j	s4J t|t j	sFJ d S )Nr`   ra   rC   rl  )r,   r   Trm  ra  r   F)rF   re   rn  ri  r   rJ   r1   r4   r  rL  r   r5   )rh  r,   rt  rd  r;   rf  rp  rq  r&   r&   r'   #test_missing_indicator_sparse_param  s*    

ru  c                  C   sP   t jg dg dgtd} tddd}|| }t|t g dg dg d S )	Nr[   r\   r]   )r\   r]   r[   r_   r[   r   )r,   rb  )TFF)FFT)rF   re   rf   r   rJ   r   )r7   rf  r;   r&   r&   r'   test_missing_indicator_string  s    
rw  zX, missing_values, X_trans_exp)r\   r\   TF)r\   r\   FT)r   r   TF)r   r   FTc                 C   s0   t t|ddt|d}|| }t|| d S )Nr?   r+   r   )r   r   r   rJ   r   )r7   r,   ZX_trans_expZtransr;   r&   r&   r'   #test_missing_indicator_with_imputer  s    

rx  imputer_constructorz.imputer_missing_values, missing_value, err_msgNaNzInput X contains NaN)z-1r   z(types are expected to be both numerical.c                 C   sf   t jd}|dd}||d< | |d}tjt|d || W d    n1 sX0    Y  d S )Nr  rB   r   r   rR   )rF   rG   r   rH   rU   rl   rm   rJ   )ry  Zimputer_missing_valuesr   r.   r   r7   r:   r&   r&   r'   (test_inconsistent_dtype_X_missing_values#  s    
r{  c                  C   sB   t ddgddgg} tddd}|| }|jd dks>J d S )Nr`   r`  r   rb  r,   r   )rF   re   r   rJ   rK   )r7   mir   r&   r&   r'   !test_missing_indicator_no_missing:  s    
r~  c                 C   sH   | g dg dg dg}t ddd}||}| | ksDJ d S )Nrk  )r`   rC   r   )rC   r   r`   r   r`   r|  )r   rJ   Zgetnnzsum)rA   r7   r}  r   r&   r&   r'   /test_missing_indicator_sparse_no_explicit_zerosE  s    
r  c                 C   s8   t ddgddgg}|  }|| |jd u s4J d S )Nr`   )rF   re   r0   Z
indicator_)ry  r7   r:   r&   r&   r'   test_imputer_without_indicatorQ  s    
r  c                 C   s   | t jddgdt jdgddt jgg dg}t g dg dg d	g d
g}tt jdd}||}t|stJ |j|jksJ t|	 | d S )Nr`   rQ   rC   r   rP   )r`   rC   r   )      @r         @r           r  )       @r  r   r  r   r  )g      @r  r  r  r  r   )r   r  g      "@r  r  r  T)r,   add_indicator)
rF   rI   re   r   rJ   r   r5   rK   r   r6   )rh  ZX_sparser8   r:   r;   r&   r&   r'   2test_simple_imputation_add_indicator_sparse_matrixZ  s    ,	
r  zstrategy, expected)r?   r\   )r@   r   c                 C   sN   ddgdt jgg}t jddgd|ggtd}t| d}||}t|| d S )Nr[   r\   r]   r_   rD   )rF   rI   re   rf   r   rJ   r   )r-   expectedr7   r8   r:   r;   r&   r&   r'   "test_simple_imputation_string_listq  s
    

r  zorder, idx_orderr   )rP   ra   rC   r   r`   r   )r`   r   rC   ra   rP   c                 C   s   t jd}|dd}t j|d ddf< t j|d ddf< t j|d dd	f< t j|d d
df< tt@ td| dd	|}dd |j
D }||ksJ W d    n1 s0    Y  d S )Nr  r   rQ   r*  r`      r      rC   rB   ra   )r   r   r   c                 S   s   g | ]
}|j qS r&   r   )r  r$   r&   r&   r'   r    r   z)test_imputation_order.<locals>.<listcomp>)rF   rG   r   r&  rI   rU   rV   r	   r   r0   r  )orderZ	idx_orderr   r7   Ztrsidxr&   r&   r'   test_imputation_order  s    r  r   c              	   C   s4  t d| ddgg ddd| dgddd| gg}t g d	d
d| dgd| ddgddd
| gg}t d| ddg| d| | gd
| d| g| d| dgg}t g d| d
| dgg d| d| d
gg}t| ddd}||}||}||}||}	t|| t|	| ||fD ]$}
||
}||}t||
 q
d S )Nr   rP   r   ra   r   rQ   ra   r   r   r   r   )rQ   ra   rC   r`   rC   r`   ra   rQ   )r`   r`   r`   rP   )rC   rP   rP   ra   r=   T)r,   r-   r  )rF   re   r   rJ   inverse_transformr1   r   )r   X_1ZX_2ZX_3ZX_4r:   	X_1_transZX_1_inv_transZ	X_2_transZX_2_inv_transr7   r;   ZX_inv_transr&   r&   r'   (test_simple_imputation_inverse_transform  sR    


	


	



	

	







r  c              	   C   s   t d| ddgg ddd| dgddd| gg}t| d	d
}||}tjtd|j dd || W d    n1 s|0    Y  d S )Nr   rP   r   r  r   r   r   r   r=   r+   zGot 'add_indicator='rR   )	rF   re   r   rJ   rU   rl   rm   r  r  )r   r  r:   r  r&   r&   r'   3test_simple_imputation_inverse_transform_exceptions  s    


	
r  z)expected,array,dtype,extra_value,n_repeatextra_valuerv  most_frequent_value)r  r  valuer   Zmin_valuevalue)r   r   r  r{   r`   rC   rP   )r`   r`   rC   )r  r  r`   )r`   r`   r  c                 C   s"   | t tj||d||ksJ d S )Nr_   )r   rF   re   )r  re   r"   r  Zn_repeatr&   r&   r'   test_most_frequent  s    r  rE   c                 C   sp   t dt jdgdt jt jgg}t| dd}||}t|dddf d ||}t|dddf d dS )zCheck the behaviour of the iterative imputer with different initial strategy
    and keeping empty features (i.e. features containing only missing values).
    r`   rC   rP   T)rE   r   Nr   )rF   re   rI   r   rJ   r   r1   )rE   r7   r:   rL   r&   r&   r'   *test_iterative_imputer_keep_empty_features  s     

r  c                  C   sT   t g dg dg dg dg} d}tdd|dd	d
}||  t|jj| dS )z<Check that we propagate properly the parameter `fill_value`.r   r   r   r   r   r   r@   r   T)r,   rE   r   r   r   N)rF   re   r   rJ   r   r   r3   )r7   r   r:   r&   r&   r'   *test_iterative_imputer_constant_fill_value  s    "
r  c               	   C   s  t ddt jt jgddt jdgddt jt jgdd	t jd
gg} t j t j t j dg}t jt jt jdg}t||dd| }t j| ddd}|j|jksJ t |t 	| t
dksJ t |t 	| t
dksJ t ddt jt jgdddt jgddt jt jgdd	d
t jgg} t j t j dt j g}t jt jdt jg}t||dd| }| ddddf }|j|jksJ t |t 	| t
dksJ t |t 	| t
dksJ dS )zCheck that we properly apply the empty feature mask to `min_value` and
    `max_value`.

    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/29355
    r`   rC   ra   rQ   r   r   r   rB   rR  rS  F)r   r   r   r   g      @NrP   )rF   re   rI   rI  r   rJ   deleterK   r  r   rU   r"  r   )r7   r   r   rL   ZX_without_missing_columnr&   r&   r'   1test_iterative_imputer_min_max_value_remove_empty   sP    ""$r  r   c                 C   s   t dt jdgdt jt jgg}t| d}dD ]`}t|||}| rl|j|jksTJ t|dddf d q.|j|jd |jd d fks.J q.dS )z>Check the behaviour of `keep_empty_features` for `KNNImputer`.r`   rC   rP   )r   rJ   r1   Nr   )rF   re   rI   r   getattrrK   r   )r   r7   r:   methodrL   r&   r&   r'   $test_knn_imputer_keep_empty_featuresW  s     
r  c                  C   s  t d} | d| jg dddi}t| jddd}t||tj	d	gdgd
ggt
d | d| jg dddi}tddd}t||tj	d	gd
gdggt
d | d| jg dddi}t| jddd}t||tj	dgdgdggdd ttjddd}t||tj	dgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdggdd | d| jg dddi}t| jddd}t||tj	dgdgdggdd | d| jg d ddi}t| jdd}t||tj	dgd!gd!gdggdd d S )"NrY   feature)abcNdestringr_   r@   nar   r  r  )r  r  fghok)r   r-   r  )r`   NrP   ZInt64r   r`   rP   ri  )r`   NrC   rP   r>   r+   rC   )r`   NrC   r=   r   )r   Nr  g       r   r  )r   Nr  r  r  )rU   rd   rg   ZSeriesr   ZNAr(   rJ   rF   re   rf   r)   rI   )rh   r   r:   r&   r&   r'   test_simple_impute_pd_nag  sR    
    $  r  c                  C   sd   t d} tj}| j||d|gd|ddggg dd}t|d|}| }g d	}t|| d
S )zDCheck that missing indicator return the feature names with a prefix.rY   r`   ra   rC   rB   rZ   rb   r   )Zmissingindicator_aZmissingindicator_bZmissingindicator_dN)	rU   rd   rF   rI   rg   r   r0   Zget_feature_names_outr   )rh   r,   r7   rf  ri   Zexpected_namesr&   r&   r'   (test_missing_indicator_feature_names_out  s    


r  c                  C   s\   ddgddgddgg} t dd| }|tjtjgg}|jtksHJ t|ddgg dS )zkCheck transform uses object dtype when fitted on an object dtype.

    Non-regression test for #19572.
    r[   r\   r]   r?   rD   N)r   r0   r1   rF   rI   r"   rf   r   )r7   Zimp_frequentr;   r&   r&   r'    test_imputer_lists_fit_transform  s
    r  
dtype_testc                 C   sn   t jddt jgt jddgg dgt jd}t |}t jt jt jt jgg| d}||}|j| ksjJ dS )zACheck transform preserves numeric dtype independent of fit dtype.r   g333333@r   )g@rC   r`   r_   N)rF   asarrayrI   ri  r   r0   r1   r"   )r  r7   impr   r;   r&   r&   r'   .test_imputer_transform_preserves_numeric_dtype  s     
r  
array_typere   r   c           	   	   C   s   t t jdgt jdgt jdgg}t|| }d}td||d}dD ]}|dr|sd	}tjt|d
 t	|||}W d   q1 s0    Y  nt	|||}|j
|j
ksJ | dkr|dddf  n|dddf }t|| qBdS )zCheck the behaviour of `keep_empty_features` with `strategy='constant'.
    For backward compatibility, a column full of missing values will always be
    fill and never dropped.
    rC   rP   r   rB   r@   r   r  r0   z7`strategy="constant"`, empty features are not dropped. rR   Nr   r   )rF   re   rI   r   r   
startswithrU   rV   FutureWarningr  rK   r6   r   )	r  r   r7   r   r:   r  Zwarn_msgrL   constant_featurer&   r&   r'   0test_simple_imputer_constant_keep_empty_features  s$    "
.*r  c                 C   s   t t jdgt jdgt jdgg}t||}t| |d}dD ]}t|||}|r|j|jksbJ |dkr~|dddf  n|dddf }t|d q<|j|jd |jd	 d	 fks<J q<dS )
zYCheck the behaviour of `keep_empty_features` with all strategies but
    'constant'.
    rC   rP   r   r   r  r   Nr   r`   )	rF   re   rI   r   r   r  rK   r6   r   )r-   r  r   r7   r:   r  rL   r  r&   r&   r'   'test_simple_imputer_keep_empty_features  s    "
*r  c              
   C   s   t g dddt jgt jdt jgg dg dt jddgg}t g dg dg d	g dg dg d
g}tt jt jd}||}t|| tt jt jd}|| |}t| | d S )N)r   r   r   333333@r   r   )r   r   r   )g@r   r   皙?)r  r   r   )r   r   r   )r   r  r  r+   )rF   re   rI   r   r  rJ   r   r6   )rk   r7   r8   r:   r;   r&   r&   r'   test_imputation_custom  s0    



r  c                  C   sL  d} t jg dg dgt jd}td| dd}d| d	t| d
}tjtt	|d |
| W d   n1 sx0    Y  t jg dg dgt jd}|
| d|jjd
}tjtt	|d || W d   n1 s0    Y  t dddg}|t j}|D ]0} td| dd}||}|j|jksJ qdS )zCheck that we raise a proper error message when we cannot cast the fill value
    to the input data type. Otherwise, check that the casting is done properly.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28309
    r   r  )rC   rP   ra   r_   r@   rC   )r-   r   r,   r   r   r   rR   Nz%The dtype of the filling value (i.e. r`   )rF   re   Zint64r   r   rU   rl   rm   r   r   r0   ri  r3   r"   r1   rn  float32rJ   )r   ZX_int64r:   r.   Z	X_float64Zfill_value_listZ	X_float32r;   r&   r&   r'   /test_simple_imputer_constant_fill_value_casting(  s,    (
(
r  c                 C   s^   t t jddgdt jdgddt jgg}t| ddd}t| dd	d}t|||| d
S )a  Check the behaviour of `keep_empty_features` with no empty features.

    With no-empty features, we should get the same imputation whatever the
    parameter `keep_empty_features`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    r   r`   rC   rP   ra   rQ   FrE   r   r   TN)rF   re   rI   r   r   rJ   )r-   r7   imputer_drop_empty_featuresimputer_keep_empty_featuresr&   r&   r'   (test_iterative_imputer_no_empty_featuresO  s    
(r  r   )r`   rC   rP   ra   )rQ   r   r   r   r   r   r   c           	      C   s   t t jt jddgt jdt jdgt jddt jgg}t| ddd}||}||}t| dd	d}||}||}t||d
d
dd
f  t|d
d
df d |jd |jd ksJ |jd |jd ksJ d
S )a.  Check the behaviour of `keep_empty_features` in the presence of empty features.

    With `keep_empty_features=True`, the empty feature will be imputed with the value
    defined by the initial imputation.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    r   r`   rC   rP   ra   rQ   Fr  TN)rF   re   rI   r   rJ   r1   r   rK   )	r-   r   r  r  ZX_train_drop_empty_featuresZX_test_drop_empty_featuresr  ZX_train_keep_empty_featuresZX_test_keep_empty_featuresr&   r&   r'   *test_iterative_imputer_with_empty_featuresi  s"    ,



r  )r   r   rB  	itertoolsr   numpyrF   rU   Zscipyr   Zscipy.statsr   Zsklearnr   Zsklearn.datasetsr   Zsklearn.dummyr   Zsklearn.exceptionsr	   Zsklearn.experimentalr
   Zsklearn.imputer   r   r   r   Zsklearn.impute._baser   Zsklearn.linear_modelr   r   r   Zsklearn.model_selectionr   Zsklearn.pipeliner   r   Zsklearn.random_projectionr   Zsklearn.utils._testingr   r   r   r   r   Zsklearn.utils.fixesr   r   r   r   r    r(   r)   r<   markZparametrizerM   rX   rj   rn   rw   rx   r   r   rf   strr   r   r"   r   r   rI   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r  r  r  r  r  r!  r%  r)  r4  r9  r<  r=  rG  re   rI  rM  rP  rQ  rT  rG   r   r_  rg  rj  rr  rs  r   ru  rw  rx  r{  r~  r  r  r  r  r  r  r  rA  r  r  r  r  r  r  r  r  r  ri  r  r  r  r  r  r  r  r&   r&   r&   r'   <module>   s0  	'
	

E
"





#
"
+	
%
!
2
$*"

	
	0


&,


	






9


7<!'&$