a
    ¶Àhã4  ã                   @   s$  d Z ddlZddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlmZm Z  eƒ Z!eƒ Z"dd„ Z#ej$ %dee  ¡dd„ ƒZ&dd„ Z'dd„ Z(dd„ Z)dd„ Z*dd„ Z+ej$ %dddg¡dd „ ƒZ,d!d"„ Z-d#d$„ Z.d%d&„ Z/d'd(„ Z0d)d*„ Z1ed+ef i d,d-i¤Žd.ej$ %d/d0d1g¡d2d3„ ƒƒZ2ed+ef i d,d4i¤Žd.ej$ %d/d0d1g¡d5d6„ ƒƒZ3d7d8„ Z4ej$ %d9e¡d:d;„ ƒZ5d<d=„ Z6ej$ %dee  ¡d>d?„ ƒZ7ej$ %d@dAdBg¡ej$ %dddg¡dCdD„ ƒƒZ8dS )EzD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
é    N)ÚMockÚpatch)Úparallel_backend)Úload_diabetesÚ	load_irisÚmake_classification)ÚIsolationForest)Ú_average_path_length)Úroc_auc_score)ÚParameterGridÚtrain_test_split)Úcheck_random_state)Úassert_allcloseÚassert_array_almost_equalÚassert_array_equalÚignore_warnings)ÚCSC_CONTAINERSÚCSR_CONTAINERSc                 C   s˜   t  ddgddgg¡}t  ddgddgg¡}tdgg d¢ddgdœƒ}tƒ 8 |D ]"}tf d	| i|¤Ž |¡ |¡ qRW d
  ƒ n1 sŠ0    Y  d
S )z6Check Isolation Forest for various parameter settings.r   é   é   é   )ç      à?ç      ð?r   TF)Ún_estimatorsÚmax_samplesÚ	bootstrapÚrandom_stateN)ÚnpÚarrayr   r   r   ÚfitÚpredict)Úglobal_random_seedÚX_trainÚX_testÚgridÚparams© r&   ú_/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/ensemble/tests/test_iforest.pyÚtest_iforest"   s    ÿÿþr(   Úsparse_containerc                 C   s¨   t | ƒ}ttjdd… |d\}}tddgddgdœƒ}||ƒ}||ƒ}|D ]V}tf d	| d
œ|¤Ž |¡}	|	 |¡}
tf d	| d
œ|¤Ž |¡}| |¡}t|
|ƒ qLdS )z=Check IForest for various parameter settings on sparse input.Né2   ©r   r   r   TF)r   r   é
   )r   r   )	r   r   ÚdiabetesÚdatar   r   r   r    r   )r!   r)   Úrngr"   r#   r$   ZX_train_sparseZX_test_sparser%   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr&   r&   r'   Útest_iforest_sparse2   s.    ÿÿþ
ÿÿþ
r0   c                  C   s(  t j} d}tjt|d  tdd | ¡ W d  ƒ n1 s>0    Y  t ¡ , t 	dt¡ tdd | ¡ W d  ƒ n1 s‚0    Y  t ¡ 2 t 	dt¡ tt
 d¡d | ¡ W d  ƒ n1 sÌ0    Y  t t¡2 tƒ  | ¡ | dd…d	d…f ¡ W d  ƒ n1 s0    Y  dS )
z7Test that it gives proper exception on deficient input.ú3max_samples will be set to n_samples for estimation©Úmatchéè  ©r   NÚerrorÚautor   r   )Úirisr.   ÚpytestÚwarnsÚUserWarningr   r   ÚwarningsÚcatch_warningsÚsimplefilterr   Zint64ZraisesÚ
ValueErrorr    )ÚXÚwarn_msgr&   r&   r'   Útest_iforest_errorL   s    .
.
4rB   c               	   C   sF   t j} tƒ  | ¡}|jD ](}|jtt t 	| j
d ¡¡ƒksJ ‚qdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r8   r.   r   r   Úestimators_Ú	max_depthÚintr   ÚceilÚlog2Úshape)r@   ÚclfZestr&   r&   r'   Útest_recalculate_max_depthb   s    
rJ   c                  C   s¬   t j} tƒ  | ¡}|j| jd ks&J ‚tdd}d}tjt|d | | ¡ W d   ƒ n1 sb0    Y  |j| jd ks€J ‚tdd | ¡}|jd| jd  ks¨J ‚d S )Nr   iô  r5   r1   r2   gš™™™™™Ù?)	r8   r.   r   r   Úmax_samples_rH   r9   r:   r;   )r@   rI   rA   r&   r&   r'   Útest_max_samples_attributej   s    
(rL   c                 C   sŒ   t | ƒ}ttj|d\}}td| d |¡}|jdd | |¡}|jdd | |¡}t||ƒ td| d |¡}| |¡}t||ƒ dS )zCheck parallel regression.r+   r   )Ún_jobsr   r   ©rM   r   N)	r   r   r-   r.   r   r   Ú
set_paramsr    r   )r!   r/   r"   r#   ZensembleÚy1Úy2Zy3r&   r&   r'   Ú test_iforest_parallel_regressiony   s    



rR   c           	      C   s´   t | ƒ}d| dd¡ }| t |d |d f¡¡}|dd… }|jdddd	}t |dd… |f¡}t d
gd dgd  ¡}td|d |¡}| 	|¡ }t
||ƒdks°J ‚dS )z#Test Isolation Forest performs wellg333333Ó?iX  r   Nr4   éÿÿÿÿr   )éÈ   r   )ÚlowÚhighÚsizer   rT   éd   )r   r   g\Âõ(\ï?)r   ÚrandnZpermutationr   ZvstackÚuniformr   r   r   Údecision_functionr
   )	r!   r/   r@   r"   Z
X_outliersr#   Úy_testrI   Zy_predr&   r&   r'   Útest_iforest_performance   s    r]   Úcontaminationç      Ð?r7   c              	   C   s¦   ddgddgddgddgddgddgddgddgg}t || d	}| |¡ | |¡ }| |¡}t |dd … ¡t |d d… ¡ksˆJ ‚t|d
dg ddg  ƒ d S )NéþÿÿÿrS   r   r   é   é   éûÿÿÿé	   ©r   r^   é   )r   r   r[   r    r   ÚminÚmaxr   )r^   r!   r@   rI   Údecision_funcÚpredr&   r&   r'   Útest_iforest_works¥   s    4

(rk   c                  C   s&   t j} tƒ  | ¡}|j|jks"J ‚d S ©N)r8   r.   r   r   rK   Z_max_samples)r@   rI   r&   r&   r'   Útest_max_samples_consistency´   s    rm   c                  C   sV   t dƒ} ttjd d… tjd d… | d\}}}}tdd}| ||¡ | |¡ d S )Nr   r*   r+   gš™™™™™é?)Zmax_features)r   r   r-   r.   Útargetr   r   r    )r/   r"   r#   Zy_trainr\   rI   r&   r&   r'   Ú test_iforest_subsampled_features»   s    ÿ
ro   c                  C   sÌ   dt  d¡t j  d } dt  d¡t j  d }ttdgƒdgƒ ttdgƒdgƒ ttd	gƒd
gƒ ttdgƒ| gƒ ttdgƒ|gƒ ttt  g d¢¡ƒdd
| |gƒ tt  d¡ƒ}t|t  |¡ƒ d S )Nç       @g      @gš™™™™™ù?g     0@g}ÿ­¿Ì÷ÿ?r   g        r   r   r   é   éç  )r   r   rq   rr   )	r   ÚlogZeuler_gammar   r	   r   Zaranger   Úsort)Z
result_oneZ
result_twoZavg_path_lengthr&   r&   r'   Ú test_iforest_average_path_lengthÆ   s    
þru   c                  C   s¨   ddgddgddgg} t dd | ¡}t ƒ  | ¡}t| ddgg¡| ddgg¡|j ƒ t| ddgg¡| ddgg¡|j ƒ t| ddgg¡| ddgg¡ƒ d S )Nr   r   çš™™™™™¹?)r^   rp   )r   r   r   Zscore_samplesr[   Zoffset_)r"   Zclf1Zclf2r&   r&   r'   Útest_score_samplesÚ   s    þþÿrw   c                  C   sv   t dƒ} |  dd¡}tdd| dd}| |¡ |jd }|jdd | |¡ t|jƒdks`J ‚|jd |u srJ ‚dS )	z/Test iterative addition of iTrees to an iForestr   é   r   r,   T)r   r   r   Z
warm_start)r   N)r   rY   r   r   rC   rO   Úlen)r/   r@   rI   Ztree_1r&   r&   r'   Útest_iforest_warm_startë   s    ÿ


rz   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r_   r   )r7   r   c                 C   s   t ||ƒ | j|ksJ ‚d S rl   ©rk   Z
call_count©Zmocked_get_chunkr^   Zn_predict_callsr!   r&   r&   r'   Útest_iforest_chunks_works1  s    
r}   r,   c                 C   s   t ||ƒ | j|ksJ ‚d S rl   r{   r|   r&   r&   r'   Útest_iforest_chunks_works2  s    
r~   c                  C   s|  t  d¡} tƒ }| | ¡ t j d¡}t| | ¡dkƒs<J ‚t| | dd¡¡dkƒsZJ ‚t| | d ¡dkƒstJ ‚t| | d ¡dkƒsŽJ ‚t  	| dd¡dd¡} tƒ }| | ¡ t| | ¡dkƒsÊJ ‚t| | dd¡¡dkƒsèJ ‚t| t  d¡¡dkƒsJ ‚| dd¡} tƒ }| | ¡ t| | ¡dkƒs:J ‚t| | dd¡¡dkƒsZJ ‚t| t  d¡¡dkƒsxJ ‚dS )z=Test whether iforest predicts inliers when using uniform data)rX   r,   r   r   rX   r,   N)
r   Zonesr   r   ÚrandomÚRandomStateÚallr    rY   Úrepeat)r@   Úiforestr/   r&   r&   r'   Útest_iforest_with_uniform_data  s(    



 r„   Úcsc_containerc                 C   s2   t dddd\}}| |ƒ}tdddd |¡ d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rX   r   ©Z	n_samplesZ
n_featuresr   r,   é   r   )r   r   rM   N)r   r   r   )r…   r@   Ú_r&   r&   r'   Ú*test_iforest_with_n_jobs_does_not_segfault=  s    r‰   c                  C   sz   t  d¡} tj d¡}| j| d¡dgd}tddd}t 	¡ & t 
dt¡ | |¡ W d	  ƒ n1 sl0    Y  d	S )
z¾Check that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    Zpandasr   rb   Úa)r.   Úcolumnsgš™™™™™©?re   r6   N)r9   Zimportorskipr   r   r€   Z	DataFramerY   r   r<   r=   r>   r;   r   )Úpdr/   r@   Úmodelr&   r&   r'   Ú#test_iforest_preserve_feature_namesH  s    

rŽ   c                 C   sl   t dddd\}}| |ƒ}| ¡  d}td|dd |¡}| |¡}|dk  ¡ |jd  t |¡kshJ ‚dS )	zÀCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r*   rb   r   r†   rv   rq   )r   r^   r   N)	r   Zsort_indicesr   r   r[   ÚsumrH   r9   Zapprox)r)   r@   rˆ   r^   rƒ   Z
X_decisionr&   r&   r'   Ú-test_iforest_sparse_input_float_contaminationZ  s    ÿþ
r   rM   r   r   c           	   	   C   s   ddgddgddgddgddgddgddgddgg}t | |d	d
}| |¡ | |¡ }| |¡}t |dd	… ¡t |d	d… ¡ksŠJ ‚t|ddg ddg  ƒ t | |dd
}| |¡ td|d | |¡}W d	  ƒ n1 sè0    Y  t||ƒ d	S )z5Check that `IsolationForest.predict` is parallelized.r`   rS   r   r   ra   rb   rc   rd   N)r   r^   rM   rf   Ú	threadingrN   )	r   r   r[   r    r   rg   rh   r   r   )	r!   r^   rM   r@   rI   ri   rj   Zclf_parallelZpred_paralellr&   r&   r'   Útest_iforest_predict_paralleln  s     4ÿ

(ÿ
(r’   )9Ú__doc__r<   Zunittest.mockr   r   Únumpyr   r9   Zjoblibr   Zsklearn.datasetsr   r   r   Zsklearn.ensembler   Zsklearn.ensemble._iforestr	   Zsklearn.metricsr
   Zsklearn.model_selectionr   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   Zsklearn.utils.fixesr   r   r8   r-   r(   ÚmarkZparametrizer0   rB   rJ   rL   rR   r]   rk   rm   ro   ru   rw   rz   r}   r~   r„   r‰   rŽ   r   r’   r&   r&   r&   r'   Ú<module>   sf   

þþ"


