a
    h_                     @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZ ddlmZ ddlmZmZmZ dd	lmZmZmZ dd
lmZ ddlmZmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 G dd deeZ9dd Z:ej;<de8dd Z=dd Z>dd  Z?d!d" Z@ej;<de8d#d$ ZAd%d& ZBd'd( ZCd)d* ZDd+d, ZEd-d. ZFd/d0 ZGd1d2 ZHd3d4 ZIej;<d5ed6d6gej;<d7ed8fed9fgd:d; ZJej;<d<d=eKfd>eLfd?d@ eLfgej;<dAeegdBdC ZMej;<dDdd8gdEdF ZNdGdH ZOdIdJ ZPej;<g dKdLd9dLeQg dMgdLd8dLeQg dNgdLd9dOeQg dPgdLd8dOeQg dQgdLd9dReQdLd9ggdLd8dReQg dSgdLd9d9eQdLd9ggdLd8d9eQdLd8ggd9d9dOeQd9ggd9d8dLeQd9d8ggd9d8dOeQd9d8gggdTdU ZRej;<dVeegdWdX ZSej;<dVeegdYdZ ZTej;<dVeegej;<d[eeegd\d] ZUd^d_ ZVej;<d`edafedbfgdcdd ZWdedf ZXdgdh ZYdS )iz'
Testing Recursive feature elimination
    )
attrgetterN)parallel_backend)assert_allcloseassert_array_almost_equalassert_array_equal)BaseEstimatorClassifierMixinis_classifier)TransformedTargetRegressor)CCAPLSCanonicalPLSRegression)	load_irismake_classificationmake_friedman1)RandomForestClassifier)RFERFECV)SimpleImputer)LinearRegressionLogisticRegression)
get_scorermake_scorerzero_one_loss)
GroupKFoldcross_val_score)make_pipeline)StandardScaler)SVCSVR	LinearSVR)check_random_state)ignore_warnings)CSR_CONTAINERSc                       sb   e Zd ZdZdddZdd Zdd ZeZeZeZ	dd
dZ
dddZdd Z fddZ  ZS )MockClassifierz@
    Dummy classifier to test recursive feature elimination
    r   c                 C   s
   || _ d S N	foo_param)selfr'    r)   d/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/feature_selection/tests/test_rfe.py__init__#   s    zMockClassifier.__init__c                 C   s>   t |t |ksJ tj|jd tjd| _tt|| _| S )N   )Zdtype)	lennponesshapeZfloat64coef_sortedsetZclasses_r(   Xyr)   r)   r*   fit&   s    zMockClassifier.fitc                 C   s   t |jd S )Nr   )r.   r/   r0   )r(   Tr)   r)   r*   predict,   s    zMockClassifier.predictNc                 C   s   dS )Ng        r)   r4   r)   r)   r*   score3   s    zMockClassifier.scoreTc                 C   s
   d| j iS )Nr'   r&   )r(   deepr)   r)   r*   
get_params6   s    zMockClassifier.get_paramsc                 K   s   | S r%   r)   )r(   paramsr)   r)   r*   
set_params9   s    zMockClassifier.set_paramsc                    s   t   }d|j_|S )NT)super__sklearn_tags__Z
input_tags	allow_nan)r(   tags	__class__r)   r*   r@   <   s    
zMockClassifier.__sklearn_tags__)r   )NN)T)__name__
__module____qualname____doc__r+   r7   r9   Zpredict_probadecision_function	transformr:   r<   r>   r@   __classcell__r)   r)   rC   r*   r$      s   


r$   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}td| dd}t	|ddd	}|
|| t|j|jd
 ksvJ tdd}t	|ddd	}|
|| t| |  d S )Nr      size      )n_estimatorsrandom_state	max_depth   皙?	estimatorn_features_to_selectstepr,   linearZkernel)r!   r   r.   c_datanormalr-   targetr   r   r7   ranking_r0   r   r   Zget_support)	generatoririsr5   r6   clfrfeZclf_svcZrfe_svcr)   r)   r*   test_rfe_features_importanceB   s    "
re   csr_containerc                 C   s8  t d}t }tj|j|jt|jdfdf }| |}|j}tdd}t	|ddd}|
|| ||}|
|| t|j|jd	 ksJ tdd}	t	|	ddd}
|

|| |
|}|j|jjksJ t|d d
 |jd d
  t||||j |||||j|jks&J t||  d S )Nr   rL   rM   rZ   r[   rT   rU   rV   r,   
   )r!   r   r.   r\   r]   r^   r-   r_   r   r   r7   rJ   r`   r0   r   r9   r:   toarray)rf   ra   rb   r5   X_sparser6   rc   rd   X_rZ
clf_sparseZ
rfe_sparse
X_r_sparser)   r)   r*   test_rfeW   s(    "



"rl   c                  C   s   G dd dt t} tdd\}}tjtdd$ t|  d|| W d    n1 sX0    Y  tjtdd0 t|  dj||d	d
|| W d    n1 s0    Y  t|  dj||d	d
j||d	d
 d S )Nc                   @   s    e Zd ZdddZdddZdS )z0test_RFE_fit_score_params.<locals>.TestEstimatorNc                 S   s2   |d u rt dtdd||| _| jj| _| S )Nfit: prop cannot be NonerZ   r[   )
ValueErrorr   r7   svc_r1   r(   r5   r6   propr)   r)   r*   r7   {   s
    
z4test_RFE_fit_score_params.<locals>.TestEstimator.fitc                 S   s   |d u rt d| j||S )Nscore: prop cannot be None)rn   ro   r:   rp   r)   r)   r*   r:      s    z6test_RFE_fit_score_params.<locals>.TestEstimator.score)N)N)rE   rF   rG   r7   r:   r)   r)   r)   r*   TestEstimatorz   s   
rs   TZ
return_X_yrm   matchrW   rr   Zfoo)rq   )	r   r   r   pytestraisesrn   r   r7   r:   )rs   r5   r6   r)   r)   r*   test_RFE_fit_score_paramsw   s    2>rz   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}tdd}t	|ddd}|
|| t	|d	dd}|
|| t|j|j t|j|j d S )
Nr   rL   rM   rZ   r[   rT   rU   rV   g?)r!   r   r.   r\   r]   r^   r-   r_   r   r   r7   r   r`   support_)ra   rb   r5   r6   rc   Zrfe_numZrfe_percr)   r)   r*   test_rfe_percent_n_features   s    "
r|   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}t }t	|ddd}|
|| ||}|
|| t|j|jd ksJ |j|jjksJ d S )Nr   rL   rM   rT   rU   rV   r,   )r!   r   r.   r\   r]   r^   r-   r_   r$   r   r7   rJ   r`   r0   )ra   rb   r5   r6   rc   rd   rj   r)   r)   r*   test_rfe_mockclassifier   s    "
r}   c                 C   s  t d}t }tj|j|jt|jdfdf }t|j}t	t
dddd}||| |j D ] }t|j| |jd ksbJ qbt|j|jd ksJ ||}t||j t	t
dddd}| |}	||	| ||	}
t|
 |j ttdd	}t	t
ddd|d
}t|j|| ||}t||j td}t	t
ddd|d
}||| ||}t||j dd }t	t
ddd|d
}||| |jdksJ t	t
dddd}||| |j D ]}t|j| dksJ qt|j|jd ksJ ||}t||j t	t
dddd}| |}	||	| ||	}
t|
 |j t	t
dddd}| |}	||	| ||	}
t|
 |j d S )Nr   rL   rM   rZ   r[   r,   rW   rY   F)Zgreater_is_better)rW   rY   scoringaccuracyc                 S   s   dS )Ng      ?r)   )rW   r5   r6   r)   r)   r*   test_scorer   s    ztest_rfecv.<locals>.test_scorerrP   皙?)r!   r   r.   r\   r]   r^   r-   listr_   r   r   r7   cv_results_keysr0   r`   rJ   r   rh   r   r   r"   r   n_features_)rf   ra   rb   r5   r6   rfecvkeyrj   Zrfecv_sparseri   rk   r   Zscorerr   r)   r)   r*   
test_rfecv   s^    "







r   c                  C   s   t d} t }tj|j| jt|jdfdf }t|j}t	t
 dd}||| |j D ] }t|j| |jd ks^J q^t|j|jd ksJ d S )Nr   rL   rM   r,   r~   )r!   r   r.   r\   r]   r^   r-   r   r_   r   r$   r7   r   r   r0   r`   )ra   rb   r5   r6   r   r   r)   r)   r*   test_rfecv_mockclassifier  s    "
r   c                  C   s   dd l } ddlm} | | _td}t }tj|j|j	t
|jdfdf }t|j}ttddddd}||| | j}|d t
| dksJ d S )	Nr   )StringIOrL   rM   rZ   r[   r,   )rW   rY   verbose)sysior   stdoutr!   r   r.   r\   r]   r^   r-   r   r_   r   r   r7   seekreadline)r   r   ra   rb   r5   r6   r   Zverbose_outputr)   r)   r*   test_rfecv_verbose_output  s    "

r   c           
      C   s   t | }t }tj|j|jt|jdfdf }t|j}ddgddgddgfD ]\}}t	t
 ||d}||| t|jd | | d }|j D ]}	t|j|	 |ksJ qt|j|jd ksJ |j|ksPJ qPd S )NrL   rM   rP   r,      rW   rY   min_features_to_select)r!   r   r.   r\   r]   r^   r-   r   r_   r   r$   r7   ceilr0   r   r   r`   r   )
global_random_seedra   rb   r5   r6   rY   r   r   Z	score_lenr   r)   r)   r*   test_rfecv_cv_results_size-  s     "
r   c                  C   sD   t tdd} t| sJ t }t| |j|j}| dks@J d S )NrZ   r[   gffffff?)r   r   r	   r   r   r]   r_   min)rd   rb   r:   r)   r)   r*   test_rfe_estimator_tagsF  s
    r   c                 C   s   d}t d|| d\}}|j\}}tdd}t|dd}|||}|j |d ksXJ t|d	d}|||}|j |d ksJ t|d
d}|||}|j |d ksJ d S )Nrg   2   	n_samples
n_featuresrR   rZ   r[   g{Gz?rY   rP   r      )r   r0   r   r   r7   r{   sum)r   r   r5   r6   r   rW   selectorselr)   r)   r*   test_rfe_min_stepO  s    


r   c                 C   s  dd }dd }ddg}ddg}ddg}t |||D ]\}}}t| }	|	jd|fd	}
|	d }ttd
d||d}||
| t	|j
||||ksJ t	|j
||||ks4J q4d}ddg}ddg}t ||D ]\}}t| }	|	jd|fd	}
|	d }ttd
d|d}||
| |j D ]F}t|j| ||||ksVJ t|j| ||||ks2J q2qd S )Nc                 S   s   d| | | d |  S Nr,   r)   r   rX   rY   r)   r)   r*   formula1p  s    z4test_number_of_subsets_of_features.<locals>.formula1c                 S   s   dt | | t|  S r   )r.   r   floatr   r)   r)   r*   formula2s  s    z4test_number_of_subsets_of_features.<locals>.formula2   r   rP   d   rM   rZ   r[   rV   r,   rg   r~   )zipr!   r^   Zrandroundr   r   r7   r.   maxr`   r   r   r   r-   )r   r   r   Zn_features_listZn_features_to_select_listZ	step_listr   rX   rY   ra   r5   r6   rd   r   r   r)   r)   r*   "test_number_of_subsets_of_featuresg  sF    	r   c           	      C   s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd}|
|| |j}|j}|jdd |
|| t|j| | |j ksJ | D ] }|| t|j| ksJ qd S )NrL   rM   rZ   r[   rw   rP   )n_jobs)r!   r   r.   r\   r]   r^   r-   r_   r   r   r7   r`   r   r>   r   r   rx   Zapprox)	r   ra   rb   r5   r6   r   Zrfecv_rankingZrfecv_cv_results_r   r)   r)   r*   test_rfe_cv_n_jobs  s    "r   c                  C   s   t d} t }d}ttd|t|j}|j}|jdkt	}t
t| dddtddd}|j|||d	 |jdks|J d S )
Nr   rT   rR   r,   r   rP   )Zn_splits)rW   rY   r   cv)groups)r!   r   r.   floorZlinspacer-   r_   r]   Zastypeintr   r   r   r7   r   )ra   rb   Znumber_groupsr   r5   r6   Z
est_groupsr)   r)   r*   test_rfe_cv_groups  s    r   importance_getterzregressor_.coef_zselector, expected_n_featuresr   rT   c                 C   s\   t dddd\}}tdd}t|tjtjd}||| d}|||}|j |ksXJ d S )Nr   rg   r   r   r   Z	regressorfuncZinverse_funcr   )	r   r    r
   r.   logexpr7   r{   r   )r   r   Zexpected_n_featuresr5   r6   rW   log_estimatorr   r)   r)   r*   test_rfe_wrapped_estimator  s    

r   zimportance_getter, err_typeautorandomc                 C   s   | j S r%   )Z
importance)xr)   r)   r*   <lambda>      r   Selectorc                 C   sp   t dddd\}}t }t|tjtjd}t|( ||| d}||| W d    n1 sb0    Y  d S )Nr   rg   *   r   r   r   )	r   r    r
   r.   r   r   rx   ry   r7   )r   Zerr_typer   r5   r6   rW   r   modelr)   r)   r*   %test_rfe_importance_getter_validation  s    

r   r   c                 C   sn   t  }|j}|j}tj|d d< tj|d d< t }| d urJt|| d}n
t|d}|	|| |
| d S )Nr   r,   )rW   r   rw   )r   r]   r_   r.   naninfr$   r   r   r7   rJ   )r   rb   r5   r6   rc   rd   r)   r)   r*   test_rfe_allow_nan_inf_in_x  s    
r   c                  C   sR   t t t } tdd\}}t| ddd}||| ||jd dksNJ d S )NTrt   rP   $named_steps.logisticregression.coef_)rX   r   r,   )r   r   r   r   r   r7   rJ   r0   )Zpipeliner]   r6   Zsfmr)   r)   r*   test_w_pipeline_2d_coef_
  s    r   c           	         s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd  
|| dd  j D }t fdd|D }tj|d	d
}tj|d	d
}t jd | t jd | d S )NrL   rM   rZ   r[   rw   c                 S   s   g | ]}d |v r|qS )splitr)   .0r   r)   r)   r*   
<listcomp>   r   z+test_rfecv_std_and_mean.<locals>.<listcomp>c                    s   g | ]} j | qS r)   )r   r   r   r)   r*   r   !  r   r   ZaxisZmean_test_scoreZstd_test_score)r!   r   r.   r\   r]   r^   r-   r_   r   r   r7   r   r   ZasarraymeanZstdr   )	r   ra   rb   r5   r6   Z
split_keysZ	cv_scoresZexpected_meanZexpected_stdr)   r   r*   test_rfecv_std_and_mean  s    "r   )r   r   rY   cv_results_n_featuresr,   )r,   rP   r   rT   )r,   rP   r   rT   r   rP   )r,   rP   rT   )r,   r   r   r   )r,   rP   r   c                    sh   t d||dd\}}ttdd|| d  || t jd | t fdd	 j D sdJ d S )
NrO   r   )r   r   Zn_informativeZn_redundantrZ   r[   r   r   c                 3   s$   | ]}t |t  jd  kV  qdS )r   N)r-   r   )r   valuer   r)   r*   	<genexpr>I  s   z3test_rfecv_cv_results_n_features.<locals>.<genexpr>)r   r   r   r7   r   r   allvalues)r   r   rY   r   r5   r6   r)   r   r*    test_rfecv_cv_results_n_features)  s    
r   ClsRFEc                 C   s@   t jjdd}t jjddd}tdd}| |}||| d S )N)rg   r   rM   rP   )rg   rP   r   )rQ   )r.   r   r^   randintr   r7   )r   r5   r6   rc   Zrfe_testr)   r)   r*   test_multioutputO  s
    
r   c                 C   sF   t dd\}}tj|d< tt t t }| |dd}||| dS )z`Check that RFE works with pipeline that accept nans.

    Non-regression test for gh-21743.
    Trt   )r   r   r   )rW   r   N)r   r.   r   r   r   r   r   r7   )r   r5   r6   pipefsr)   r)   r*   test_pipeline_with_nansX  s    
r   PLSEstimatorc                 C   sH   t dddd\}}|dd}| |dd||}|||dksDJ d	S )
zCheck the behaviour of RFE with PLS estimators.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12410
    r   rg   r   r   r,   )Zn_componentsr   g      ?N)r   r7   r:   )r   r   r5   r6   rW   r   r)   r)   r*   test_rfe_plsn  s    
r   c                  C   s   t  } tt d}d}d}tjt|d(}|| j| j	| j W d   n1 sV0    Y  t
|jjtsrJ |t|jjv sJ dS )a  Check that we raise the proper AttributeError when the estimator
    does not implement the `decision_function` method, which is decorated with
    `available_if`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28108
    rw   z/This 'RFE' has no attribute 'decision_function'z>'LinearRegression' object has no attribute 'decision_function'ru   N)r   r   r   rx   ry   AttributeErrorr7   r]   r_   rI   
isinstancer   	__cause__str)rb   rd   Z	outer_msgZ	inner_msgZ	exec_infor)   r)   r*   "test_rfe_estimator_attribute_error|  s    6r   zClsRFE, paramrX   r   c                 C   sl   t ddd\}}tjt| dd4 | f dt i|di}||| W d   n1 s^0    Y  dS )	zCheck if the correct warning is raised when trying to initialize a RFE
    object with a n_features_to_select attribute larger than the number of
    features present in the X variable that is passed to the fit method
    rO   r   )r   rR   z=21 > n_features=20ru   rW      N)r   rx   ZwarnsUserWarningr   r7   )r   paramr5   r6   Zclsrfer)   r)   r*   %test_rfe_n_features_to_select_warning  s    r   c                  C   s   t dd\} }| jd }t|}d|d|d < tj| | d|d  gdd}t||d|d  g}tdd}t|dd	}|j| ||d
 t|dd	}||| t|j	|j	 t|dd	}	t|}
|	j| ||
d
 t
|	j	|j	rJ dS )z4Test that `RFE` works correctly with sample weights.r   r   rP   Nr   rZ   r[   rU   r~   )sample_weight)r   r0   r.   Z	ones_likeZconcatenater   r   r7   r   r`   Zarray_equal)r5   r6   r   r   ZX2y2rW   Zrfe_swrd   Zrfe_sw_2Zsample_weight_2r)   r)   r*   test_rfe_with_sample_weight  s     



r   c                 C   sv   t | d\}}t }t|dd}||| |j}td ||| W d    n1 s\0    Y  t||j d S )Nr   rP   )rW   r   	threading)r   r   r   r7   r`   r   r   )r   r5   r6   rc   rd   Zranking_refr)   r)   r*   &test_rfe_with_joblib_threading_backend  s    
*r   )ZrH   operatorr   numpyr.   rx   Zjoblibr   Znumpy.testingr   r   r   Zsklearn.baser   r   r	   Zsklearn.composer
   Zsklearn.cross_decompositionr   r   r   Zsklearn.datasetsr   r   r   Zsklearn.ensembler   Zsklearn.feature_selectionr   r   Zsklearn.imputer   Zsklearn.linear_modelr   r   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   r   Zsklearn.pipeliner   Zsklearn.preprocessingr   Zsklearn.svmr   r   r    Zsklearn.utilsr!   Zsklearn.utils._testingr"   Zsklearn.utils.fixesr#   r$   re   markZparametrizerl   rz   r|   r}   r   r   r   r   r   r   r   r   r   r   rn   r   r   r   r   r   arrayr   r   r   r   r   r   r   r   r)   r)   r)   r*   <module>   s   $

Q	A





!