a
    htz                     @   s  d Z ddlmZmZ ddlZddlZddlZddlZddl	m
Z
 ddlmZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZ dd
l m!Z!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z:m;Z; e5dZ<e Z=e<>e=j?j@ZAe=jBeA e=_Be=j?eA e=_?e ZCe<>eCj?j@ZAeCjBeA eC_BeCj?eA eC_?dd ZDejEFdee;e: ddddddddddddddddddgg d d!d" ZGd#d$ ZHejEFd%e;e: d&d' ZIG d(d) d)e
ZJd*d+ ZKd,d- ZLd.d/ ZMd0d1 ZNd2d3 ZOd4d5 ZPd6d7 ZQd8d9 ZRd:d; ZSd<d= ZTd>d? ZUd@dA ZVG dBdC dCe
ZWdDdE ZXdsdGdHZYdIdJ ZZdKdL Z[dMdN Z\dOdP Z]dQdR Z^dSdT Z_dUdV Z`dWdX ZadYdZ Zbd[d\ Zcd]d^ Zdd_d` Zedadb Zfdcdd Zgdedf ZhejEFdgeedhdidfeedhdidfee dfee0 dfgdjdk ZiejEFdleedhdmdhdneedhdmdhdngdodp ZjejEFdleedhdmdhdneedhdmdhdngdqdr ZkdS )tzE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )cycleproductN)BaseEstimator)load_diabetes	load_irismake_hastie_10_2)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressorBaggingClassifierBaggingRegressorHistGradientBoostingClassifierHistGradientBoostingRegressorRandomForestClassifierRandomForestRegressor)SelectKBest)LogisticRegression
Perceptron)GridSearchCVParameterGridtrain_test_split)KNeighborsClassifierKNeighborsRegressor)make_pipeline)FunctionTransformerscale)SparseRandomProjection)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)check_random_state)assert_array_almost_equalassert_array_equal)CSC_CONTAINERSCSR_CONTAINERSc            	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}t|t|D ],\}}tf || dd|||| qtd S )Nr   random_state      ?      ?      TFmax_samplesmax_features	bootstrapbootstrap_features   Zmax_iter   )	max_depth)	estimatorr(   n_estimators)r"   r   irisdatatargetr   r   r   r    r   r   zipr   r   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr6    rF   _/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/ensemble/tests/test_bagging.pytest_classification:   s<    
	
rH   z sparse_container, params, methodr)   r4   Tr-   r*   r,   Fr/   r0   r1   r.   r0   r1   )r=   predict_probapredict_log_probadecision_functionc                    s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
tf |ddddd	||	|}t	|||
}tf |ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D sJ d S )Nc                       s    e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVC7SVC variant that records the nature of the training setc                    s   t  || t|| _| S Nsuperr<   type
data_type_selfXy	__class__rF   rG   r<   x   s    
z1test_sparse_classification.<locals>.CustomSVC.fit__name__
__module____qualname____doc__r<   __classcell__rF   rF   rX   rG   	CustomSVCu   s   r`   r   r'   Zlinearovr)Zkerneldecision_function_shaper+   r6   r(   c                 S   s   g | ]
}|j qS rF   rS   .0irF   rF   rG   
<listcomp>       z.test_sparse_classification.<locals>.<listcomp>c                    s   g | ]}| kqS rF   rF   rf   tZsparse_typerF   rG   rh      ri   )r   r"   r   r   r8   r9   r:   r   r<   getattrr#   rR   estimators_all)sparse_containerrE   methodr`   r>   r?   r@   rA   rB   X_train_sparseX_test_sparsesparse_classifiersparse_resultsZdense_classifierdense_resultstypesrF   rl   rG   test_sparse_classification[   s:    


rx   c                  C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}d t t t t	 fD ]0}|D ]&}t
f || d	|||| qrqjd S )
Nr   2   r'   r)   r*   TFr-   rc   )r"   r   diabetesr9   r:   r   r	   r!   r   r   r   r<   r=   )r>   r?   r@   rA   rB   rC   r6   rE   rF   rF   rG   test_regression   s.    
r{   rp   c                    s$  t d}ttjd d tjd d |d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}| |}| |}	|D ]}
tf | dd|
||}||	}tf | dd|
|||}t	| dd |j
D }t|| t fdd|D sJ t|| qd S )Nr   ry   r'   c                       s    e Zd ZdZ fddZ  ZS )z)test_sparse_regression.<locals>.CustomSVRrN   c                    s   t  || t|| _| S rO   rP   rT   rX   rF   rG   r<      s    
z-test_sparse_regression.<locals>.CustomSVR.fitrZ   rF   rF   rX   rG   	CustomSVR   s   r|   r)   r4   Tr-   r*   r,   FrI   rJ   r+   rc   c                 S   s   g | ]
}|j qS rF   rd   re   rF   rF   rG   rh      ri   z*test_sparse_regression.<locals>.<listcomp>c                    s   g | ]}| kqS rF   rF   rj   rl   rF   rG   rh      ri   )r"   r   rz   r9   r:   r   r   r<   r=   rR   rn   r#   ro   )rp   r>   r?   r@   rA   rB   r|   Zparameter_setsrr   rs   rE   rt   ru   rv   rw   rF   rl   rG   test_sparse_regression   sP    




r}   c                   @   s   e Zd Zdd Zdd ZdS )DummySizeEstimatorc                 C   s   |j d | _t|| _d S Nr   )shapetraining_size_joblibhashtraining_hash_rT   rF   rF   rG   r<      s    zDummySizeEstimator.fitc                 C   s   t |jd S r   )nponesr   rU   rV   rF   rF   rG   r=      s    zDummySizeEstimator.predictNr[   r\   r]   r<   r=   rF   rF   rF   rG   r~      s   r~   c                  C   s   t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ksfJ tt dd| d||}||||||ksJ tt	 dd||}g }|j
D ]$}|j|jd ksJ ||j qtt|t|ksJ d S )Nr   r'   r*   F)r6   r.   r0   r(   T)r6   r0   )r"   r   rz   r9   r:   r!   r<   r   scorer~   rn   r   r   appendr   lenset)r>   r?   r@   rA   rB   r6   ensembleZtraining_hashrF   rF   rG   test_bootstrap_samples   s>    

r   c                  C   s   t d} ttjtj| d\}}}}tt dd| d||}|jD ]$}tjj	d t
|j	d ksBJ qBtt dd| d||}|jD ]$}tjj	d t
|j	d ksJ qd S )Nr   r'   r*   F)r6   r/   r1   r(   r+   T)r"   r   rz   r9   r:   r   r!   r<   estimators_features_r   r   unique)r>   r?   r@   rA   rB   r   featuresrF   rF   rG   test_bootstrap_features#  s0    

"
r   c                  C   s  t d} ttjtj| d\}}}}tjddd tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W d    n1 s0    Y  d S )
Nr   r'   ignore)divideinvalidrc   r+   )Zaxis   )r6   r(   r.   )r"   r   r8   r9   r:   r   Zerrstater   r    r<   r#   sumrK   r   r   exprL   r   r>   r?   r@   rA   rB   r   rF   rF   rG   test_probability?  s6    
r   c            	   	   C   s   t d} ttjtj| d\}}}}t t fD ]}t|ddd| d||}|	||}t
||j dk snJ d}tjt|d. t|d	dd| d}||| W d    q.1 s0    Y  q.d S )
Nr   r'   d   Tr6   r7   r0   	oob_scorer(   皙?{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.matchr+   )r"   r   r8   r9   r:   r    r   r   r<   r   abs
oob_score_pytestwarnsUserWarning)	r>   r?   r@   rA   rB   r6   clf
test_scorewarn_msgrF   rF   rG   test_oob_score_classificationb  s6    
r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s`J d}tjt|d0 tt d	dd| d}||| W d    n1 s0    Y  d S )
Nr   r'   ry   Tr   r   r   r   r+   )r"   r   rz   r9   r:   r   r!   r<   r   r   r   r   r   r   )	r>   r?   r@   rA   rB   r   r   r   ZregrrF   rF   rG   test_oob_score_regression  s4    
r   c                  C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   r'   r+   F)r6   r7   r0   r1   r(   )
r"   r   rz   r9   r:   r   r   r<   r#   r=   )r>   r?   r@   rA   rB   Zclf1Zclf2rF   rF   rG   test_single_estimator  s    
r   c                  C   s2   t jt j } }t }tt|| |dr.J d S )NrM   )r8   r9   r:   r    hasattrr   r<   )rV   rW   baserF   rF   rG   
test_error  s    r   c                  C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   r'      n_jobsr(   r+   r   ra   )rb   )r   r8   r9   r:   r   r    r<   rK   
set_paramsr#   r   rM   )r?   r@   rA   rB   r   y1y2y3Z
decisions1Z
decisions2Z
decisions3rF   rF   rG   test_parallel_classification  sF    









r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   r'   r   r   r+   r   r4   )r"   r   rz   r9   r:   r   r!   r<   r   r=   r#   )	r>   r?   r@   rA   rB   r   r   r   r   rF   rF   rG   test_parallel_regression  s"    




r   c                  C   sD   t jt j } }d||dk< ddd}ttt |dd| | d S )Nr+   r4   )r+   r4   )r7   Zestimator__CZroc_auc)Zscoring)r8   r9   r:   r   r   r   r<   )rV   rW   
parametersrF   rF   rG   test_gridsearch	  s    
r   c                  C   s0  t d} ttjtj| d\}}}}td ddd||}t|jt	sHJ tt	 ddd||}t|jt	spJ tt
 ddd||}t|jt
sJ ttjtj| d\}}}}td ddd||}t|jtsJ tt ddd||}t|jtsJ tt ddd||}t|jts,J d S )Nr   r'   r   r   )r"   r   r8   r9   r:   r   r<   
isinstanceZ
estimator_r    r   rz   r   r!   r   r   rF   rF   rG   test_estimator  s6    

r   c                  C   sL   t ttddt dd} | tjtj t| d j	d d j
tsHJ d S )Nr+   )kr4   )r/   r   )r   r   r   r    r<   r8   r9   r:   r   stepsr(   int)r6   rF   rF   rG   test_bagging_with_pipelineA  s
    r   c                   @   s   e Zd Zdd Zdd ZdS )DummyZeroEstimatorc                 C   s   t || _| S rO   )r   r   classes_rT   rF   rF   rG   r<   J  s    zDummyZeroEstimator.fitc                 C   s   | j tj|jd td S )Nr   )dtype)r   r   Zzerosr   r   r   rF   rF   rG   r=   N  s    zDummyZeroEstimator.predictNr   rF   rF   rF   rG   r   I  s   r   c                  C   s~   t t } td}| tjtjtj t	t
6 | jtjtj|jdtjjd dd W d    n1 sp0    Y  d S )Nr   
   )size)Zsample_weight)r   r   r"   r<   r8   r9   r:   r=   r   raises
ValueErrorrandintr   )r6   r>   rF   rF   rG   1test_bagging_sample_weight_unsupported_but_passedR  s    
r   *   c                 C   s   t ddd\}}d }dD ]D}|d u r4t|| dd}n|j|d ||| t||ksJ qtd| d	d}||| td
d |D tdd |D ksJ d S )Nr2   r+   Z	n_samplesr(   )r   r   T)r7   r(   
warm_startr7   r   Fc                 S   s   g | ]
}|j qS rF   r'   rf   treerF   rF   rG   rh   t  ri   z#test_warm_start.<locals>.<listcomp>c                 S   s   g | ]
}|j qS rF   r'   r   rF   rF   rG   rh   u  ri   )r   r   r   r<   r   r   )r(   rV   rW   clf_wsr7   Z	clf_no_wsrF   rF   rG   test_warm_start_  s"    r   c                  C   sn   t ddd\} }tddd}|| | |jdd tt || | W d    n1 s`0    Y  d S )	Nr2   r+   r   r   T)r7   r   r,   r   )r   r   r<   r   r   r   r   rV   rW   r   rF   rF   rG   $test_warm_start_smaller_n_estimatorsy  s    r   c            	      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W d    n1 s0    Y  t||| d S )Nr2   r+   r   +   r'   r   TS   r7   r   r(   r*   z;Warm-start fitting without increasing n_estimators does notr   )	r   r   r   r<   r=   r   r   r   r$   )	rV   rW   r?   r@   rA   rB   r   Zy_predr   rF   rF   rG   "test_warm_start_equal_n_estimators  s    
*r   c            
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr2   r+   r   r   r'   r   TiE  r   r   r   F)r   r   r   r<   r   r=   r#   )
rV   rW   r?   r@   rA   rB   r   r   r   r   rF   rF   rG   test_warm_start_equivalence  s    

r   c                  C   sX   t ddd\} }tdddd}tt || | W d    n1 sJ0    Y  d S )Nr2   r+   r   r   T)r7   r   r   )r   r   r   r   r   r<   r   rF   rF   rG   $test_warm_start_with_oob_score_fails  s    r   c                  C   s|   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W d    n1 sn0    Y  d S )Nr   r+   r   r   T)r7   r   Fr   )r   r   r7   r   )r   r   r<   r   r   r   AttributeErrorrm   r   rF   rF   rG   $test_oob_score_removed_on_warm_start  s    r   c                  C   sH   t ddd\} }tt ddddd}|| |j|| |jksDJ d S )N   r+   r   r)   T)r.   r/   r   r(   )r   r   r   r<   r   rV   rW   baggingrF   rF   rG   test_oob_score_consistency  s    r   c                  C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ksVJ t|d t| d ksrJ |d jj	d	ksJ d}|| }|| }|| }	| | d d |f }
|| }|	j
}|	|
| |	j
}t|| d S )
Nr   r+   r   r)   F)r.   r/   r(   r0   r   r4   rg   )r   r   r   r<   estimators_samples_r   rn   r   r   kindcoef_r#   )rV   rW   r   Zestimators_samplesZestimators_featuresrD   Zestimator_indexZestimator_samplesZestimator_featuresr6   r?   rA   Z
orig_coefsZ	new_coefsrF   rF   rG   test_estimators_samples  s2    r   c                  C   s   t  } | j| j }}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr4   )Zn_componentsr)   r   )r6   r.   r(   r   r+   )r   r9   r:   r   r   r   r   r<   rn   r   r   copyr   r   r$   )r8   rV   rW   Zbase_pipeliner   Zpipeline_estimator_coefr6   Zestimator_sampleZestimator_featurer?   rA   rF   rF   rG   %test_estimators_samples_deterministic  s    


r   c                  C   sH   d} t d|  dd\}}tt | ddd}||| |j| ksDJ d S )Nr   r4   r+   r   r)   )r.   r/   r(   )r   r   r   r<   Z_max_samples)r.   rV   rW   r   rF   rF   rG   test_max_samples_consistency  s    r   c                  C   s   d} dgdgdggd }g dd }g dd }g dd }t d| d	||j}t d| d	||j}t d| d	||j}||g||gksJ d S )
Nr   r   r   r+   )ABC)r   r   r+   )r   r+   r4   T)r   r(   )r   r<   r   )r(   rV   ZY1ZY2ZY3x1Zx2Zx3rF   rF   rG   !test_set_oob_score_label_encoding  s$    r   c                 C   s"   | j ddd} d| t|  < | S )NfloatT)r   r   )Zastyper   isfinite)rV   rF   rF   rG   replace7  s    r   c               	   C   sN  t g dg ddt jdgdt jdgdt j dgg} t g dt g dg dg dg dg dgg}|D ]}t }ttt|}|| |	|  t
|}|| |	| }|j|jksJ t }t|}tt || | W d    n1 s0    Y  t
|}tt || | W d    qp1 s>0    Y  qpd S )Nr+   r   r   r4   N   r4   r   )r4   r   r   r   r   )r4   r+   	   )r   r      )r   arraynaninfr!   r   r   r   r<   r=   r   r   r   r   r   )rV   Zy_valuesrW   Z	regressorpipelineZbagging_regressory_hatrF   rF   rG   *test_bagging_regressor_with_missing_inputs=  s@    


*r   c               	   C   s4  t g dg ddt jdgdt jdgdt j dgg} t g d}t }ttt|}|| |	|  t
|}|| | |	| }|j|jksJ ||  ||  t }t|}tt || | W d    n1 s0    Y  t
|}tt || | W d    n1 s&0    Y  d S )Nr   r   r4   r   )r   r   r   r   r   )r   r   r   r   r    r   r   r   r<   r=   r   r   rL   rK   r   r   r   )rV   rW   
classifierr   Zbagging_classifierr   rF   rF   rG   +test_bagging_classifier_with_missing_inputsf  s2    

	


*r   c                  C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr+   r4   r   r,   r   g333333?)r/   r(   )r   r   r   r   r<   r   rF   rF   rG   test_bagging_small_max_features  s    r   c                  C   sj   t jd} | dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )Nr      r,   c                   @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c                 S   s
   || _ d S rO   )_sample_indicesrT   rF   rF   rG   r<     s    z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)r[   r\   r]   r^   r<   rF   rF   rF   rG   MyEstimator  s   r  r+   )r6   r7   r(   )r   randomZRandomStateZrandnZaranger!   r   r<   r$   rn   r   r   )r>   rV   rW   r  r   rF   rF   rG   #test_bagging_get_estimators_indices  s    
r  zbagging, expected_allow_nanr+   r3   c                 C   s   |   jj|ksJ dS )z*Check that bagging inherits allow_nan tag.N)Z__sklearn_tags__Z
input_tags	allow_nan)r   Zexpected_allow_nanrF   rF   rG   test_bagging_allow_nan_tag  s    r  modelr   )r6   r7   c                 C   s@   t jdd  | tjtj W d   n1 s20    Y  dS )zAMake sure that metadata routing works with non-default estimator.T)Zenable_metadata_routingN)sklearnZconfig_contextr<   r8   r9   r:   r  rF   rF   rG   "test_bagging_with_metadata_routing  s    r	  c                 C   s   |  tjtj dS )z^Make sure that we still can use an estimator that does not implement the
    metadata routing.N)r<   r8   r9   r:   r  rF   rF   rG   -test_bagging_without_support_metadata_routing  s    r
  )r   )lr^   	itertoolsr   r   r   numpyr   r   r  Zsklearn.baser   Zsklearn.datasetsr   r   r   Zsklearn.dummyr   r	   Zsklearn.ensembler
   r   r   r   r   r   r   r   Zsklearn.feature_selectionr   Zsklearn.linear_modelr   r   Zsklearn.model_selectionr   r   r   Zsklearn.neighborsr   r   Zsklearn.pipeliner   Zsklearn.preprocessingr   r   Zsklearn.random_projectionr   Zsklearn.svmr   r   Zsklearn.treer    r!   Zsklearn.utilsr"   Zsklearn.utils._testingr#   r$   Zsklearn.utils.fixesr%   r&   r>   r8   Zpermutationr:   r   permr9   rz   rH   markZparametrizerx   r{   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r	  r
  rF   rF   rF   rG   <module>   s   (
!


)
8	*#%$),	

() 
	




