a
    hU                     @   s  d Z ddlZddlZddlZddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.g dZ/g dZ0ddgddgddggZ1g dZ2g dZ3e4 Z5e-6e5j7j8Z9ee5j:e5j7e-d\e5_:e5_7e; Z<ee<j:e<j7e-d\e<_:e<_7dd Z=dd Z>dd Z?d d! Z@d"d# ZAejBCd$g d%d&d' ZDd(d) ZEd*d+ ZFd,d- ZGd.d/ ZHd0d1 ZId2d3 ZJd4d5 ZKejBCd6eLg e'e(e*e&e)e'd7e(  d8d9 ZMejBCd6eLg e'e(e*e&e)e'd7e(  d:d; ZNd<d= ZOd>d? ZPd@dA ZQdBdC ZRdDdE ZSejBCdFe e5j:e5j7fe e<j:e<j7fgdGdH ZTdIdJ ZUdKdL ZVdMdN ZWdS )Oz6Testing for the boost module (sklearn.ensemble.boost).    N)datasets)BaseEstimatorclone)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressor)_samme_proba)LinearRegression)GridSearchCVtrain_test_split)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)assert_allcloseassert_array_almost_equalassert_array_equal)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSDOK_CONTAINERSLIL_CONTAINERS      )foor   r   r   r   r   )r   r   r   r   r   r      )r   r   r   )r   r   r   random_statec                     s   t g dg dg dg dg  t  jddd d t jf   G  fddd} |  }t|d	t  }t|j j t 	|
 sJ tt j|ddg d
 tt j|ddg d d S )N)r   ư>r   )gRQ?g333333?皙?)igRQ?g      ?)r#   r   g&.>r   Zaxisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXZprobs g/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s    z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r.   r,   r+   r,   r-   MockEstimatorB   s   r2   r    )r   r   r   r   )r   r   r   r   )nparrayabssumnewaxisr	   	ones_liker   r'   isfiniteallZargminargmax)r2   ZmockZsamme_probar,   r+   r-   test_samme_proba7   s    $r<   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r3   Zoneslenr*   r   fitr   r.   )Zy_tclfr,   r,   r-   test_oneclass_adaboost_probaT   s    r@   c                  C   sx   t dd} | tt t| tt tt	t
t| j | tjttdfksZJ | tjttfkstJ d S )Nr   r!   r   )r   r>   r*   y_classr   predictT	y_t_classr3   uniqueasarrayclasses_r.   r'   r=   decision_functionr?   r,   r,   r-   test_classification_toy]   s    
rJ   c                  C   s*   t dd} | tt t| tt d S Nr   r!   )r   r>   r*   y_regrr   rB   rC   y_t_regrrI   r,   r,   r-   test_regression_toyg   s    
rN   c                  C   s   t tj} t }|tjtj t| |j |	tj}|j
d t| ksPJ |tjj
d t| ksnJ |tjtj}|dksJ d|t|jdksJ ttdd |jD t|jksJ d S )Nr   g?zFailed with score = c                 s   s   | ]}|j V  qd S r&   r!   .0Zestr,   r,   r-   	<genexpr>       ztest_iris.<locals>.<genexpr>)r3   rE   iristargetr   r>   datar   rG   r.   r'   r=   rH   scoreestimators_set)classesr?   probarV   r,   r,   r-   	test_irisn   s    r[   loss)ZlinearZsquareZexponentialc                 C   st   t | dd}|tjtj |tjtj}|dks8J t|jdksJJ ttdd |jD t|jkspJ d S )Nr   )r\   r"   g?r   c                 s   s   | ]}|j V  qd S r&   r!   rO   r,   r,   r-   rQ      rR   z test_diabetes.<locals>.<genexpr>)	r   r>   diabetesrU   rT   rV   r=   rW   rX   )r\   regrV   r,   r,   r-   test_diabetes   s    r_   c            
      C   s  t jd} | jdtjjd}| jdtjjd}tdd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}d	d |jtj
tj|dD }	t|dksJ t||d
  t|dksJ t||d
  t|	dksJ t||	d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}dd |jtj
tj|dD }	t|dksJ t||d
  t|	dksJ t||	d
  d S )Nr   
   sizen_estimatorssample_weightc                 S   s   g | ]}|qS r,   r,   rP   pr,   r,   r-   
<listcomp>   rR   z'test_staged_predict.<locals>.<listcomp>c                 S   s   g | ]}|qS r,   r,   rg   r,   r,   r-   ri      rR   c                 S   s   g | ]}|qS r,   r,   rP   sr,   r,   r-   ri      s   r   rd   r"   c                 S   s   g | ]}|qS r,   r,   rg   r,   r,   r-   ri      rR   c                 S   s   g | ]}|qS r,   r,   rj   r,   r,   r-   ri      s   )r3   randomRandomStaterandintrS   rT   r'   r]   r   r>   rU   rB   staged_predictr.   staged_predict_probarV   staged_scorer=   r   r   )
rngZiris_weightsZdiabetes_weightsr?   ZpredictionsZstaged_predictionsrZ   Zstaged_probasrV   Zstaged_scoresr,   r,   r-   test_staged_predict   sB    

rt   c                  C   sf   t t d} ddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N	estimator)r   r   )rd   Zestimator__max_depthr   rv   r"   )
r   r   r   r>   rS   rU   rT   r   r   r]   )boost
parametersr?   r,   r,   r-   test_gridsearch   s    


rz   c                  C   s   dd l } t }|tjtj |tjtj}| |}| |}t	||j
ksTJ |tjtj}||kspJ tdd}|tjtj |tjtj}| |}| |}t	||j
ksJ |tjtj}||ksJ d S rK   )pickler   r>   rS   rU   rT   rV   dumpsloadstype	__class__r   r]   )r{   objrV   rk   obj2Zscore2r,   r,   r-   test_pickle   s"    




r   c               	   C   sp   t jdddddddd\} }t }|| | |j}|jd dksFJ |d dtjf |dd  k slJ d S )Ni  r`   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r"   )	r   make_classificationr   r>   feature_importances_r'   r3   r7   r:   )r*   yr?   Zimportancesr,   r,   r-   test_importances   s    

r   c                  C   sZ   t  } td}tjt|d( | jttt	
dgd W d    n1 sL0    Y  d S )Nz*sample_weight.shape == (1,), expected (6,)matchr   re   )r   reescapepytestraises
ValueErrorr>   r*   rA   r3   rF   )r?   msgr,   r,   r-   ,test_adaboost_classifier_sample_weight_error  s    
r   c                  C   s   ddl m}  t|  }|tt tt }|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}g d}tt }tjtdd ||| W d    n1 s0    Y  d S )	Nr   )RandomForestClassifier)RandomForestRegressorr!   r   )r   barr   r   zworse than randomr   )sklearn.ensembler   r   r>   r*   rL   r   rA   r   r   r   r   r   r   )r   r?   r   ZX_failZy_failr,   r,   r-   test_estimator	  s    


r   c                  C   sR   d} t ddd}tjt| d  |tjtj W d    n1 sD0    Y  d S )Nz+Sample weights have reached infinite values   g      7@)rd   Zlearning_rater   )r   r   warnsUserWarningr>   rS   rU   rT   )r   r?   r,   r,   r-   test_sample_weights_infinite%  s    r   z(sparse_container, expected_internal_type   c                    s(  G dd dt }tjddddd\}}t|}t||dd	\}}}}| |}	| |}
t|d
ddd|	|}t|d
ddd||}||
}||}t	|| |
|
}|
|}t|| ||
}||}t|| ||
}||}t|| ||
|}|||}t|| ||
}||}t||D ]\}}t|| qB||
}||}t||D ]\}}t	|| qv||
}||}t||D ]\}}t|| q||
|}|||}t||D ]\}}t	|| qdd |jD }t fdd|D s$J d S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S z<Modification on fit caries data type for later verification.re   superr>   r~   
data_type_r)   r*   r   rf   r   r,   r-   r>   ?  s    
z1test_sparse_classification.<locals>.CustomSVC.fit)Nr/   r0   r1   __doc__r>   __classcell__r,   r,   r   r-   	CustomSVC<  s   r   r         *   )	n_classesr   r   r"   r   r!   T)Zprobabilityrw   c                 S   s   g | ]
}|j qS r,   r   rP   ir,   r,   r-   ri     rR   z.test_sparse_classification.<locals>.<listcomp>c                    s   g | ]}| kqS r,   r,   rP   texpected_internal_typer,   r-   ri     rR   )r   r   Zmake_multilabel_classificationr3   Zravelr   r   r>   rB   r   rH   r   Zpredict_log_probar.   rV   staged_decision_functionziprp   rq   rr   rW   r:   )sparse_containerr   r   r*   r   X_trainX_testy_trainy_testX_train_sparseX_test_sparseZsparse_classifierZdense_classifierZsparse_clf_resultsZdense_clf_resultsZsparse_clf_resZdense_clf_restypesr,   r   r-   test_sparse_classification,  sj    	




















r   c                    s   G dd dt }tjddddd\}}t||dd	\}}}}| |}	| |}
t| dd
|	|}t| dd
||}||
}||}t|| ||
}||}t	||D ]\}}t|| qdd |j
D }t fdd|D sJ d S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S r   r   r   r   r,   r-   r>     s    
z-test_sparse_regression.<locals>.CustomSVR.fit)Nr   r,   r,   r   r-   	CustomSVR  s   r   r   2   r   r   )r   r   	n_targetsr"   r   r!   rw   c                 S   s   g | ]
}|j qS r,   r   r   r,   r,   r-   ri     rR   z*test_sparse_regression.<locals>.<listcomp>c                    s   g | ]}| kqS r,   r,   r   r   r,   r-   ri     rR   )r   r   Zmake_regressionr   r   r>   rB   r   rp   r   rW   r:   )r   r   r   r*   r   r   r   r   r   r   r   Zsparse_regressorZdense_regressorZsparse_regr_resultsZdense_regr_resultsZsparse_regr_resZdense_regr_resr   r,   r   r-   test_sparse_regression  s,    	





r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jksBJ dS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r&   r,   )r)   r*   r   r,   r,   r-   r>     s    zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r3   Zzerosr'   r(   r,   r,   r-   rB     s    zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r/   r0   r1   r>   rB   r,   r,   r,   r-   DummyEstimator  s   r   r    rc   N)r   r   r>   r*   rL   r=   Zestimator_weights_Zestimator_errors_)r   rx   r,   r,   r-   %test_sample_weight_adaboost_regressor  s    r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   3   r    r   Zmost_frequent)ZstrategyN)r3   rm   rn   Zrandnchoicer   r   r>   rB   r.   r   r   )rs   r*   Zycyrrx   r,   r,   r-   test_multidimensional_X  s    



r   c                  C   sn   t jt j } }tt }t|d}d|jj}t	j
t|d || | W d    n1 s`0    Y  d S )Nru   z {} doesn't support sample_weightr   )rS   rU   rT   r   r   r   formatr   r/   r   r   r   r>   )r*   r   rv   r?   err_msgr,   r,   r-   -test_adaboostclassifier_without_sample_weight  s    

r   c            
      C   sR  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k s,J ||	k s:J |t|	ksNJ d S )Nr   r   d     )numg?r$   g-C6?r   r   r`   i'  rv   rd   r"   re   )r3   rm   rn   ZlinspaceZrandr'   Zreshaper   r
   r   r>   r8   rV   r   Zapprox)
rs   r*   r   Zregr_no_outlierZregr_with_weightZregr_with_outlierrf   Zscore_with_outlierZscore_no_outlierZscore_with_weightr,   r,   r-   $test_adaboostregressor_sample_weight   s,     
r   c                  C   sX   t tjddddi\} }}}tdd}|| | ttj||dd|	| d S )NT)Z
return_X_yr"   r   r!   r   r%   )
r   r   Zload_digitsr   r>   r   r3   r;   r.   rB   )r   r   r   r   modelr,   r,   r-    test_adaboost_consistent_predict&  s    

r   zmodel, X, yc                 C   sX   t |}d|d< d}tjt|d  | j|||d W d    n1 sJ0    Y  d S )Nir   z1Negative values in data passed to `sample_weight`r   re   )r3   r8   r   r   r   r>   )r   r*   r   rf   r   r,   r,   r-   #test_adaboost_negative_weight_error5  s
    
r   c                  C   s~   t jd} | jdd}| jddgdd}t |d }tdd	d
}t|dd	d}|j|||d t 	|j
 dkszJ dS )zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r   r`   ra   r   r   r   gtDS 'T	r`      )	max_depthr"      r   re   N)r3   rm   rn   normalr   r8   r   r   r>   isnanr   r6   )rs   r*   r   rf   treeZ	ada_modelr,   r,   r-   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsE  s    r   c                 C   s  d}t j|d| d\}}td| d||}||}t|jddddd tt	|dd	|d  hksnJ |
|D ]<}t|jddddd tt	|dd	|d  hksxJ qx|jd
d|| ||}t|jddddd |
|D ]}t|jddddd qdS )zCheck that the decision function respects the symmetric constraint for weak
    learners.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/26520
    r    r   )r   Zn_clusters_per_classr"   rl   r%   r   g:0yE>)Zatolr   r   rc   N)r   r   r   r>   rH   r   r6   rX   r3   rE   r   Z
set_params)Zglobal_random_seedr   r*   r   r?   Zy_scorer,   r,   r-   test_adaboost_decision_functionV  s     

"$
r   c                  C   sJ   t ddd} tjtdd | tt W d    n1 s<0    Y  d S )Nr   ZSAMME)rd   	algorithmz'The parameter 'algorithm' is deprecatedr   )r   r   r   FutureWarningr>   r*   rA   )Zadaboost_clfr,   r,   r-   test_deprecated_algorithm|  s    r   )Xr   r   numpyr3   r   Zsklearnr   Zsklearn.baser   r   Zsklearn.dummyr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr	   Zsklearn.linear_modelr
   Zsklearn.model_selectionr   r   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearn.utils._testingr   r   r   Zsklearn.utils.fixesr   r   r   r   r   rm   rn   rs   r*   rA   rL   rC   rD   rM   Z	load_irisrS   ZpermutationrT   rb   permrU   Zload_diabetesr]   r<   r@   rJ   rN   r[   markZparametrizer_   rt   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r,   r,   r,   r-   <module>   s   	(
	

-

Z

/	&
	&