a
    h                  
   @   sJ  d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1 ddl2m3Z3m4Z4m5Z5 eegZ6ddgddgddgddgddgddggZ7g dZ8ddgddgddggZ9g dZ:eddd d!d"d#\Z;Z<e%e<Z<ej=>dZ?e	@ ZAe?BeAjCjDZEeAjFeE eA_FeAjCeE eA_Cd$d% ZGd&d' ZHejIJd(d)d*d+ ZKejIJd(d)d,d- ZLejIJd(d.ejIJd/d0d1d2 ZMejIJd/d0ejIJd3d4d5d6 ZNd7d8 ZOejIJd9ee;e<feeAjFeAjCfgd:d; ZPd<d= ZQd>d? ZRejIJd@e4dAdB ZSdCdD ZTdEdF ZUdGdH ZVdIdJ ZWdKdL ZXejIJdMe6dNdO ZYdPdQ ZZdRdS Z[dTdU Z\dVdW Z]dXdY Z^dZd[ Z_d\d] Z`ejIJd^e6d_d` ZaejIJd^e6dadb ZbejIJdceddfedefedffeddfedefedffgdgdh Zcdidj Zddkdl Zedmdn ZfejIJdoe6dpdq ZgejIJdoe6drds ZhejIJdoe6dtdu ZiejIJdoe6dvdw ZjejIJdxe6dydz ZkejIJdoe6d{d| ZlejIJdoe6d}d~ ZmejIJdoe6dd ZnejIJdoe6dd ZoejIJdoe6ejIJde3e4 e5 dd ZpejIJdoe6dd Zqdd ZrejIJdoe6dd Zsdd Ztdd Zudd Zvdd ZwejIJde6dd ZxejIJde6dd Zydd ZzejIJdg ddd Z{dd Z|e1ejIJdeefejIJde3e4 e5 dd Z}ejIJd^eegdd Z~dd Zdd Zdd Zdd ZejIjJdeeefeeefeeefgg dddd Zdd Zdd Zdd Zdd Zdd Zdd Ze1dd Zdd ZddĄ ZddƄ ZdS )zP
Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting).
    N)assert_allclose)datasets)clone)make_classificationmake_regression)DummyClassifierDummyRegressor)GradientBoostingClassifierGradientBoostingRegressor)_safe_divide)predict_stages)DataConversionWarningNotFittedError)LinearRegression)mean_squared_error)train_test_split)make_pipeline)scale)NuSVR)check_random_state)NoSampleWeightWrapper)InvalidParameterError)assert_array_almost_equalassert_array_equalskip_if_32bit)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS      )r   r   r   r    r    r       )r   r    r    d         
      )	n_samples
n_featuresZn_informativenoiserandom_statec                  C   sP   t dd} d}tjt|d  | tjtj W d   n1 sB0    Y  dS )z/Test exponential loss raises for n_classes > 2.exponentiallossz?loss='exponential' is only suitable for a binary classificationmatchN)r	   pytestraises
ValueErrorfitirisdatatargetclfmsg r;   i/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/ensemble/tests/test_gradient_boosting.pytest_exponential_n_classes_gt_2;   s    
r=   c                  C   sL   t td} d}tjt|d | tt W d   n1 s>0    Y  dS )z0Test raise if init_ has no predict_proba method.initzThe 'init' parameter of GradientBoostingClassifier must be a str among {'zero'}, None or an object implementing 'fit' and 'predict_proba'.r/   N)r	   r
   r1   r2   r3   r4   Xyr8   r;   r;   r<   'test_raise_if_init_has_no_predict_probaC   s
    
rB   r.   log_lossr,   c                 C   s   t | d|d}tt |t W d    n1 s80    Y  |tt t	|tt
 dt|jkspJ |jd d |jdd   }t|dksJ |t}|jdksJ d S )Nr&   )r.   n_estimatorsr+   r   r            )   r&   r    )r	   r1   r2   r3   predictTr4   r@   rA   r   true_resultlenestimators_train_score_npanyapplyshape)r.   global_random_seedr9   Zlog_loss_decreaseleavesr;   r;   r<   test_classification_toyN   s    (
rT   c                 C   s  t jd|d\}}d}|d | ||d   }}|d | ||d   }}dd| |d}	tf ddi|	}
|
|| tf dd	i|	}||| |
|||||k sJ d
d| |d}	tf ddi|	}||| tf ddi|	}||| ||||||ksJ d S )N  r(   r+   i  r          ?)	max_depthlearning_rater.   r+   rE   r&   2      )rE   rY   r.   r+   rX   max_leaf_nodes)r   make_hastie_10_2r	   r4   score)r.   rR   r@   rA   Z	split_idxX_trainX_testy_trainy_testZcommon_paramsZgbrt_10_stumpsZgbrt_50_stumpsZgbrt_stumpsZgbrt_10_nodesr;   r;   r<   test_classification_syntheticc   s0    rc   )squared_errorabsolute_errorhuber	subsample)rW         ?c           
   
   C   s   t tt}d }d |d| fD ]n}td| d|d|dd}|jtt|d |t}|jdksbJ |	t}t
t|}	|	dk sJ |d ur|}q d S )	Nr!      r$   rh   )rE   r.   rX   rg   min_samples_splitr+   rY   sample_weight)r#   ri   g?)rN   onesrK   y_regr
   r4   X_regrP   rQ   rH   r   )
r.   rg   rR   rm   Zlast_y_predrl   regrS   y_predmser;   r;   r<   test_regression_dataset   s(    



	rs   rl   )Nr    c                 C   sv   |dkrt ttj}tdd|| d}|jtjtj|d |tjtj}|dksXJ |	tj}|j
dksrJ d S )Nr    r#   rD   rE   r.   r+   rg   rk   ?)   r#   r"   )rN   rm   rK   r5   r7   r	   r4   r6   r^   rP   rQ   )rg   rl   rR   r9   r^   rS   r;   r;   r<   	test_iris   s    rw   c                 C   s  t | }ddddd| d}tjd|dd	\}}|d d
 |d d
  }}|d
d  |d
d   }}tf i |}	|	|| t||	|}
|
dk sJ tjd|d\}}|d d
 |d d
  }}|d
d  |d
d   }}tf i |}	|	|| t||	|}
|
dk sJ tjd|d\}}|d d
 |d d
  }}|d
d  |d
d   }}tf i |}	|	|| t||	|}
|
dk sJ d S )Nr#   r$   r!   皙?rd   )rE   rX   rj   rY   r.   r+     rW   r(   r+   r*   r[   g      @rV   g     @g?)	r   r   make_friedman1r
   r4   r   rH   Zmake_friedman2Zmake_friedman3)rR   r+   Zregression_paramsr@   rA   r_   ra   r`   rb   r9   rr   r;   r;   r<   test_regression_synthetic   s:    
r|   zGradientBoosting, X, yc                 C   s2   |  }t |drJ ||| t |ds.J d S )Nfeature_importances_)hasattrr4   )GradientBoostingr@   rA   Zgbdtr;   r;   r<   test_feature_importances   s    
r   c                 C   s   t d| d}tt |t W d    n1 s60    Y  |tt t	|
tt |t}t|dksxJ t|dksJ |jj|jdddd}t	|t d S )Nr#   rE   r+   rF   rW   r    )Zaxisr   )r	   r1   r2   r3   predict_probarI   r4   r@   rA   r   rH   rJ   rN   allZclasses_ZtakeZargmax)rR   r9   Zy_probarq   r;   r;   r<   test_probability_log
  s    (
r   c                  C   sZ   g d} t ddd}d}tjt|d  |jtt| d W d    n1 sL0    Y  d S )N)r   r   r   r    r    r    r#   r    r   zty contains 1 class after sample_weight trimmed classes with zero weights, while a minimum of 2 classes are required.r/   rk   )r	   r1   r2   r3   r4   r@   rA   )rl   r9   r:   r;   r;   r<   $test_single_class_with_sample_weight  s    r   csc_containerc                 C   s   t jddd\}}| |}tddd}||| t|jdd}d}tj	t
|d" t|j||j| W d    n1 s0    Y  t|}tj	t
dd" t|j||j| W d    n1 s0    Y  d S )	Nr#   r    rV   r   r   z3When X is a sparse matrix, a CSR format is expectedr/   z X should be C-ordered np.ndarray)r   r]   r	   r4   rN   zerosrQ   Zreshaper1   r2   r3   r   rL   rY   asfortranarray)r   xrA   Zx_sparse_cscr9   r^   err_msgZ	x_fortranr;   r;   r<    test_check_inputs_predict_stages)  s    0
r   c           	      C   s   t jd| d\}}|d d |dd   }}|d d |dd   }}tddddd| d}||| ||||}|d	k sJ d
| d S )N.  rV   rU   r#      r!   rx   )rE   rj   rX   rY   max_featuresr+   rh   zGB failed with deviance %.4f)r   r]   r	   r4   _lossdecision_function)	rR   r@   rA   r_   r`   ra   rb   gbrtrD   r;   r;   r<   test_max_feature_regression:  s    r   c                    s   |    j  j }}t|||d\}}}}tdddd|d}||| t|jddd }	 fd	d
|	D }
|
d dksJ t|
dd h dksJ dS )a  Test that Gini importance is calculated correctly.

    This test follows the example from [1]_ (pg. 373).

    .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
       of statistical learning. New York: Springer series in statistics.
    r+   rf   rx   rG   r#   )r.   rY   r\   rE   r+   Nr   c                    s   g | ]} j | qS r;   )Zfeature_names).0sZ
californiar;   r<   
<listcomp>g      z6test_feature_importance_regression.<locals>.<listcomp>r   ZMedIncr    r$   >   ZAveOccupZ	LongitudeZLatitude)	r6   r7   r   r
   r4   rN   Zargsortr}   set)Zfetch_california_housing_fxtrR   r@   rA   r_   r`   ra   rb   rp   Z
sorted_idxZsorted_featuresr;   r   r<   "test_feature_importance_regressionN  s"    
r   c                  C   sF  t jddd\} }| j\}}| d d }|d d }tdd d}||| |j|ksZJ tdd d}||| |j|ksJ tddd}||| |jt|d ksJ tddd}||| |jtt	|ksJ tddd}||| |jtt
|ksJ tdd	| jd  d}||| |jdksBJ d S )
Nr   r    rV   rU   )rE   r   333333?sqrtlog2g{Gz?)r   r]   rQ   r	   r4   Zmax_features_r
   intrN   r   r   )r@   rA   _r)   r_   ra   r   r;   r;   r<   test_max_featuresr  s,    
r   c                  C   s   t jdddd\} }| d d |d d  }}| dd  }t }tt& tj||tj	d W d    n1 sv0    Y  |
|| ||}||D ]}|j|jksJ qt|| d S )Nry   r    rW   rz   r[   Zdtype)r   r{   r
   r1   r2   r3   rN   fromiterstaged_predictfloat64r4   rH   rQ   r   )r@   rA   r_   ra   r`   r9   rq   r;   r;   r<   test_staged_predict  s    4
r   c            	      C   s  t jddd\} }| d d |d d  }}| dd  |dd   }}tdd}tt& tj||tj	d W d    n1 s0    Y  |
|| ||D ]}|j|jksJ qt||| ||D ].}|jd |jd ksJ d	|jd ksJ qt||| d S )
Nry   r    rV   r[      rE   r   r   r!   )r   r]   r	   r1   r2   r   rN   r   Zstaged_predict_probar   r4   r   rQ   r   rH   r   r   )	r@   rA   r_   ra   r`   rb   r9   rq   Zstaged_probar;   r;   r<   test_staged_predict_proba  s    
4r   	Estimatorc           	   	   C   s   t j|}|jdd}d|d d df  td }|  }||| dD ]|}t|d| d }|d u rjqLtj	dd	 t
||}W d    n1 s0    Y  d|d d d < t |d dksLJ qLd S )
N)r&   r"   )sizer$   r   r    )rH   r   r   Zstaged_T)record)rN   randomRandomStateuniformZastyper   r4   getattrwarningscatch_warningslistr   )	r   rR   rngr@   rA   	estimatorfuncZstaged_funcZstaged_resultr;   r;   r<   test_staged_functions_defensive  s    *r   c                  C   s   t ddd} | tt t| tt dt| j	ks:J zdd l
}W n ty`   dd l}Y n0 |j| |jd}d } ||} t| tt dt| j	ksJ d S )Nr#   r    r   r   )protocol)r	   r4   r@   rA   r   rH   rI   rJ   rK   rL   cPickleImportErrorpickledumpsHIGHEST_PROTOCOLloads)r9   r   Zserialized_clfr;   r;   r<   test_serialization  s    
r   c                  C   s   t ddd} tt& | tttt W d    n1 sB0    Y  t	ddd} | tttt | 
tdg ttjdtjd| 
tdg d S )Nr#   r    r   r!   )r    r   )r	   r1   r2   r3   r4   r@   rN   rm   rK   r
   rH   r   randr   r   r9   r;   r;   r<   test_degenerate_targets  s    4r   c                 C   s\   t dddd| d}|tt |t}t ddd| d}|tt |t}t|| d S )Nr#   quantiler$   rh   )rE   r.   rX   alphar+   re   )rE   r.   rX   r+   )r
   r4   ro   rn   rH   r   )rR   Zclf_quantileZ
y_quantileZclf_aeZy_aer;   r;   r<   test_quantile_loss  s$    

r   c                  C   sV   t ddd} tttt}| t| t| t	tttt
 dt| jksRJ d S )Nr#   r    r   )r	   r   mapstrrA   r4   r@   r   rH   rI   rJ   rK   rL   )r9   Zsymbol_yr;   r;   r<   test_symbol_labels  s
    r   c                  C   sZ   t ddd} tjttjd}| t| t| t	tjt
tjd dt| jksVJ d S Nr#   r    r   r   )r	   rN   asarrayrA   Zfloat32r4   r@   r   rH   rI   rJ   rK   rL   )r9   Zfloat_yr;   r;   r<   test_float_class_labels  s
    r   c                  C   s   t ddd} tjttjd}|d d tjf }d}tjt|d | 	t
| W d    n1 sb0    Y  t| tt dt| jksJ d S )Nr#   r    r   r   zA column-vector y was passed when a 1d array was expected. Please change the shape of y to \(n_samples, \), for example using ravel().r/   )r	   rN   r   rA   int32Znewaxisr1   warnsr   r4   r@   r   rH   rI   rJ   rK   rL   )r9   y_Zwarn_msgr;   r;   r<   test_shape_y'  s    *r   c                  C   s6  t t} tddd}|| t t|tt	 dt
|jksDJ t t} tddd}|| t t|tt	 dt
|jksJ t jtt jd}t |}tddd}|t| t|tt	 dt
|jksJ t jtt jd}t |}tddd}|t| t|tt	 dt
|jks2J d S r   )rN   r   r@   r	   r4   rA   r   rH   rI   rJ   rK   rL   Zascontiguousarrayr   r   )ZX_r9   r   r;   r;   r<   test_mem_layout<  s,    



r   GradientBoostingEstimatorc                 C   sT   | dddd}| tt |jjd dks.J t|jd d tg ddd	 d S )
Nr#   r    rh   rE   r+   rg   r   r   )gRQ?g333333?gQ?g)\(g)\(?r!   )decimal)r4   r@   rA   oob_improvement_rQ   r   rN   array)r   r   r;   r;   r<   test_oob_improvementY  s    r   c                 C   s   t jddd\}}| dddd}||| |jjd dks@J |jd t|jksZJ | ddddd	}||| |jjd dk sJ |jd t|jksJ d S )
Nr#   r    rV   rh   r   r   r   r   )rE   r+   rg   n_iter_no_change)r   r]   r4   oob_scores_rQ   r1   approx
oob_score_)r   r@   rA   r   r;   r;   r<   test_oob_scoresi  s     r   z(GradientBoostingEstimator, oob_attributer   r   r   c                 C   s`   t jddd\}}| dddd}||| tt |j W d   n1 sR0    Y  dS )zZ
    Check that we raise an AttributeError when the OOB statistics were not computed.
    r#   r    rV   rW   r   N)r   r]   r4   r1   r2   AttributeErroroob_attribute)r   r   r@   rA   r   r;   r;   r<   test_oob_attributes_error  s    r   c                  C   s   t ddddd} | tjtj | tjtj}|dks<J | jjd | jksRJ | j	jd | jkshJ | j	d t
| jksJ t ddddd	d
} | tjtj | tjtj}| jjd | jk sJ | j	jd | jk sJ | j	d t
| jksJ d S )Nr#   rD   r    rh   rt   ru   r   r   r   )rE   r.   r+   rg   r   )r	   r4   r5   r6   r7   r^   r   rQ   rE   r   r1   r   r   )r   r^   r;   r;   r<   test_oob_multilcass_iris  s*    r   c                  C   s   dd l } ddlm} | j}| | _tddddd}|tt | j}|| _|d |	 
 }ddgd	gd
  d }||ksJ tdd | D }d|ksJ d S )Nr   StringIOr#   r    皙?)rE   r+   verboserg    %10s%16sr"   )Iter
Train LosszOOB ImproveRemaining Timec                 s   s   | ]
}d V  qdS r    Nr;   r   lr;   r;   r<   	<genexpr>  r   z&test_verbose_output.<locals>.<genexpr>   sysior   stdoutr	   r4   r@   rA   seekreadlinerstripjoinsum	readlinesr   r   Z
old_stdoutr9   Zverbose_outputheaderZtrue_headerZn_linesr;   r;   r<   test_verbose_output  s     
r   c                  C   s   dd l } ddlm} | j}| | _tdddd}|tt | j}|| _|d |	 
 }ddgd	gd  d
 }||ksJ tdd | D }d|ksJ d S )Nr   r   r#   r    r!   )rE   r+   r   r   r   r   )r   r   r   c                 s   s   | ]
}d V  qdS r   r;   r   r;   r;   r<   r     r   z+test_more_verbose_output.<locals>.<genexpr>r   r   r;   r;   r<   test_more_verbose_output  s    
r   Clsc                 C   s   t jd|d\}}| dd|d}||| | ddd|d}||| |jdd ||| | tu rt|||| n,t|||| t|||| d S )	Nr#   rV   r[   r    rE   rX   r+   TrE   rX   
warm_startr+   r   )	r   r]   r4   
set_paramsr
   r   rH   r   r   r   rR   r@   rA   estest_wsr;   r;   r<   test_warm_start  s    r  c                 C   sz   t jd|d\}}| dd|d}||| | ddd|d}||| |jdd ||| t|||| d S )	Nr#   rV   i,  r    r   Tr   r   )r   r]   r4   r   r   rH   r  r;   r;   r<   test_warm_start_n_estimators  s    r  c                 C   s   t jddd\}}| dddd}||| |jddd ||| |jd	 jdksZJ tdd
D ]}|j| df jdksdJ qdd S )Nr#   r    rV   TrE   rX   r   n   r!   rE   rX   r   r      r   )r   r]   r4   r   rL   rX   range)r   r@   rA   r  ir;   r;   r<   test_warm_start_max_depth  s    r  c                 C   sv   t jddd\}}| ddd}||| | dddd}||| |jdd ||| t|||| d S )	Nr#   r    rV   r  Tr  F)r   )r   r]   r4   r   r   rH   )r   r@   rA   r  Zest_2r;   r;   r<   test_warm_start_clear+  s    r  r   c                 C   s  t jddd\}}d}| |ddddd}||| |j|j }}t||ksRJ |d t|kshJ d}|j|d	|| t|j|ksJ t	|jd
| | |j|dd|| |j|usJ |j|usJ t	|j| |jt|ksJ |d t|ksJ d
S )zZ
    Check that the states of the OOB scores are cleared when used with `warm_start`.
    r#   r    rV   rh   T)rE   rX   rg   r   r+   r   r[   r   NFrE   r   )
r   r]   r4   r   r   rK   r1   r   r   r   )r   r@   rA   rE   r   Z
oob_scoresZ	oob_scoreZn_more_estimatorsr;   r;   r<    test_warm_start_state_oob_scores:  s.    r  c                 C   sr   t jddd\}}| dddd}||| |jdd tt ||| W d    n1 sd0    Y  d S )Nr#   r    rV   Tr  c   r   )r   r]   r4   r   r1   r2   r3   r   r@   rA   r  r;   r;   r<   $test_warm_start_smaller_n_estimatorsZ  s    r  c                 C   sh   t jddd\}}| ddd}||| t|}|j|jdd ||| t|||| d S )Nr#   r    rV   r  Tr  )r   r]   r4   r   r   rE   r   rH   )r   r@   rA   r  Zest2r;   r;   r<   "test_warm_start_equal_n_estimatorse  s    r  c                 C   s   t jddd\}}| dddd}||| |jddd ||| t|jd d td t|jd d td |jd	d  d
k	 sJ |jd	d  d
k	 sJ |jd t
|jksJ d S )Nr#   r    rV   Tr  r  rh   )rE   rg   rF   r   )r   r]   r4   r   r   r   rN   r   r   r   r1   r   r   r  r;   r;   r<   test_warm_start_oob_switchs  s    r  c                 C   s   t jddd\}}| ddddd}||| | dddddd}||| |jdd	 ||| t|jd d |jd d  t|jd d |jd d  |jd
 t|j	ksJ |jd
 t|j	ksJ d S )Nr#   r    rV   r[   rh   )rE   rX   rg   r+   TrE   rX   rg   r+   r   r   r   )
r   r]   r4   r   r   r   r   r1   r   r   )r   r@   rA   r  r  r;   r;   r<   test_warm_start_oob  s    
r  sparse_containerc           	      C   s.  t jddd\}}| dddddd}||| || |jdd ||| ||}||}| dddddd}||| || |jdd ||| ||}t|jd d |jd d  |jd	 t	|j
ksJ t|jd d |jd d  |jd	 t	|j
ks J t|| d S )
Nr#   r    rV   rh   Tr  r[   r   r   )r   r]   r4   rH   r   r   r   r   r1   r   r   )	r   r  r@   rA   Z	est_denseZy_pred_denseX_sparseZ
est_sparseZy_pred_sparser;   r;   r<   test_warm_start_sparse  s:    




r  c                 C   s   t jd|d\}}| d|dd}| d|dd}||| |jdd ||| t|}||| |jdd ||| t|||| d S )Nr#   rV   r    T)rE   r+   r   r
  r   )r   r]   r4   r   rN   r   r   rH   )r   rR   r@   rA   Zest_cZest_fortranZ	X_fortranr;   r;   r<   test_warm_start_fortran  s    
r  c                 C   s   | dkrdS dS dS )z#Returns True on the 10th iteration.	   TFNr;   )r  r  localsr;   r;   r<   early_stopping_monitor  s    r  c                 C   s  t jddd\}}| ddddd}|j||td |jdks@J |jjd d	ksTJ |jjd d	kshJ |jjd d	ks|J |j	jd d	ksJ |j	d
 t
|jksJ |jdd ||| |jdksJ |jjd dksJ |jjd dksJ |jjd dksJ |j	jd dks$J |j	d
 t
|jks@J | dddddd}|j||td |jdksrJ |jjd d	ksJ |jjd d	ksJ |jjd d	ksJ |j	jd d	ksJ |j	d
 t
|jksJ |jddd ||| |jdksJ |jjd dks&J |jjd dks<J |jjd dksRJ |j	jd dkshJ |j	d
 t
|jksJ d S )Nr#   r    rV   r   rh   )rE   rX   r+   rg   )Zmonitorr   r&   r   ri   r   T)rE   rX   r+   rg   r   Fr  )r   r]   r4   r  rE   rL   rQ   rM   r   r   r1   r   r   r   r  r;   r;   r<   test_monitor_early_stopping  sF    
r   c                  C   s   ddl m}  tjddd\}}d}tdd d|d d}||| |jd	 j}|j|ks\J |j	|j	| k j
d |d ks~J d S )
Nr   	TREE_LEAFr#   r    rV   r$   r   rE   rX   r+   r\   r	  )sklearn.tree._treer"  r   r]   r	   r4   rL   tree_rX   children_leftrQ   )r"  r@   rA   kr  treer;   r;   r<   test_complete_classification  s    r)  c                  C   sb   ddl m}  d}tdd d|d d}|tt |jd j}|j|j| k j	d |d ks^J d S )Nr   r!  r$   r   r    r#  )r   r   )
r$  r"  r
   r4   ro   rn   rL   r%  r&  rQ   )r"  r'  r  r(  r;   r;   r<   test_complete_regression  s    r*  c                 C   sd   t ddtt}t|tt}tdd| ddd}|tt |t}tt|}||k s`J d S )Nmean)Zstrategyr   r    zerorh   )rE   rX   r+   r?   rY   )r   r4   ro   rn   r   rH   r
   )rR   ZbaselineZmse_baseliner  rq   Zmse_gbdtr;   r;   r<   test_zero_estimator_reg,  s    

r-  c                 C   s   t j}tt j}tdd| dd}||| |||dksBJ |dk}d||< d|| < tdd| dd}||| |||dksJ d S )Nr   r    r,  )rE   rX   r+   r?   gQ?r   )r5   r6   rN   r   r7   r	   r4   r^   )rR   r@   rA   r  maskr;   r;   r<   test_zero_estimator_clf?  s    
r/  GBEstimatorc                 C   st   t jddd\}}d}| d|d||}|jd j}|jdksDJ | dd||}|jd j}|jdkspJ d S )Nr#   r    rV   r$   )rX   r\   r	  )rX   )r   r]   r4   rL   r%  rX   )r0  r@   rA   r'  r  r(  r;   r;   r<   test_max_leaf_nodes_max_depthV  s    r1  c                 C   sH   t jddd\}}| dd}||| |jjD ]}|jdks0J q0d S )Nr#   r    rV   rx   )min_impurity_decrease)r   r]   r4   rL   Zflatr2  )r0  r@   rA   r  r(  r;   r;   r<   test_min_impurity_decreasef  s
    
r3  c                  C   sp   t ddd} | ddgddggddg | jjd dks<J | ddgddggddg | jjd dkslJ d S )Nr&   Tr  r   r    r!   r"   )r	   r4   rL   rQ   r   r;   r;   r<   %test_warm_start_wo_nestimators_changer  s
    r4  )r.   value))rd   rh   )re   rF   )rf   rh   )r   rh   c                 C   sj   ddgddgddgddgg}g d}g d}t dd| d}|j|||d |ddggd |ksfJ d S )	Nr    r   r   r   r    r   r   r   r    r    rW   r!   )rY   rE   r.   rk   )r
   r4   rH   )r.   r5  r@   rA   rl   gbr;   r;   r<   *test_non_uniform_weights_toy_edge_case_reg|  s    
r9  c                  C   sn   ddgddgddgddgg} g d}g d}dD ]8}t d|d}|j| ||d t|ddggdg q0d S )	Nr    r   r6  r7  rC   r   )rE   r.   rk   )r	   r4   r   rH   )r@   rA   rl   r.   r8  r;   r;   r<   *test_non_uniform_weights_toy_edge_case_clf  s    r:  EstimatorClassc           	      C   sX  t jddddd\}}|d d df }||}| ddddd	||}| ddddd	||}t|||| t|||| t|j|j t|||| t|||| t| trTt|	||	| t|
||
| t|||| t|||| t||||D ]\}}t|| q>d S )
Nr   rZ   r    r   )r+   r(   r)   	n_classesr&   r!   gHz>)rE   r+   rX   r2  )r   Zmake_multilabel_classificationr4   r   rP   rH   r}   
issubclassr	   r   Zpredict_log_probar   zipZstaged_decision_function)	r;  r  rA   r@   r  ZdensesparseZ
res_sparseresr;   r;   r<   test_sparse_input  sH    
rA  c           
      C   s   t ddd\}}d}| |dddddd}| |ddddd	d}t||dd
\}}}}	||| ||| |j|j  k r|k sn J |||	dksJ |||	dksJ d S )N  r   rV   r&   rx   r"   *   )rE   r   rY   rX   r+   ZtolMbP?r   gffffff?)r   r   r4   n_estimators_r^   )
r   r@   rA   rE   Zgb_large_tolZgb_small_tolr_   r`   ra   rb   r;   r;   r<   %test_gradient_boosting_early_stopping  s0    		rF  c                  C   sh   t ddd\} }tddddd}|| | td	dddd}|| | |jdksVJ |jd	ksdJ d S )
NrB  r   rV   rZ   rx   r"   rC  )rE   rY   rX   r+   ri   )r   r	   r4   r
   rE  )r@   rA   gbcgbrr;   r;   r<   -test_gradient_boosting_without_early_stopping  s    rI  c                  C   s  t ddd\} }tddddddd	}t|jd
d}t|jdd}tddddddd}t|jd
d}t|jdd}t| |dd\}}	}
}|||
 |||
 |j|jksJ |||
 |||
 |j|jksJ |||
 |||
 |j|jk sJ |j|jk sJ d S )NrB  r   rV   r#   r&   rx   r"   rC  )rE   r   validation_fractionrY   rX   r+   r   )rJ  r   r   )rE   r   rY   rX   rJ  r+   r   )r   r	   r   r   r
   r   r4   rE  )r@   rA   rG  Zgbc2Zgbc3rH  Zgbr2Zgbr3r_   r`   ra   rb   r;   r;   r<   *test_gradient_boosting_validation_fraction  s@    rL  c                  C   sl   ddgddgddgddgg} g d}t dd}tjtdd	 || | W d    n1 s^0    Y  d S )
Nr    r!   r"   r$   r   )r   r   r   r    rK  z0The least populated class in y has only 1 memberr/   r	   r1   r2   r3   r4   )r@   rA   rG  r;   r;   r<   test_early_stopping_stratified-  s    
rN  c                   C   s   t dddS )Nr"   r    )r<  Zn_clusters_per_class)r   r;   r;   r;   r<   _make_multiclass9  s    rO  z!gb, dataset_maker, init_estimator)zbinary classificationzmulticlass classificationZ
regression)Zidsc                 C   s   | \}}t j|d}| }| |dj|||d t| }| |d|| tjtdd& | |dj|||d W d    n1 s0    Y  d S )Nr#   r>   rk   z*estimator.*does not support sample weightsr/   )	rN   r   r   r   r4   r   r1   r2   r3   )r8  Zdataset_makerZinit_estimatorrR   r@   rA   rl   Zinit_estr;   r;   r<    test_gradient_boosting_with_init=  s    

rP  c                  C   s   t dd\} }tt }t|d}|| | tjtdd, |j| |t	| j
d d W d    n1 sn0    Y  d}d| d	}tjtt|dB td
|d}t|d}|j| |t	| j
d d W d    n1 s0    Y  d S )Nr   r   r>   z>The initial estimator Pipeline does not support sample weightsr/   rk   g      ?zIThe 'nu' parameter of NuSVR must be a float in the range (0.0, 1.0]. Got z	 instead.auto)gammanu)r   r   r   r
   r4   r1   r2   r3   rN   rm   rQ   r   reescaper   )r@   rA   r?   r8  Z
invalid_nur   r;   r;   r<   )test_gradient_boosting_with_init_pipeline\  s$    

:
rV  c                  C   sx   dggd } ddgdgd  }t dddd}tjtdd	 || | W d    n1 s\0    Y  t ddd
d}d S )Nr    r&   r   r%   r   r   )r   r+   rJ  z0The training data after the early stopping splitr/   g?rM  )r@   rA   r8  r;   r;   r<   test_early_stopping_n_classesz  s    *rW  c                  C   s>   t d} t d}t | |}t|jt jdt jd d S )N)r&   r&   )r&   r&   r   )rN   r   rm   r
   r4   r   r}   r   )r@   rA   rH  r;   r;   r<   'test_gbr_degenerate_feature_importances  s    

rX  c                  C   s   d} d}t t || }t ||d }t | | d }t j||f }t jd}||jd|jd }t	dd
||}t	d	d
||}t	 
||}	||}
t |||
ksJ t |
|	|ksJ d
S )z9Check that huber lies between absolute and squared error.r#   r&   r!   rC  r    )r   r   re   r-   rf   N)rN   tilearangeminimumc_r   r   r,   rQ   r
   r4   rH   r   )Zn_repr(   rA   x1x2r@   r   Zgbt_absolute_errorZ	gbt_huberZgbt_squared_errorZgbt_huber_predictionsr;   r;   r<   test_huber_vs_mean_and_median  s    
r_  c                   C   s   t  P t d ttdddks,J ttdtddksJJ W d   n1 s^0    Y  tjtdd  ttdd W d   n1 s0    Y  dS )	z0Test that _safe_divide handles division by zero.errorgu <7~r   rF   Noverflowr/   g|=)	r   r   simplefilterr   rN   r   r1   r   RuntimeWarningr;   r;   r;   r<   test_safe_divide  s    

<rd  c            	      C   s  d} t | }t || d }t | |  d }t j||f }tddd||}t g d}t|||dd t g d	}t|j	d
d |dd t 
ddg| d }tdddj|||d}t g d}t|||ddd t g d}t|j	d
d |ddd dS )zTest squared error GBT backward compat on a simple dataset.

    The results to compare against are taken from scikit-learn v1.2.0.
    r&   r!   rd   r#   r.   rE   )
gѵO@"?g!m ?g_$  @gT )= @g6 @gN@glU@gu@g̀#@g'aK4!@:0yE>rtol)
geԩ(j>g􄾳<e>gkx?a>gzU:\>gWwV>gwR>gM>g0~JYH>gh1`C>gñح
@>r  Nr    rk   )
gh#?gq)u ?g	1~(  @g_Q @gi @gŪ@g3%a@g[A"@gd*@ggh9!@gư>gh㈵>)rh  Zatol)
gwF$f>g۪5a>g:Z]>glF2UW>g!S>gO>g1&I>g͗KcD>gU}އ@>g:>rD  gdy=)rN   rZ  r[  r\  r
   r4   r   r   rH   rM   rY  )	r(   rA   r]  r^  r@   gbtpred_resulttrain_scoreZsample_weightsr;   r;   r<   (test_squared_error_exact_backward_compat  s4    
rl  c                  C   s   d} t | }t || d }t | |  d }t j||f }tdddd||}t|jjj	d t 
g d}t|||d	d
 t 
g d}t|jdd |d	d
 dS )zTest huber GBT backward compat on a simple dataset.

    The results to compare against are taken from scikit-learn v1.2.0.
    r&   r!   rf   r#   r   )r.   rE   r   g  2%?)
gzݮj#?g7h?g31e @gZZ@gk @ga8t @gsu@g%e 	@g>T  @gx!@rf  rg  )
gi>gAɤ{j>g#$%t>gD9u>gߝ>g:"L(>gݏ|>gzԇx>gu}v>ggݱ@s>r  N)rN   rZ  r[  r\  r
   r4   r   r   Zclossdeltar   rH   rM   r(   rA   r]  r^  r@   ri  rj  rk  r;   r;   r<    test_huber_exact_backward_compat  s    
ro  c                  C   s   d} t | d }t || d }t | |  d }t j||f }tddd||}t ddgddgddgddgddgddgddgddgddgddgg
}t|||dd	 t g d
}t|j	dd |dd	 dS )zTest binary log_loss GBT backward compat on a simple dataset.

    The results to compare against are taken from scikit-learn v1.2.0.
    r&   r!   rD   r#   re  g
t?gr3>rf  rg  )
gu>?g5Ү^?g?g	i?gN?gv^q!?g2C6 ?gȍl?g1oEa	?g5C?r  N
rN   rZ  r[  r\  r	   r4   r   r   r   rM   rn  r;   r;   r<   )test_binomial_error_exact_backward_compat<  s0    rq  c                  C   s   d} t | d }t || d }t | |  d }t j||f }tddd||}t g dg dg d	g d
g dg dg d	g d
g dg dg
}t|||dd t g d}t|j	dd |dd dS )zTest multiclass log_loss GBT backward compat on a simple dataset.

    The results to compare against are taken from scikit-learn v1.2.0.
    r&   r$   r!   rD   r#   re  )k*om?grLD~>l-au>h,au>)g?~>rr  rs  rt  )0Q(>ru  WQ?g?m}w>)ru  ru  gX
}w>rv  rf  rg  )
g]R4>g{a\>g5|)>guB=>g@3Bݤ>g bC>gB2h>g@,y>g"y>g=>r  Nrp  rn  r;   r;   r<   ,test_multinomial_error_exact_backward_compati  s0    rw  c                 C   sv   t jddd\}}ddddd| dd	}tf i |}t & td
 ||| W d   n1 sh0    Y  dS )a`  Test _update_terminal_regions denominator is not zero.

    For instance for log loss based binary classification, the line search step might
    become nan/inf as denominator = hessian = prob * (1 - prob) and prob = 0 or 1 can
    happen.
    Here, we create a situation were this happens (at least with roughly 80%) based
    on the random seed.
    r#   r   rV   rW   rh   r$   Nr!   )rY   rg   rE   r\   rX   r+   Zmin_samples_leafr`  )r   r]   r	   r   r   rb  r4   )rR   r@   rA   paramsr9   r;   r;   r<   test_gb_denominator_zero  s    	


ry  )__doc__rT  r   numpyrN   r1   Znumpy.testingr   Zsklearnr   Zsklearn.baser   Zsklearn.datasetsr   r   Zsklearn.dummyr   r   Zsklearn.ensembler	   r
   Zsklearn.ensemble._gbr   Z#sklearn.ensemble._gradient_boostingr   Zsklearn.exceptionsr   r   Zsklearn.linear_modelr   Zsklearn.metricsr   Zsklearn.model_selectionr   Zsklearn.pipeliner   Zsklearn.preprocessingr   Zsklearn.svmr   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearn.utils._param_validationr   Zsklearn.utils._testingr   r   r   Zsklearn.utils.fixesr   r   r   ZGRADIENT_BOOSTING_ESTIMATORSr@   rA   rI   rJ   ro   rn   r   r   r   Z	load_irisr5   Zpermutationr7   r   permr6   r=   rB   markZparametrizerT   rc   rs   rw   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  r  r   r)  r*  r-  r/  r1  r3  r4  r9  r:  rA  rF  rI  rL  rN  rO  rP  rV  rW  rX  r_  rd  rl  ro  rq  rw  ry  r;   r;   r;   r<   <module>   sZ  (



-',
	
$!



 









$

-


	

(
#+	
S
/--