a
    h'                     @   s  d Z ddlZddlZddlZddlmZmZmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZm Z m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7 ddl8m9Z9 ddgddgddgddgddgddggZ:g dZ;eddddfZ<eddddddfZ=eddddfZ>edddddfZ?e Z@ejABded e<fed e=fed!e<fed!e=fed e>fed!e>fe0d!e>fe#d!e>fe#d!e?fe$d!e<fe$d!e=fe%d!e?fgejABd"d#ejABd$dgddgfejABd%d&d'd( ZCd)d* ZDejABd"dd+gd,d- ZEejABd"dd+gd.d/ ZFejABd0g d1d2d3 ZGejABd4eHd5ejABd6e# d!fedd7d!fedd7d8fedd7d!fedd7d8fgd9d: ZIejABd;eHdd<d= ZJejABd>edd7edd7fejABd4d?d@dA ZKejABd>e# edd7edddddBe0dd7fejABdCdDdEdF ZLejABdGejMjNejMjOejPjQejRjSejRjTejPjUfdHdI ZVG dJdK dKeeZWejABdLedd dMd$dgidNfe# dgdOdPdQfedd7dgdOd8dRdSfedd7dgdOd dRdSfe# dgd8dTdUdVfe# dgd8dWdUdVfe# dgd8dXdYfgdZd[ ZXejABd\e# edd7gejABd$dd]gd^d_ ZYejABd\e# edd7gd`da ZZejABd\e# edd7gdbdc Z[ddde Z\dfdg Z]dhdi Z^djdk Z_ejAjBd\e$dlddmedd5dngdodpgdqejAjBdrdee- dsdt duD fe, dvdt dwD fee- dxdt duD fdydzgg d{dqejAjBd$ddgd|dt duD gd}d~gdqdd Z`ejAjBdde@jad dfddgdfddt duD dfg ddfgg ddqdd ZbejABd\e# e$ e e gdd ZcejABde#e?fe$e<fgdd ZdejABde#e?fe$e<fgdd ZeejABd\e# e$ e e gejABdg ddd ZfejABde#e?fe$e<fgdd Zgdd Zhdd Zidd ZjdS )z,
Testing for the partial dependence module.
    N)BaseEstimatorClassifierMixincloneis_regressor)KMeans)make_column_transformer)	load_irismake_classificationmake_regression)DummyClassifier)GradientBoostingClassifierGradientBoostingRegressorHistGradientBoostingClassifierHistGradientBoostingRegressorRandomForestRegressor)NotFittedError)partial_dependence)_grid_from_X_partial_dependence_brute_partial_dependence_recursion)LinearRegressionLogisticRegressionMultiTaskLasso)r2_score)make_pipeline)PolynomialFeaturesRobustScalerStandardScalerscale)DecisionTreeRegressor)assert_is_subtree)assert_allcloseassert_array_equal)	_IS_32BIT)check_random_state      )r&   r&   r&   r'   r'   r'   2   )	n_samplesrandom_state   )r*   	n_classesn_clusters_per_classr+   )r*   	n_targetsr+   zEstimator, method, dataautobrutegrid_resolution)   
   featureskind)average
individualbothc                    s.  |  }t |dr|jdd |\\}}}	|jd }
||| t||||| d}||d  }}|	g fddtt|D R }|	|
g fd	dtt|D R }|d
kr|jj|ksJ n:|dkr|jj|ksJ n |jj|ksJ |jj|ksJ t| f}|d usJ t	
|j|ks*J d S )Nn_estimatorsr(   )r:   r   )Xr5   methodr6   r2   grid_valuesc                    s   g | ]} qS  r>   .0_r2   r>   l/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/inspection/tests/test_partial_dependence.py
<listcomp>r       z%test_output_shape.<locals>.<listcomp>c                    s   g | ]} qS r>   r>   r?   rB   r>   rC   rD   v   rE   r7   r8   )hasattrZ
set_paramsshapefitr   rangelenr7   r8   npasarray)	Estimatorr<   datar2   r5   r6   estr;   yr/   Zn_instancesresultpdpaxesZexpected_pdp_shapeZexpected_ice_shapeZexpected_axes_shaper>   rB   rC   test_output_shapeB   s<    

"rT   c                  C   sN  d} d}ddg}t ddgddgg}t|| ||\}}t|ddgddgddgddgg t||j t jd}d	}|jd
d}t|| ||d\}}|j|| |jd fksJ t |jd|fksJ d}d||d d df< |	| t|| ||d\}}|j|| |jd fksJ |d j|fks4J |d j|fksJJ d S )N皙?ffffff?d   Fr'   r(   r,      r      )   r(   sizerB      90  )
rK   rL   r   r"   TrandomRandomStatenormalrG   shuffle)percentilesr2   is_categoricalr;   gridrS   rngZn_unique_valuesr>   r>   rC   test_grid_from_X   s0    "


ri   rX   c                 C   sh   t d}d}dg}|dg di}t|||| d\}}|jd|jd fksRJ |d	 jd
ksdJ dS )jCheck that `_grid_from_X` always sample from categories and does not
    depend from the percentiles.
    pandasrU   TZcat_feature)ABCrl   rm   DErB   r3   r'   r   )r3   N)pytestimportorskip	DataFramer   rG   )r2   pdre   rf   r;   rg   rS   r>   r>   rC   !test_grid_from_X_with_categorical   s    

ru   c                 C   s   t d}d}ddg}|g dg dd}| }t|||| d\}}| d	kr|jd
ks`J |d jd |d kszJ |d jd | ksJ nB|jdksJ |d jd |d ksJ |d jd |d ksJ dS )rj   rk   rU   TF)
rl   rm   rn   rl   rm   ro   rp   rl   rm   ro   )
r'   r'   r'   r(   r3      rv   rv   rv      )catnumrB   r,   )rZ   r(   r   ry   r'   )   r(   rx   N)rq   rr   rs   nuniquer   rG   )r2   rt   re   rf   r;   r{   rg   rS   r>   r>   rC   #test_grid_from_X_heterogeneous_type   s&    

r|   z%grid_resolution, percentiles, err_msg))r(   )r   g-C6?zpercentiles are too close)rX   )r'   r(   r,   rY   .'percentiles' must be a sequence of 2 elements)rX   r_   r}   )rX   )r&   rW   ('percentiles' values must be in \[0, 1\])rX   )rV   r(   r~   )rX   )g?皙?z+percentiles\[0\] must be strictly less than)r'   rU   z1'grid_resolution' must be strictly greater than 1c                 C   s\   t ddgddgg}dg}tjt|d t||||  W d    n1 sN0    Y  d S )Nr'   r(   r,   rY   Fmatch)rK   rL   rq   raises
ValueErrorr   )r2   re   err_msgr;   rf   r>   r>   rC   test_grid_from_X_error   s    r   target_featurer3   zest, methodr+   	recursionc                 C   s   t dddd\}}||  }t| ||} tj|gtjd}tdgdgg}|dkrrt| |||dd	\}}nt| ||}g }	d
D ]0}
|	 }|
|d d |f< |	
| |  q|d }|dkrdnd}tj||	|dsJ d S )Nr   r3   )r+   
n_featuresZn_informativedtype      ?{   r1   r0   )response_method)r   r   r   r   gMbP?)rtol)r
   meanr   rH   rK   arrayintpr   r   copyappendpredictZallclose)rO   r<   r   r;   rP   r5   rg   rR   ZpredictionsZmean_predictionsvalZX_r   r>   r>   rC   test_partial_dependence_helpers   s$    
r   seedc                 C   sp  t j| }d}d}|||}||d }||  }d}d}tdd d||d}t|t t j	j
}	tddd||	d	}
t||	d
}||| |
|| ||| z(t|j|
d j t|j|d j W n  ty   tsJ dY d S 0 |ddd}t|D ]X}t j|gt jd}t|||}t|
||}t|||}t j|| t j|| qd S )N  r3   r4   r   r'   F)r:   Zmax_featuresZ	bootstrap	max_depthr+   Zsquared_error)r:   Zlearning_rate	criterionr   r+   )r   r+   )r   r   z)this should only fail on 32 bit platformsr)   r&   r   )rK   ra   rb   Zrandnr   r   r$   randintZiinfoZint32maxr   r   rH   r    Ztree_AssertionErrorr#   reshaperI   r   r   r   Ztestingr!   )r   rh   r*   r   r;   rP   r   Z	tree_seedZforestZequiv_random_stateZgbdttreerg   fr5   Z
pdp_forestZpdp_gbdtZpdp_treer>   r>   rC   /test_recursion_decision_tree_vs_forest_and_gbdt+  sR    
r   rO   )r   r'   r(   r,   rY   r3   c                 C   sz   t dddd\}}t|dks$J t| ||} t| ||gdddd}t| ||gdd	dd}t|d |d d
d d S )Nr(   r'   r-   r.   r+   r   Zdecision_functionr   r7   )r   r<   r6   r1   gHz>)Zatol)r	   rK   r   r   rH   r   r!   )rO   r   r;   rP   Zpreds_1Zpreds_2r>   r>   rC    test_recursion_decision_functionr  s(    	r   )r+   Zmin_samples_leafZmax_leaf_nodesmax_iterpower)r'   r(   c                 C   s   t jd}d}d}|j|dfd}|d d |f | }t| ||} t| |g|ddd}|d	 d d
d}|d d }	t|d	|}t
 ||	}
t|	|
|}|dksJ d S )Nr      r(   r3   r\   r   r7   )r5   r;   r2   r6   r=   r&   r'   )ZdegreeGz?)rK   ra   rb   rc   r   rH   r   r   r   fit_transformr   r   r   )rO   r   rh   r*   Ztarget_variabler;   rP   rR   Znew_XZnew_ylrr2r>   r>   rC   #test_partial_dependence_easy_target  s    r   rM   c                 C   st   t dddd\}}t||gj}|  }||| tjtdd t||dg W d    n1 sf0    Y  d S )Nr,   r'   r   r   z3Multiclass-multioutput estimators are not supportedr   )	r	   rK   r   r`   rH   rq   r   r   r   )rM   r;   rP   rO   r>   r>   rC   test_multiclass_multioutput  s    r   c                   @   s   e Zd Zdd ZdS ) NoPredictProbaNoDecisionFunctionc                 C   s   ddg| _ | S )Nr   r'   )Zclasses_)selfr;   rP   r>   r>   rC   rH     s    
z$NoPredictProbaNoDecisionFunction.fitN)__name__
__module____qualname__rH   r>   r>   r>   rC   r     s   r   zestimator, params, err_msg)r+   Zn_initz4'estimator' must be a fitted regressor or classifierZpredict_proba)r5   r   z7The response_method parameter is ignored for regressors)r5   r   r<   zC'recursion' method, the response_method must be 'decision_function'r8   )r5   r<   r6   zCThe 'recursion' method only applies when 'kind' is set to 'average'r9   )r5   r<   z=Only the following estimators support the 'recursion' method:c                 C   sb   t dd\}}t| ||} tjt|d" t| |fi | W d    n1 sT0    Y  d S )Nr   r   r   r	   r   rH   rq   r   r   r   )	estimatorparamsr   r;   rP   r>   r>   rC   test_partial_dependence_error  s    -r   r   i'  c                 C   sb   t dd\}}t| ||} d}tjt|d t| ||g W d    n1 sT0    Y  d S )Nr   r   zall features must be inr   r   )r   r5   r;   rP   r   r>   r>   rC   /test_partial_dependence_unknown_feature_indices  s
    r   c                 C   sz   t d}tdd\}}||}t| ||} dg}d}t jt|d t| || W d    n1 sl0    Y  d S )Nrk   r   r   ra   z/A given column is not a column of the dataframer   )	rq   rr   r	   rs   r   rH   r   r   r   )r   rt   r;   rP   dfr5   r   r>   r>   rC   .test_partial_dependence_unknown_feature_string&  s    

r   c                 C   s8   t dd\}}t| ||} t| t|dgdd d S )Nr   r   r7   )r6   )r	   r   rH   r   list)r   r;   rP   r>   r>   rC   test_partial_dependence_X_list5  s    r   c                  C   s   t t dd} | tt tjtdd$ t| tdgddd W d    n1 sR0    Y  tjtdd$ t| tdgddd W d    n1 s0    Y  d S )Nr   )initr+   z9Using recursion method with a non-constant init predictorr   r   r7   )r<   r6   )	r   r   rH   r;   rP   rq   ZwarnsUserWarningr   )Zgbcr>   r>   rC   (test_warning_recursion_non_constant_init?  s    2r   c            	      C   s   d} t jd}|jd| td}|| }| }||   || < t j||f }t | }d||< t	ddd}|j
|||d	 t||dgd
d}t |d
 |d d dksJ d S )Nr   i@ r(   )r]   r   g     @@r4   r'   )r:   r+   sample_weightr7   )r5   r6   r=   )r   r'   r   )rK   ra   rb   r   boolZrandr   Zc_onesr   rH   r   Zcorrcoef)	Nrh   maskxrP   r;   r   clfrR   r>   r>   rC   9test_partial_dependence_sample_weight_of_fitted_estimatorQ  s    

r   c                  C   sf   t dd} | jtttttd tjt	dd  t
| tdgd W d    n1 sX0    Y  d S )Nr'   r   r   z#does not support partial dependencer   r5   )r   rH   r;   rP   rK   r   rJ   rq   r   NotImplementedErrorr   )r   r>   r>   rC   test_hist_gbdt_sw_not_supportedk  s    
r   c                  C   s   t  } t }tdd}t||}||| j| j || j| j d}t|| j|gddd}t||	| j|gddd}t
|d |d  t
|d d |d d |j|  |j|   d S )N*   r   r   r4   r7   r5   r2   r6   r=   )r   r   r   r   rH   r   rN   targetr   Z	transformr!   scale_mean_)irisscalerr   piper5   pdp_pipepdp_clfr>   r>   rC    test_partial_dependence_pipelinev  s,    



r   r   r   r+   )r+   r:   zestimator-brutezestimator-recursion)Zidspreprocessorc                 C   s   g | ]}t j| qS r>   r   feature_namesr@   ir>   r>   rC   rD     rE   rD   r   r(   c                 C   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   r'   r,   c                 C   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   Zpassthrough)	remainder)Nonezcolumn-transformerzcolumn-transformer-passthroughc                 C   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   zfeatures-integerzfeatures-stringc                 C   s  t d}|jttjtjd}t|t| }|	|tj
 t|||ddd}|d urnt||}ddg}n|}ddg}t| 	|tj
}	t|	||d	ddd
}
t|d |
d  |d ur|jd }t|d d |
d d |jd  |jd   nt|d d |
d d  d S )Nrk   columnsr4   r7   r   r   r'   r(   r1   )r5   r<   r2   r6   Zstandardscalerr=   )rq   rr   rs   r   r   rN   r   r   r   rH   r   r   r   r!   Znamed_transformers_r   r   )r   r   r5   rt   r   r   r   ZX_procZfeatures_clfr   r   r   r>   r>   rC   !test_partial_dependence_dataframe  s:    


	

r   zfeatures, expected_pd_shape)r   r,   r4   r   )r,   r4   r4   c                 C   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   )TFTF)z
scalar-intz
scalar-strzlist-intzlist-strr   c                 C   s   t d}|jtjtjd}tt dd dD ft dd dD f}t	|t
dd	d
}||tj t||| ddd}|d j|ksJ t|d t|d jd ksJ d S )Nrk   r   c                 S   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   z8test_partial_dependence_feature_type.<locals>.<listcomp>r   c                 S   s   g | ]}t j| qS r>   r   r   r>   r>   rC   rD     rE   r   r   r   r   r4   r7   r   r=   r'   )rq   rr   rs   r   rN   r   r   r   r   r   r   rH   r   r   rG   rJ   )r5   Zexpected_pd_shapert   r   r   r   r   r>   r>   rC   $test_partial_dependence_feature_type  s    

r   c                 C   s   t j}tt ddgft ddgf}t|| }tjtdd$ t	||ddgdd W d    n1 sf0    Y  tjtdd$ t	| |ddgdd W d    n1 s0    Y  d S )	Nr   r(   r'   r,   zis not fitted yetr   r4   )r5   r2   )
r   rN   r   r   r   r   rq   r   r   r   )r   r;   r   r   r>   r>   rC    test_partial_dependence_unfitted  s    

2r   zEstimator, datac           	      C   sj   |  }|\\}}}| || t||ddgdd}t||ddgdd}tj|d dd}t||d  d S )Nr'   r(   r7   r;   r5   r6   r8   )Zaxis)rH   r   rK   r   r!   )	rM   rN   rO   r;   rP   r/   Zpdp_avgpdp_indZavg_indr>   r>   rC   +test_kind_average_and_average_of_individual  s    r   c           	      C   s   |  }|\\}}}t |jd }||| t||ddgdd}t||ddgd|d}t|d |d  t|d |d  dS )	zDCheck that `sample_weight` does not have any effect on reported ICE.r   r'   r(   r8   r   )r;   r5   r6   r   r=   N)rK   ZarangerG   rH   r   r!   )	rM   rN   rO   r;   rP   r/   r   Zpdp_nswpdp_swr>   r>   rC   =test_partial_dependence_kind_individual_ignores_sample_weight   s    	r   non_null_weight_idx)r   r'   r&   c                 C   s   t jt j }}tt ddgft ddgf}t|t| ||}t	
|}d||< t||ddgd|dd}t||ddgddd	}t|rdntt	|}	t|	D ]"}
t|d |
 | |d |
  qd
S )a   Check that if we pass a `sample_weight` of zeros with only one index with
    sample weight equals one, then the average `partial_dependence` with this
    `sample_weight` is equal to the individual `partial_dependence` of the
    corresponding index.
    r   r(   r'   r,   r7   r4   )r6   r   r2   r8   )r6   r2   N)r   rN   r   r   r   r   r   r   rH   rK   Z
zeros_liker   r   rJ   uniquerI   r!   )r   r   r;   rP   r   r   r   r   r   Z
output_dimr   r>   r>   rC   +test_partial_dependence_non_null_weight_idx6  s,    

r   c                 C   s   |  }|\\}}}| || d|ddgdd }}t|fi |d|i}tt|}t|fi |d|i}	t|d |	d  dtt| }t|fi |d|i}
t|d |
d  dS )zFCheck that `sample_weight=None` is equivalent to having equal weights.Nr'   r(   r7   r   r   )rH   r   rK   r   rJ   r!   )rM   rN   rO   r;   rP   r/   r   r   Zpdp_sw_noneZpdp_sw_unitZpdp_sw_doublingr>   r>   rC   7test_partial_dependence_equivalence_equal_sample_weight_  s    
r   c                  C   sv   t  } t\\}}}t|}| || tjtdd, t| |dg|dd dd W d   n1 sh0    Y  dS )zjCheck that we raise an error when the size of `sample_weight` is not
    consistent with `X` and `y`.
    zsample_weight.shape ==r   r   r'   Nr4   )r5   r   r2   )	r   binary_classification_datarK   	ones_likerH   rq   r   r   r   rO   r;   rP   r/   r   r>   r>   rC   0test_partial_dependence_sample_weight_size_errorw  s    
r   c                  C   sr   t  } t\\}}}t|}| j|||d tjtdd$ t| |dgd|d W d   n1 sd0    Y  dS )zaCheck that we raise an error when `sample_weight` is provided with
    `"recursion"` method.
    r   z+'recursion' method can only be applied whenr   r   r   )r5   r<   r   N)	r   regression_datarK   r   rH   rq   r   r   r   r   r>   r>   rC   4test_partial_dependence_sample_weight_with_recursion  s    
r   c                  C   s   t jdddt jgtddd} t g d}dd	lm} t|dd
t 	| |}t
jtdd  t|| dgd W d   n1 s0    Y  dS )znCheck that we raise a proper error when a column has mixed types and
    the sorting of `np.unique` will fail.rl   rm   rn   r   r&   r'   )r   r'   r   r'   r   )OrdinalEncoder)Zencoded_missing_valuez'The column #0 contains mixed data typesr   r   N)rK   r   nanobjectr   sklearn.preprocessingr   r   r   rH   rq   r   r   r   )r;   rP   r   r   r>   r>   rC   test_mixed_type_categorical  s     r   )k__doc__numpyrK   rq   ZsklearnZsklearn.baser   r   r   r   Zsklearn.clusterr   Zsklearn.composer   Zsklearn.datasetsr   r	   r
   Zsklearn.dummyr   Zsklearn.ensembler   r   r   r   r   Zsklearn.exceptionsr   Zsklearn.inspectionr   Z&sklearn.inspection._partial_dependencer   r   r   Zsklearn.linear_modelr   r   r   Zsklearn.metricsr   Zsklearn.pipeliner   r   r   r   r   r   Zsklearn.treer   Zsklearn.tree.tests.test_treer    Zsklearn.utils._testingr!   r"   Zsklearn.utils.fixesr#   Zsklearn.utils.validationr$   r;   rP   r   Zmulticlass_classification_datar   Zmultioutput_regression_datar   markZparametrizerT   ri   ru   r|   r   rI   r   r   r   r   r   ZDecisionTreeClassifierZExtraTreeClassifierZensembleZExtraTreesClassifierZ	neighborsZKNeighborsClassifierZRadiusNeighborsClassifierZRandomForestClassifierr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r>   r>   rC   <module>   s  (/'



.
F




,
	



+


	


	
