a
    h9                    @   s  U d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	 ddl
Z
ddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl3m;Z< ddl=m>Z> ddl?m@Z@mAZAmBZBmCZCmDZDmEZE ddlFmGZGmHZHmIZImJZJ ddlKmLZL dZMdZNe(e*dZOe)e+dZPeQ ZReQeSd< eRTeO eRTeP g dZUeVg dg dg dg dg d g d!g d"g d#g d$g d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2gZWg d3ZXg d4ZYd5d6gd6d6gd6d5gd7d7gd7d8gd8d7ggZZg d9Z[d6d6gd8d8gd:d8ggZ\g d;Z]e^ Z_ej`ad7Zbebce_jdjeZfe_jgef e__ge_jdef e__deh ZiebceijdjeZfeijgef ei_geijdef ei_dej ZkebcekjdjeZfekjgef ek_gekjdef ek_deLdZlejmdd<d=d>\ZnZoeljpd?d@ZqdAeqeqdBk< eljrddCdDd@Zse&dEd=dFddGt Zue_jge_jddHeijgeijddHekjgekjddHeZe[dHeWeXdHeWeYdHeneodHeqesdHeq esdHeuesdHevdIesdHdJZwdKdL ZxdMdN ZydOdP Zzej{|dQeP} ej{|dReNdSdT Z~dUdV ZdWdX Zej{|dYeP ej{|dReNdZd[ ZeEej{|dYeP ej{|d\d]d^ed_fd`dEed_fdad^ed_fdbd^ed<fgdcdd Zdedf Zdgdh Zdidj Zdkdl Zdmdn Zdodp Zdqdr Zdsdt Zdudv Zdwdx Zdydz ZdKd{d|Zej{|d}eRd~d Zej{|d}eUej{|deIdd ZdLddZej{|d}eRdd Zej{|d}eUej{|deIdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zej{|d}eOdd Zej{|d}eOdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdMddZej{|deUej{|dddd Zej{|deeeUePej{|dddgdd Zej{|deUej{|dg dej{|deIdd Zej{|dee	dd eUD eNee	dd eUD eM ej{|dg dej{|deIdd Zej{|deUej{|deeIeJddń ZddǄ Zej{|d}eRddɄ Zej{|d}eRej{|ddgeI dd̄ Zej{|d}eRdd΄ Zej{|d}eUej{|deJddф Zddӄ Zej{|d}eRddՄ Zej{|d}eRej{|deJddׄ Zddل Zddۄ Zej{|ddgeI dd݄ Zej{|deeew ddh ej{|de(e*gdd Zej{|dew ej{|de)e+gdd Zdd Zdd Zdd Zej{|d}eRej{|dddgej{|ddgeI eJ dd Zej{|dRg dej{|dQeP} dd Zej{|ded:dd Zdd Zej{|dQe(e*gej{|dd8dCgdd Zdd Zdd Zdd Zdd  Z͐dd Zΐdd Zϐdd ZАdd Zѐd	d
 ZҐdd Zej{|dee/} e0} dd ZԐdd Zej{|dQeR} dd Zej{|dRd]dagdd Zej{|ded:ej{|dRd]dagdd Zej{|dRddgdd Zej{|dRddgdd Zej{|dRddgdd Zej{|dRddgd d! Zej{|ddgeJ ej{|d"e)d`d#e+d`d#gd$d% Zej{|dQeP} d&d' Zސd(d) Zej{|d*eje)dfeje+d+fee(d,fee*d-fgej{|d.dd/gd0d1 Zej{|d2eeO} d3d4gd5d6 Zej{|d7eje)feje(fgd8d9 Zd:d; Zej{|dQe)e+gej{|d<eVejd8ejdCd=d>geVejejd:dCd=d>geVd7d8d:dCejejgeVd7d8d:ejd>ejggej{|dRd]dagd?d@ ZdAdB ZdCdD ZdEdF ZdGdH ZdIdJ ZdS (N  z-
Testing for the tree module (sklearn.tree).
    N)chainproduct)NumpyPickler)assert_allclose)clonedatasetstree)DummyRegressor)NotFittedError)SimpleImputer)accuracy_scoremean_poisson_deviancemean_squared_error)cross_val_scoretrain_test_split)make_pipeline)_sparse_random_matrix)DecisionTreeClassifierDecisionTreeRegressorExtraTreeClassifierExtraTreeRegressor)CRITERIA_CLFCRITERIA_REGDENSE_SPLITTERSSPARSE_SPLITTERS)_py_sort)
NODE_DTYPE	TREE_LEAFTREE_UNDEFINED_build_pruned_tree_py_check_n_classes_check_node_ndarray_check_value_ndarray)Tree)compute_sample_weight)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warningsskip_if_32bit)	_IS_32BITCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS)check_random_state)ginilog_loss)squared_errorabsolute_errorfriedman_msepoisson)r   r   )r   r   	ALL_TREES)r   r      r   r   r      ir   r   r   r   r   )r   r         r   r9   r   r   r8   皙?r   r7   r8   )r>   r   r         r   r    @r8   r   r   r?   r   r8   )r>   r>   r   g333333r   r   r   r   r   r   r=   r   r   r8   )r>   r>   r   r   r   r   r   r;   r   r   r   r   r   r8   )r>   r   r7   
   r7   r   皙	r   r7   r;   r9   r8   )zG @r         r      r   r   rD            ?r   rB   r8   )rE   r   rF   rG   r   rH   r   r   rD   rI   r   r   rA   r8   )rE      rF   rG   r   rH   r   r   rD   rI   r   r   rA   r8   )rE   rK   rF   rG   r   rH   r   r   rD   rI   rJ   r   r>   r   )   rK   r:   r8   rJ   r9   rC   r   r8   r<   r;   r   rL   r   )rL   r   r8   r8   r8   r>   r8   r   r   rA   r;   r   r8   r   )rL   r   r8   rL   r;   r>   rC   rL   r   r>   r8   rL   rL   r   )r8   r8   r   rL   rL   r>   r8   rL   r   r<   r8   rL   r;   r   )r;   r8   r   r;   r   r9   rC   r   r8   r<   r;   r   r;   r8   )rE   rK   rF   rG   r   r8   r   r   rD   rI   rJ   r   rB   r8   )rE   rK   rF   rG   r   r8   r   r   rD   rI         ?r8   r>   r>   )rE   rK   rF   rG   r   rC   r   r   rD   rI   rJ   r   r>   r>   )rL   r   r:   r8   rJ   rA   rC   r   r8   r<   r;   r8   r   r>   )rL   r   r8   r8   r8   rA   r8   r   r   rA   r   r   r   r8   )rL   r8   r8   r8   rL   r>   rC   rL   r   r>   r   rL   r8   r8   )r8   r8   r   r   r8   rB   r8   rL   r   r<   r8   rL   r8   r8   )r;   r8   r   r8   r   r9   r8   r   r8   rA   r   r   r8   r   )r8   r8   r   r   r   r   r8   r8   r8   r8   r8   r8   r   r   r   r8   r   r   r8   r   r   r   r   )      ?r@   333333?皙?rC   g333333@@g)\(?{Gz?gףp=
@rQ   g?        rO   rL   rH   r   r         @g|?5^?g(\??r   rA   r>   r8   rL   )r>   r>   r>   r8   r8   r8   r;   )r>   r8   r8      rC   )random_state	n_samples
n_features)   r:   sizerS   g?r7   )rZ   rZ   g      ?)ZdensityrW   Xy)rZ   r;   )irisdiabetesdigitstoy	clf_small	reg_small
multilabel
sparse-pos
sparse-neg
sparse-mixzerosc                 C   s   |j | j ks"J d||j | j t| j|j|d  t| j|j|d  | jtk}t|}t| j| |j| |d  t| j	| |j	| |d  t| j
 |j
 |d  t| j
|j
|d  t| j|j|d d	 t| j| |j| |d
 d	 d S )Nz({0}: inequal number of node ({1} != {2})z: inequal children_rightz: inequal children_leftz: inequal featuresz: inequal thresholdz: inequal sum(n_node_samples)z: inequal n_node_samplesz: inequal impurityerr_msgz: inequal value)
node_countformatr'   children_rightchildren_leftr   npZlogical_notfeature	thresholdn_node_samplessumr%   impurityr&   value)dsmessageZexternalinternal r|   X/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/tree/tests/test_tree.pyassert_tree_equal   s@    



r~   c                  C   st   t  D ]f\} }|dd}|tt t|ttd	|  |ddd}|tt t|ttd	|  qd S )Nr   rW   Failed with {0}r8   )max_featuresrW   )
	CLF_TREESitemsfitr^   r_   r'   predictTtrue_resultrn   namer#   clfr|   r|   r}   test_classification_toy   s    
r   c                  C   s   t  D ]x\} }|dd}|jtttttd t|	t
td|  |jtttttdd t|	t
td|  qd S )Nr   r   sample_weightr   rJ   )r   r   r   r^   r_   rq   oneslenr'   r   r   r   rn   fullr   r|   r|   r}    test_weighted_classification_toy   s    
r   r#   	criterionc                 C   s   |dkr:t t td }t t| }t t| }nt}t}| |dd}|t| t|	t
| | |ddd}|t| t|	t
| d S )Nr5   r8   r   rW   r   r   rW   )rq   absminr_   arrayr   r   r^   r   r   r   )r#   r   ay_trainy_testregr   r|   r|   r}   test_regression_toy  s    r   c                  C   s   t d} d| d dd df< d| dd dd f< t | j\}}t | | gj}|  } t D ]r\}}|dd}|	||  |
|| dksJ d||ddd}|	||  |
|| dksjJ d|qjd S )	N)rC   rC   r8   r:   r   r   rN   r   rW   r   )rq   rj   indicesshapevstackravelr   r   r   r   scorern   )r_   ZgridxZgridyr^   r   r#   r   r|   r|   r}   test_xor  s    

r   c                  C   s   t t tD ]\\} }}||dd}|tjtj t|	tjtj}|dksdJ d
| ||||ddd}|tjtj t|	tjtj}|dksJ d
| ||qd S )Nr   r   rU   z0Failed with {0}, criterion = {1} and score = {2}rL   r   rJ   )r   r   r   CLF_CRITERIONSr   r`   datatargetr   r   rn   )r   r#   r   r   r   r|   r|   r}   	test_iris3  s    r   z
name, Treec                 C   s\   ||dd}| tjtj ttj|tj}|tdksXJ d|  d| d| d S )Nr   r   zFailed with z, criterion = z and score = )r   ra   r   r   r   r   pytestapprox)r   r#   r   r   r   r|   r|   r}   test_diabetes_overfitE  s    r   z&criterion, max_depth, metric, max_lossr2      <   r3   r4   r5   c                 C   sR   |||ddd}| tjtj |tj|tj}d|  k rH|k sNn J d S )NrI   r   )r   	max_depthr   rW   )r   ra   r   r   r   )r   r#   r   r   ZmetricZmax_lossr   Zlossr|   r|   r}   test_diabetes_underfitR  s    r   c                  C   s   t  D ]\} }|dddd}|tjtj |tj}tt	|dt
tjjd d| d tt|d|tjd| d t|tjt|tjdd| d qd S )Nr8   *   r   r   rW   r   r   rk   rK   )r   r   r   r`   r   r   predict_probar&   rq   ru   r   r   rn   r'   Zargmaxr   r%   exppredict_log_proba)r   r#   r   Zprob_predictr|   r|   r}   test_probabilityg  s(    



r   c                  C   sP   t dd d t jf } t d}t D ] \}}|d dd}|| | q*d S )Ni'  r   r   rW   )rq   arangenewaxis	REG_TREESr   r   r^   r_   r   r#   r   r|   r|   r}   test_arrayrepr  s
    
r   c                  C   s   ddgddgddgddgddgddgg} g d}t  D ]8\}}|dd}|| | t|| |d|d	 q8t D ]8\}}|dd}|| | t|| |d|d	 qzd S )
NrA   r>   r8   rL   )r8   r8   r8   r8   r8   r8   r   r   r   rk   )r   r   r   r'   r   rn   r   r%   )r^   r_   r   TreeClassifierr   TreeRegressorr   r|   r|   r}   test_pure_set  s    (

r   c               
   C   s   t g dg dg dg dg dg dg dg} t g d}t jd	d
d t D ]J\}}|dd}|| | || |  ||  | ||  |  qXW d    n1 s0    Y  d S )N)gs_c@d	a@籛 `8`@?c@)g_9a@g 8`@g-Vu]@g    @Xd@)gSW j_@r   r   r   )g ً`@4Ta@	lKa@{c@)g|@Y@g~G`a@gwI?lKa@g/"c@)g_@r   r   r   )g:^@r   r   r   )rN   gAw?gtQ?5??rS   g7G?gۺ?gb'?raise)allr   r   )rq   r   Zerrstater   r   r   r   r|   r|   r}   test_numerical_stability  s$    
r   c               	   C   s   t jdddddddd\} }t D ]d\}}|dd}|| | |j}t|dk}|jd dksrJ d		||dks$J d		|q$t
dd}|tjtj t
dttjd
}|tjtj t|j|j d S )N  rC   r;   r   FrX   rY   n_informativen_redundantZ
n_repeatedshufflerW   r   皙?r   rW   max_leaf_nodes)r   make_classificationr   r   r   feature_importances_rq   ru   r   rn   r   r`   r   r   r   r'   )r^   r_   r   r#   r   ZimportancesZn_importantclf2r|   r|   r}   test_importances  s*    



r   c                  C   s>   t  } tt t| d W d    n1 s00    Y  d S )Nr   )r   r   raises
ValueErrorgetattrr   r|   r|   r}   test_importances_raises  s    r   c               	   C   s   t jdddddddd\} }tdddd	| |}td
ddd	| |}t|j|j t|jj	|jj	 t|jj
|jj
 t|jj|jj t|jj|jj d S )Ni  rC   r;   r   Fr   r0   r:   )r   r   rW   r2   )r   r   r   r   r   r%   r   r'   tree_rr   rp   ro   rt   )r^   r_   r   r   r|   r|   r}   )test_importances_gini_equal_squared_error  s,    
r   c                  C   s  t  D ]\} }|dd}|tjtj |jtt	tjj
d ksLJ |dd}|tjtj |jtttjj
d ksJ |dd}|tjtj |jdksJ |dd}|tjtj |jdksJ |dd}|tjtj |jdksJ |dd}|tjtj |jtdtjj
d  ks8J |dd}|tjtj |jtjj
d ksjJ |d d}|tjtj |jtjj
d ksJ qd S )	Nsqrt)r   r8   log2r;   rR   rJ   rN   )r6   r   r   r`   r   r   Zmax_features_intrq   r   r   r   )r   TreeEstimatorestr|   r|   r}   test_max_features  s2    
 
 



 

r   c            	   	   C   s  t  D ]\} }| }tt |t W d    n1 sB0    Y  |tt g dg}tt	 || W d    n1 s0    Y  | }td d }tt	 |t| W d    n1 s0    Y  t
t}| }||t t|tt | }tt |t W d    n1 s<0    Y  |tt t
t}tt	* ||d d dd f  W d    n1 s0    Y  t
tj}| }|t
t|t tt	 |t W d    n1 s0    Y  tt	 |t W d    n1 s*0    Y  | }|tt tt	 || W d    n1 sr0    Y  tt	 || W d    n1 s0    Y  | }tt |t W d    q1 s0    Y  qtdd}tjt	dd& |g dgg d	 W d    n1 s60    Y  tjt	d
d& |g dgg d W d    n1 s|0    Y  d S )N)rA   r>   r8   r>   r8   r5   r   zy is not positive.*Poissonmatchr   r8   rL   )r   r   r   zSome.*y are negative.*Poisson)r:   grL   )r   r   r   r   r
   r   r^   r   r_   r   rq   asfortranarrayr%   r   r   r   asarrayr   dotapplyr   )	r   r   r   X2y2ZXftZXtr   r|   r|   r}   
test_error  sX    (
(*
*
:****,
6r   c                  C   s   t jtjtjjd} tj}tdt	
 D ]\}}t	| }|d|dd}|| | |jj|jjdk }t |dksJ d||d	|dd}|| | |jj|jjdk }t |dks(J d|q(d
S )z Test min_samples_split parameterdtypeN  rC   r   )min_samples_splitr   rW   r>   	   r   r=   N)rq   r   r`   r   r   _treeDTYPEr   r   r6   keysr   r   rt   rp   r   rn   )r^   r_   r   r   r   r   Znode_samplesr|   r|   r}   test_min_samples_split_  s     r   c            	      C   s   t jtjtjjd} tj}tdt	
 D ]\}}t	| }|d|dd}|| | |j| }t |}||dk }t |dksJ d||d|dd}|| | |j| }t |}||dk }t |dks(J d|q(d S )	Nr   r   r:   r   )min_samples_leafr   rW   r7   r   r   )rq   r   r`   r   r   r   r   r   r   r6   r   r   r   r   bincountr   rn   )	r^   r_   r   r   r   r   outZnode_countsZ
leaf_countr|   r|   r}   test_min_samples_leaf~  s(    

r   c                 C   s  t | d tj}|dur$||}t | d }t|jd }t|}t|  }t	dt
dddD ]\}}	||	|dd}
|
j|||d	 |dur|
j| }n|
j|}tj||d
}||dk }t|||
j ksfJ d| |
jqf|jd }t	dt
dddD ]\}}	||	|dd}
|
|| |durP|
j| }n|
j|}t|}||dk }t|||
j ksJ d| |
jqdS )zPTest if leaves contain at least min_weight_fraction_leaf of the
    training setr^   Nr_   r   r   rJ   rI   )min_weight_fraction_leafr   rW   r   )weightsz,Failed with {0} min_weight_fraction_leaf={1})DATASETSastyperq   float32rngrandr   ru   r6   r   linspacer   r   r   tocsrr   r   r   rn   )r   r   sparse_containerr^   r_   r   total_weightr   r   fracr   r   node_weightsleaf_weightsr|   r|   r}   check_min_weight_fraction_leaf  sP    



r  r   c                 C   s   t | d d S Nr`   r  r   r|   r|   r}   ,test_min_weight_fraction_leaf_on_dense_input  s    r  csc_containerc                 C   s   t | d|d d S Nrf   )r  r
  r   r  r|   r|   r}   -test_min_weight_fraction_leaf_on_sparse_input  s    r  c                 C   s  t | d tj}|dur$||}t | d }|jd }t|  }tdtdddD ]\}}|||ddd	}	|	|| |dur|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j dksVJ d
| |	j|	jqVtdtdddD ]\}}|||ddd	}	|	|| |dur<|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j ||	j ksJ d
| |	j|	jqdS )zzTest the interaction between min_weight_fraction_leaf and
    min_samples_leaf when sample_weights is not provided in fit.r^   Nr_   r   r   rJ   r;   r:   )r   r   r   rW   zBFailed with {0} min_weight_fraction_leaf={1}, min_samples_leaf={2}r   )r   r   rq   r   r   r6   r   r  r   r   r   r  r   r   maxr   rn   r   )r   r   r  r^   r_   r  r   r   r  r   r   r  r  r|   r|   r}   4check_min_weight_fraction_leaf_with_min_samples_leaf  s^    








r  c                 C   s   t | d d S r	  r  r  r|   r|   r}   Btest_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input"  s    r  c                 C   s   t | d|d d S r  r  r  r|   r|   r}   Ctest_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input'  s    r  c                 C   s  t jd| d\}}tdt D ]r\}}t| }||dd}||ddd}||ddd}||d	dd}	|d
f|df|df|	d	ffD ]\}
}|
j|ksJ d|
j||
|| t|
j	j
D ]}|
j	j| tkr|
j	j| }|
j	j| }|
j	j| }|
j	j| }|
j	j| }|| }|
j	j| }|
j	j| }|
j	j| }|| }|| }|| }|
j	j| |jd  }|||  }||ksJ d||qqq d S )Nd   rX   rW   r   r   r   rW   rP   )r   min_impurity_decreaserW   g-C6?r   gHz>z)Failed, min_impurity_decrease = {0} > {1}z2Failed with {0} expected min_impurity_decrease={1})r   r   r   r6   r   r  rn   r   ranger   rm   rp   r   rv   weighted_n_node_samplesro   r   )global_random_seedr^   r_   r   r   r   Zest1est2Zest3Zest4r   Zexpected_decreasenodeZ
imp_parentZ
wtd_n_nodeleftZ
wtd_n_leftZimp_leftZwtd_imp_leftrightZwtd_n_rightZ	imp_rightZwtd_imp_rightZwtd_avg_left_right_impZfractional_node_weightZactual_decreaser|   r|   r}   test_min_impurity_decrease1  sd    r!  c               	      s   t  D ]\} }d| v r(tjtj }}ntjtj }}|dd  ||  ||}g d} fdd|D }t	 }t
|}t| jksJ |||}	||	ksJ d| |D ]*}
tt|j|
||
 d|
 d	|  d
 qqdS )z8Test pickling preserves Tree properties and performance.
Classifierr   r   )r   rm   capacity	n_classesrp   ro   Zn_leavesrr   rs   rv   rt   r  rw   c                    s   i | ]}|t  j|qS r|   )r   r   ).0	attributer   r|   r}   
<dictcomp>  s   ztest_pickle.<locals>.<dictcomp>z6Failed to generate same score  after pickling with {0}z"Failed to generate same attribute z after pickling with rk   N)r6   r   r`   r   r   ra   r   r   pickledumpsloadstype	__class__rn   r'   r   r   )r   r   r^   r_   r   
attributesZfitted_attributeZserialized_objectr  Zscore2r&  r|   r'  r}   test_pickley  s6    




r/  c                  C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg} ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t  D ]\}}|dd}|| ||}t|| |jdksJ ||}t|dks2J |d jdksFJ |d jd	ksZJ ||}	t|	dksvJ |	d jdksJ |	d jd	ksJ qt	 D ]@\}}
|
dd}|| ||}t
|| |jdksJ qd S )
NrA   r>   r8   rL   r   r;   r   r7   rL   )r7   r7   )r   r   r   r   r'   r   r   r   r   r   r%   )r^   r_   r   Zy_truer   r   r   Zy_hatZprobaZ	log_probar   r   r|   r|   r}   test_multioutput  s\    





r1  c                  C   s   t  D ]\} }|dd}|tt |jdks4J t|jddg t	tt
td fj}|dd}|t| t|jdksJ t|jdksJ t|jddg t|jddgddgg qd S )Nr   r   rL   r>   r8   rA   )r   r   r   r^   r_   
n_classes_r'   Zclasses_rq   r   r   r   r   )r   r   r   _yr|   r|   r}   test_classes_shape  s    

r4  c                  C   sf   t jd d } t jd d }td|}t D ]2\}}|dd}|j| ||d t|| | q.d S )N}   balancedr   r   r   )	r`   r   r   r$   r   r   r   r%   r   )Zunbalanced_XZunbalanced_yr   r   r   r   r|   r|   r}   test_unbalanced_iris  s    

r7  c                  C   s  t t tjtjgD ]\\} }}|dd}tjtj|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtj|d}tj	}t
||||| tD ]0}|tj|d}tj	}t
||||| qtD ]2}|tj|d}tj	}t
||||| q$tjtjd d d |d}tj	d d d }t
||||| qd S )Nr   r   r   C)orderr   Fr;   )r   r6   r   rq   float64r   r   r`   r   r   r'   r   r   Zascontiguousarrayr.   r-   )r   r   r   r   r^   r_   csr_containerr  r|   r|   r}   test_memory_layout  s6    
r=  c                  C   s  t dd d t jf } t d}d|d d< t d}d||dk< tdd}|j| ||d t|| t d t dd d t jf } t d}d|dd< d	|dd< d| dddf< t d}d
||d	k< tddd}|j| ||d |j	j
d dksJ d||d	k< tddd}|j| ||d |j	j
d dksDJ tj} tj}td| jd d}tdd}|| | ||  t j|| jd d}tdd}|j| ||d |j	jtjjk}t|j	j
| |j	j
|  d S )Nr  rS   2   r   r   r      r8   rL   gRQ?r   g     b@rJ   g     H@)Z	minlength)rq   r   r   r   r   r   r'   r   rj   r   rs   r`   r   r   r   randintr   r   rp   r   r   r   r&   )r^   r_   r   r   
duplicatesr   r{   r|   r|   r}   test_sample_weight5  sD    






rB  c                  C   s   t dd d t jf } t d}d|d d< tdd}t jdd}tt	  |j
| ||d W d    n1 sv0    Y  t d}td}tjt|d	  |j
| ||d W d    n1 s0    Y  d S )
Nr  rS   r>  r   r   r8   r   zgInput should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(0.)` instead.r   )rq   r   r   r   r   randomr   r   r   r   r   r   reescape	TypeError)r^   r_   r   r   Zexpected_errr|   r|   r}   test_sample_weight_invalidi  s    

.
rG  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   r6  class_weightrW   g       @rN   r   r8   r  g      Y@rL   )r   r   r`   r   r   r%   r   rq   r   r   r   r   )	r   r   clf1r   Z
iris_multiZclf3Zclf4r   rI  r|   r|   r}   test_class_weights  s@    





rK  c                 C   sx   t |  }ttttd fj}|dddgdd}d}tjt|d |	t
| W d    n1 sj0    Y  d S )	NrL   rJ   rN   r>   r8   r   rH  zBnumber of elements in class_weight should match number of outputs.r   )r   rq   r   r_   r   r   r   r   r   r   r^   )r   r   r3  r   rl   r|   r|   r}   test_class_weight_errors  s    rM  c                  C   sX   t jddd\} }d}t D ]4\}}|d |d d| |}| |d ksJ qd S Nr  r8   r  r7   )r   r   )r   make_hastie_10_2r6   r   r   Zget_n_leavesr^   r_   kr   r   r   r|   r|   r}   test_max_leaf_nodes  s
    rR  c                  C   sP   t jddd\} }d}t D ],\}}|d|d| |}| dksJ qd S rN  )r   rO  r6   r   r   Z	get_depthrP  r|   r|   r}   test_max_leaf_nodes_max_depth  s
    rS  c                  C   sT   dD ]J} t t dgdggddgj| }d|jd   krDdk sn J dqd S )N)r$  rw   rp   ro   rs   rv   rr   rt   r   r8   rB   r;   z Array points to arbitrary memory)r   r   r   r   flat)attrrw   r|   r|   r}   test_arrays_persist  s    
"rV  c                  C   s\   t d} td}| ddd}t D ].\}}|dd}||| |jjdks(J q(d S )Nr   )rC   rZ   rL   )rC   r   )	r/   rq   rj   r@  r6   r   r   r   r   )rW   r^   r_   r   r   r   r|   r|   r}   test_only_constant_features  s    

rW  c                  C   s~   t t g dgt df} g d}t D ]H\}}d|vr0|ddd}|| | |jjdkshJ |jj	d	ks0J q0d S )
N)r   r   r   r   r   r8   rL   r7   r:   rI      )r7   rH   )r   r   r   r8   r8   rL   rL   rL   r;   r;   r;   Z	ExtraTreer   r8   r   rL   r:   )
rq   Z	transposer   rj   r6   r   r   r   r   rm   r^   r_   r   r   r   r|   r|   r}   ,test_behaviour_constant_feature_after_splits  s    rZ  c                  C   s   t t dgdgdgdggt dg} t g d}t D ]H\}}|ddd}|| | |jjdkspJ t	|
| t dd	 q@t D ]H\}}|ddd}|| | |jjdksJ t	|| t d
d	 qd S )NrN   rS   )r7   r   )rS   rN   rS   rN   r   r8   r   r0  rJ   )r7   )rq   Zhstackr   rj   r   r   r   r   r   r'   r   r   r   r   rY  r|   r|   r}   (test_with_only_one_non_constant_features  s    *r[  c                  C   sd   t ddt jdd} t }tjtdd  |	| g d W d    n1 sV0    Y  d S )Ng\)c=Hr7   r>   r8   r   r   )r   r8   r   r8   )
rq   repeatr   r;  reshaper   r   r   r   r   )r^   r   r|   r|   r}   test_big_input  s    r^  c                  C   s@   ddl m}  tt |   W d    n1 s20    Y  d S )Nr   _realloc_test)Zsklearn.tree._utilsr`  r   r   MemoryErrorr_  r|   r|   r}   test_realloc  s    rb  c                  C   s   dt d } tjdd}tjddd}d| d  }td|d}tt	 |
|| W d    n1 sp0    Y  d| d  d }td|d}tt |
|| W d    n1 s0    Y  d S )	NrK   PrC   rL   r   r8   best)splitterr   )structcalcsizerq   rC  randnr@  r   r   r   	Exceptionr   ra  )Zn_bitsr^   r_   Zhuger   r|   r|   r}   test_huge_allocations  s    *rj  c                 C   s(  t |  }t| d }t| d }|dv rN|jd d }|d | }|d | }tt t D ]}||}|d|d||}	|d|d||}
t|	j|
jd	|  |	
|}| tv r|	|}|	|}tt t D ]J}||tjd}t|

|| | tv rt|
|| t|
|| qqZd S )	Nr^   r_   )rb   ra   r   r:   rW   r   5{0} with dense and sparse format gave different treesr   )r6   r   r   r,   r-   r.   r   r~   r   rn   r   r   r   r   rq   r   r&   )r   datasetr   r   r^   r_   rX   r  X_sparserx   ry   y_predZy_probaZy_log_probaZsparse_container_testX_sparse_testr|   r|   r}   check_sparse_input/  s8    



rq  	tree_typerm  )rd   rc   rb   rf   rg   rh   ri   rj   c                 C   s    |dkrdnd }t | || d S )Nrb   r;   rq  )rr  rm  r   r|   r|   r}   test_sparse_inputX  s    rt  ra   re   c                 C   s   t | |d d S )NrL   rs  )rr  rm  r|   r|   r}   test_sparse_input_reg_treesk  s    ru  )rg   rh   ri   rj   c           	      C   s  t |  }t| d }||}t| d }|dddd||}|dddd||}t|j|jd|  t|||| |dddd	||}|dddd	||}t|j|jd|  t|||| |d|jd d d
||}|d|jd d d
||}t|j|jd|  t|||| |ddd||}|ddd||}t|j|jd|  t|||| d S )Nr^   r_   r   r8   rL   )rW   r   r   rl  rC   )rW   r   r   )rW   r   r;   r   )	r6   r   r   r~   r   rn   r&   r   r   )	rr  rm  r  r   r^   rn  r_   rx   ry   r|   r|   r}   test_sparse_parameterss  sP    rv  ztree_type, criterionc                 C   s   g | ]}|t v r|qS r|   )r   r%  r   r|   r|   r}   
<listcomp>      rx  c                 C   s   g | ]}|t v r|qS r|   )r   rw  r|   r|   r}   rx    ry  c           
      C   s   t |  }t| d }||}t| d }|dd|d||}|dd|d||}	t|j|	jd|  t|	||| d S )Nr^   r_   r   r;   rW   r   r   rl  )r6   r   r   r~   r   rn   r&   r   )
rr  rm  r  r   r   r^   rn  r_   rx   ry   r|   r|   r}   test_sparse_criteria  s    
r{  zcsc_container,csr_containerc                 C   s  t |  }d}d}|}t|}td}g }	g }
d}|g}t|D ]^}||d}||d | }|	| |jdd|fdd }|
| ||7 }|| q@t|		tj
}	tj|tj
d}tjt|
tjd}
||
|	|f||fd}| }||
|	|f||fd}| }|jdd|fd}| }|jd	k dksFJ |jd	k dks^J |d|d
||}|d|d
||}t|j|jdt ||f}t||D ]\}}t|j||j| t|||| t|||j| t|j| |j|  t|| ||  t|| |j|  t|||| ttv rt|||| qd S )Nr;   rC   r   rJ   r[   r8   r   r   rS   rk  rl  )r6   rq   r   r/   r  binomialpermutationappendconcatenater   int32r   r   toarrayr@  copyr   ru   r   r~   r   rn   r   r   r&   r   decision_pathr   r   r   )rr  r  r<  r   r   rY   rX   samplesrW   r   r   offsetindptriZn_nonzero_iZ	indices_iZdata_irn  r^   rp  X_testr_   rx   ry   ZXsZX1r   r|   r|   r}   test_explicit_sparse_zeros  sn    



r  c                 C   s   t |  }tjd d df  }tjd d df d}tj}tt" |dd	|| W d    n1 sn0    Y  |dd}|	|| tt |
|g W d    n1 s0    Y  d S )Nr   rL  r   )r6   r`   r   r   r]  r   r   r   r   r   r   )r   r   r^   ZX_2dr_   r   r|   r|   r}   check_raise_error_on_1d_input  s    0
r  c                 C   s2   t   t|  W d    n1 s$0    Y  d S N)r)   r  r  r|   r|   r}   test_1d_input"  s    r  r  c                 C   s   t |  }tdgdgdgdgdgg}g d}g d}|d urF||}|dd}|j|||d |jjdkspJ |ddd}|j|||d |jjdksJ d S )	Nr   r8   )r   r   r   r   r8   )r=   r=   r=   r=   r=   r   r   g?)rW   r   )r6   rq   r   r   r   r   )r   r  r   r^   r_   r   r   r|   r|   r}    test_min_weight_leaf_split_level(  s    
r  c                 C   sD   t jtjjdd}t|   }|t t t|	t |j
	| d S NFr  X_smallr   r   r   r   r6   r   y_smallr'   r   r   )r   	X_small32r   r|   r|   r}   test_public_apply_all_trees<  s    
r  r<  c                 C   sH   |t jtjjdd}t|   }|t t t|	t |j
	| d S r  r  )r   r<  r  r   r|   r|   r}   test_public_apply_sparse_treesE  s    
r  c                  C   sP   t j} t j}tddd| |}|| d d  }t|g dg dg d S )Nr   r8   rk  rL   )r8   r8   r   r8   r   r8   )r`   r   r   r   r   r  r  r'   )r^   r_   r   node_indicatorr|   r|   r}   test_decision_path_hardcodedO  s
    r  c                    s   t j}t j}|jd }t|  }|ddd}||| ||}|   j||jj	fks^J |
|} fddt|D }t|tj|d |jjtk}	tt |	tj|d  jdd }
|jj|
ksJ d S )	Nr   rL   rk  c                    s   g | ]\}} ||f qS r|   r|   )r%  r  jr  r|   r}   rx  g  ry  z&test_decision_path.<locals>.<listcomp>r|  r8   Zaxis)r`   r   r   r   r6   r   r  r  r   rm   r   	enumerater&   rq   r   rp   r   r   ru   r  r   )r   r^   r_   rX   r   r   Znode_indicator_csrleavesZleave_indicatorZ
all_leavesr   r|   r  r}   test_decision_pathW  s$    


r  c                 C   sV   t |t }}t|  }tt" |dd|| W d    n1 sH0    Y  d S Nr   r   )X_multilabely_multilabelr6   r   r   rF  r   )r   r<  r^   r_   r   r|   r|   r}   test_no_sparse_y_supportu  s    r  c                  C   s  t dddd} | jdgdgdgdgdggg dg d	d
 t| jjg d t| jjjg d | jdgdgdgdgdggg dt	dd
 t| jjg d t| jjjg d | jdgdgdgdgdggg dd t| jjg d t| jjjg d dS )aQ	  Check MAE criterion produces correct results on small toy dataset:

    ------------------
    | X | y | weight |
    ------------------
    | 3 | 3 |  0.1   |
    | 5 | 3 |  0.3   |
    | 8 | 4 |  1.0   |
    | 3 | 6 |  0.6   |
    | 5 | 7 |  0.3   |
    ------------------
    |sum wt:|  2.3   |
    ------------------

    Because we are dealing with sample weights, we cannot find the median by
    simply choosing/averaging the centre value(s), instead we consider the
    median where 50% of the cumulative weight is found (in a y sorted data set)
    . Therefore with regards to this test data, the cumulative weight is >= 50%
    when y = 4.  Therefore:
    Median = 4

    For all the samples, we can get the total error by summing:
    Absolute(Median - y) * weight

    I.e., total error = (Absolute(4 - 3) * 0.1)
                      + (Absolute(4 - 3) * 0.3)
                      + (Absolute(4 - 4) * 1.0)
                      + (Absolute(4 - 6) * 0.6)
                      + (Absolute(4 - 7) * 0.3)
                      = 2.5

    Impurity = Total error / total weight
             = 2.5 / 2.3
             = 1.08695652173913
             ------------------

    From this root node, the next best split is between X values of 3 and 5.
    Thus, we have left and right child nodes:

    LEFT                    RIGHT
    ------------------      ------------------
    | X | y | weight |      | X | y | weight |
    ------------------      ------------------
    | 3 | 3 |  0.1   |      | 5 | 3 |  0.3   |
    | 3 | 6 |  0.6   |      | 8 | 4 |  1.0   |
    ------------------      | 5 | 7 |  0.3   |
    |sum wt:|  0.7   |      ------------------
    ------------------      |sum wt:|  1.6   |
                            ------------------

    Impurity is found in the same way:
    Left node Median = 6
    Total error = (Absolute(6 - 3) * 0.1)
                + (Absolute(6 - 6) * 0.6)
                = 0.3

    Left Impurity = Total error / total weight
            = 0.3 / 0.7
            = 0.428571428571429
            -------------------

    Likewise for Right node:
    Right node Median = 4
    Total error = (Absolute(4 - 3) * 0.3)
                + (Absolute(4 - 4) * 1.0)
                + (Absolute(4 - 7) * 0.3)
                = 1.2

    Right Impurity = Total error / total weight
            = 1.2 / 1.6
            = 0.75
            ------
    r   r3   rL   )rW   r   r   r;   r:   rK   )rI   rX  r;   r7   r;   )333333?333333?r   rN   r  )r^   r_   r   )g,d?gܶm۶m?g?)      @g      @r  )ffffff?rM   gUUUUUU?)r7   rT   r  r]   N)
r   r   r   r   rv   r'   rw   rT  rq   r   )Zdt_maer|   r|   r}   test_mae  s     J.&r  c                  C   s   d} t jdt jd}d}dd }tjtj|fD ]}t D ]N\}}|| |}|| }|\}	\}
}}||	ksrJ | |
ks~J t|| q:t	 D ]P\}}|| |}|| }|\}	\}
}}||	ksJ | |
ksJ ||ksJ qq.d S )Nr;   r   r  c                 S   s   t t | S r  )r)  r+  r*  )objr|   r|   r}   _pickle_copy  s    z)test_criterion_copy.<locals>._pickle_copy)
rq   r   intpr  deepcopyr   r   
__reduce__r'   r   )	n_outputsr$  rX   r  Z	copy_func_typenamecriteriaresultZ	typename_
n_outputs_r2  Z
n_samples_r|   r|   r}   test_criterion_copy  s&    

r  c           	      C   s   t jdddd }t |d}|d d d df }| d urL| |}|d d df }tdd||}||}t	t 
|jjtkd }||}t 
t |jj d }t|dksJ t|dksJ d S )Nr   r  rH   g*Gr   r>   r   )rq   rC  RandomStaterh  Z
nan_to_numr   r   r   r   setwherer   rp   r   
differenceisfiniters   r   )	r  r   r^   r_   r   Zterminal_regionsZ	left_leafZ
empty_leafZinfinite_thresholdr|   r|   r}   "test_empty_leaf_infinite_threshold  s    

r  tree_clsc                 C   s   t |  } | d | d  }}|ddd}|||}|j}|j}tt|dksVJ tt|dksnJ t|||| d S Nr^   r_   rZ   r   r  r   Zcost_complexity_pruning_pathZ
ccp_alphas
impuritiesrq   r   diffassert_pruning_creates_subtreerm  r  r^   r_   r   infopruning_pathr  r|   r|   r}   'test_prune_tree_classifier_are_subtrees  s    r  c                 C   s   t |  } | d | d  }}|ddd}|||}|j}|j}tt|dksVJ tt|dksnJ t|||| d S r  r  r  r|   r|   r}   'test_prune_tree_regression_are_subtrees$  s    r  c                  C   sX   t dd} | dgdggddg t ddd}|dgdggddg t| j|j d S )Nr   r   r8   rC   )rW   	ccp_alpha)r   r   assert_is_subtreer   )rJ  r   r|   r|   r}   test_prune_single_node_tree5  s
    
r  c           	      C   s\   g }|D ]$}| d|dd ||}|| qt||dd  D ]\}}t|j|j q@d S )NrZ   r   )r   r  rW   r8   )r   r  zipr  r   )	Zestimator_clsr^   r_   r  Z
estimatorsr  r   Zprev_estZnext_estr|   r|   r}   r  A  s    r  c           	      C   s  | j |j ksJ | j|jks J | j}| j}|j}|j}dg}|r| \}}t| j| |j|  t| j| |j|  t| j	| |j	|  t| j
| |j
|  || || krtt|j|  q>t| j| |j|  ||| || f ||| || f q>d S )N)r   r   )rm   r   rp   ro   popr&   rw   r%   rv   rt   r  r   rs   r  )	r   ZsubtreeZtree_c_leftZtree_c_rightZsubtree_c_leftZsubtree_c_rightstackZtree_node_idxZsubtree_node_idxr|   r|   r}   r  P  s>    r  re  rd  rC  c                 C   s   t d }|d jtjjdd}|d u r0t|}nB||d }tj|jtjjd|_t|j|j	|j
f\|_|_	|_
ttjttjjd}t|  |d}||| t|||| t|| ||  d S )Nrd   r^   Fr  r   )re  )r   r   r   r   r   r(   rq   r   r   r   r  r  r6   r   r'   r   r  Ztodense)r   re  r  rm  r  Z
X_readonlyZ
y_readonlyr   r|   r|   r}   "test_apply_path_readonly_all_treesx  s(    
r  )r2   r4   r5   c                 C   sL   t jt j }}|| d}||| t||tt|ksHJ d S )Nr   )	ra   r   r   r   rq   ru   r   r   r   )r   r#   r^   r_   r   r|   r|   r}   test_balance_property  s    
r  seedc              	   C   s  ddgddgddgddgddgddgddgddgg}g d}t d| d}||| t||dkslJ t d| d}||| t||dksJ d	}tj|d d d
d||d d | d\}}d|d|k |dk @ < t|}t d| d}||| t||dksJ d S )Nr   r8   rL   r;   )r   r   r   r   r8   rL   r;   r7   r2   r   r5   rC   r  r   )Zeffective_rankZtail_strengthrX   rY   r   rW   r>   )	r   r   rq   Zaminr   r   r   make_regressionr   )r  r^   r_   r   rY   r|   r|   r}   test_poisson_zero_nodes  s,    4


	
r  c                  C   sF  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
dd| d}t
dd| d}|||	 |||	 tdd||	}||	df||
dffD ]p\}}}t|||}t|t ||dd }t|||}|dkr0|d| k s0J |d| k sJ qd S )Nr   )  r  rC   rX   rY   rW   rA   rL   )lowhighr\   r   r  )Zlam)Z	test_sizerW   r5   rC   )r   r   rW   r2   mean)ZstrategytraintestgV瞯<rJ   g      ?)rq   rC  r  r   Zmake_low_rank_matrixuniformr  r5   r   r   r   r   r	   r   r   Zclip)r   Zn_trainZn_testrY   r^   Zcoefr_   X_trainr  r   r   Ztree_poiZtree_msedummyvalZ
metric_poiZ
metric_mseZmetric_dummyr|   r|   r}   test_poisson_vs_mse  s4    


r  r$  c                 C   sz   d\}}t j||||ddd\}}| ddd||}| ddd||}t|j|j| d	 t|||| d
S )z3Test that criterion=entropy gives same as log_loss.)r>  r:   r   r   )r$  rX   rY   r   r   rW   r1   +   r   entropyz> with criterion 'entropy' and 'log_loss' gave different trees.N)r   r   r   r~   r   r   r   )r#   r$  rX   rY   r^   r_   Ztree_log_lossZtree_entropyr|   r|   r}   'test_criterion_entropy_same_as_log_loss  s"    
r  c                     sv   t jdd\} }tddd  | |  | |}dd  fdd}t| }|| |}t||srJ d S )	Nr   r   r;   rk  c                 S   s   |   | j  S r  )byteswapviewr   newbyteorderr  )arrr|   r|   r}   reduce_ndarray  s    z8test_different_endianness_pickle.<locals>.reduce_ndarrayc                     sB   t  } t| }tj |_|jtj< |	  | 
d | S Nr   )ioBytesIOr)  Picklercopyregdispatch_tabler  rq   ndarraydumpseekfpr   r  r|   r}    get_pickle_non_native_endianness  s    


zJtest_different_endianness_pickle.<locals>.get_pickle_non_native_endianness)	r   r   r   r   r   r)  loadrq   isclose)r^   r_   r   r  new_clf	new_scorer|   r  r}    test_different_endianness_pickle  s    
r  c                     s~   t jdd\} }tddd| | | |}G dd dt  fdd}t| }|| |}t	||szJ d S )	Nr   r   r;   rk  c                       s   e Zd Z fddZ  ZS )zPtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPicklerc                    s0   t |tjr | |j }t | d S r  )	
isinstancerq   r  r  r  r   r  supersave)selfr  r-  r|   r}   r  (  s    zUtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPickler.save)__name__
__module____qualname__r  __classcell__r|   r|   r  r}   NonNativeEndiannessNumpyPickler'  s   r  c                     s(   t  }  | }| | d | S r  )r  r  r  r  r  r  r   r|   r}   'get_joblib_pickle_non_native_endianness-  s
    

zXtest_different_endianness_joblib_pickle.<locals>.get_joblib_pickle_non_native_endianness)
r   r   r   r   r   r   joblibr  rq   r  )r^   r_   r   r  r  r  r|   r  r}   'test_different_endianness_joblib_pickle   s    r  c                 C   sn   t r
tjntj}g d}dd | jj D }|D ]}|||< q2tt| t|	 d}| j
|ddS )N)
left_childright_childrr   rt   c                 S   s   i | ]\}\}}||qS r|   r|   r%  r   r   r  r|   r|   r}   r(  @  s   z6get_different_bitness_node_ndarray.<locals>.<dictcomp>namesformats	same_kindZcasting)r+   rq   int64r  r   fieldsr   listr   valuesr   )node_ndarrayZnew_dtype_for_indexing_fieldsZindexing_field_namesnew_dtype_dictr   	new_dtyper|   r|   r}   "get_different_bitness_node_ndarray:  s    

r  c                 C   sj   dd | j j D }dd | j j D }dd |D }t t| t| |d}| j|ddS )	Nc                 S   s   i | ]\}\}}||qS r|   r|   r  r|   r|   r}   r(  M  s   z8get_different_alignment_node_ndarray.<locals>.<dictcomp>c                 S   s   g | ]\}}|qS r|   r|   )r%  r   r  r|   r|   r}   rx  P  ry  z8get_different_alignment_node_ndarray.<locals>.<listcomp>c                 S   s   g | ]}d | qS )rK   r|   )r%  r  r|   r|   r}   rx  Q  ry  )r
  r  offsetsr  r  )r   r  r   r  rq   r  r   r   )r  r  r  Zshifted_offsetsr  r|   r|   r}   $get_different_alignment_node_ndarrayL  s    


r  c           	      C   sZ   t r
tjntj}|  \}\}}}}|j|dd}| }t|d |d< ||||f|fS )Nr  r  nodes)r+   rq   r  r  r  r   r  r  )	r   r  r  rY   r$  r  stateZnew_n_classes	new_stater|   r|   r}   "reduce_tree_with_different_bitness]  s    r  c                     sn   t jdd\} }tddd  | |  | |} fdd}t| }|| |}|t|ksjJ d S )Nr   r   r;   rk  c                     s@   t  } t| }tj |_t|jt< |	  | 
d | S r  )r  r  r)  r  r  r  r  r  
CythonTreer  r  r  r   r|   r}   "pickle_dump_with_different_bitnesso  s    



zItest_different_bitness_pickle.<locals>.pickle_dump_with_different_bitness)	r   r   r   r   r   r)  r  r   r   )r^   r_   r   r  r  r  r|   r   r}   test_different_bitness_pickleh  s    
r  c                     sn   t jdd\} }tddd  | |  | |} fdd}t| }|| |}|t|ksjJ d S )Nr   r   r;   rk  c                     s>   t  } t| }tj |_t|jt< |  | 	d | S r  )
r  r  r   r  r  r  r  r  r  r  r  r   r|   r}   "joblib_dump_with_different_bitness  s    


zPtest_different_bitness_joblib_pickle.<locals>.joblib_dump_with_different_bitness)	r   r   r   r   r   r  r  r   r   )r^   r_   r   r  r  r  r|   r   r}   $test_different_bitness_joblib_pickle~  s    
r   c                  C   s  t rttjn
ttj} ttjttjg}|dd |D 7 }tjddg| d}|D ]}t|||  q\tj	t
dd. tjddgg| d}t||  W d    n1 s0    Y  tj	t
dd& |tj}t||  W d    n1 s0    Y  d S )	Nc                 S   s   g | ]}|  qS r|   )r  )r%  dtr|   r|   r}   rx    ry  z(test_check_n_classes.<locals>.<listcomp>r   r8   r   zWrong dimensions.+n_classesr   zn_classes.+incompatible dtype)r+   rq   r   r  r  r   r    r   r   r   r   r;  )expected_dtypeallowed_dtypesr$  r!  Zwrong_dim_n_classesZwrong_dtype_n_classesr|   r|   r}   test_check_n_classes  s    (r$  c               	   C   s0  t t j} d}t j|| d}| |  g}|D ]}t|||d q.tjtdd t|| dd W d    n1 st0    Y  |d d d d d df t 	|fD ]B}tjtdd  t|| |j
d W d    q1 s0    Y  qtjtd	d& t|t j| |d W d    n1 s"0    Y  d S )
N)r:   r8   rL   r   )r"  expected_shapezWrong shape.+value arrayr   )r8   rL   r8   zvalue array.+C-contiguouszvalue array.+incompatible dtype)rq   r   r;  rj   r  r"   r   r   r   r   r   r   r   )r"  r%  Zvalue_ndarrayr#  r!  Zproblematic_arrr|   r|   r}   test_check_value_ndarray  s2    $(&
r&  c                  C   s  t } tjd| d}|t|t|g}|dd |D 7 }|D ]}t|| d q:tjtdd* tjd| d}t|| d W d    n1 s0    Y  tjtd	d* |d d d
 }t|| d W d    n1 s0    Y  dd |j	j
 D }| }tj|d< t	t| t| d}||}tjtdd t|| d W d    n1 s`0    Y  | }tj|d< t	t| t| d}||}tjtdd t|| d W d    n1 s0    Y  d S )N)r:   r   c                 S   s   g | ]}| |j qS r|   )r   r   r  )r%  r  r|   r|   r}   rx    s   z+test_check_node_ndarray.<locals>.<listcomp>)r"  zWrong dimensions.+node arrayr   )r:   rL   znode array.+C-contiguousrL   c                 S   s   i | ]\}\}}||qS r|   r|   r  r|   r|   r}   r(    ry  z+test_check_node_ndarray.<locals>.<dictcomp>rs   r	  znode array.+incompatible dtyper  )r   rq   rj   r  r  r!   r   r   r   r   r  r   r  r  r  r   r  r   r;  )r"  r  Zvalid_node_ndarraysr  Zproblematic_node_ndarrayZ
dtype_dictr  r  r|   r|   r}   test_check_node_ndarray  sD    **

,

r'  Splitterc           	      C   s   t jd}d}dt jddgt jd }}td ||}| ||dd|d	d
}t|}t|}|j	|ksnJ t
|| s|J d	S )z&Check that splitters are serializable.r   rC   rL   r;   r   r0   r:   rJ   N)Zmonotonic_cst)rq   rC  r  r   r  r   r)  r*  r+  r   r  )	r(  r   r   r  r$  r   re  Zsplitter_serializeZsplitter_backr|   r|   r}   test_splitter_serializable 	  s    

r)  c                 C   sR   t | d}tdd}|tt t|| tj|dd}t	|j
|j
d dS )zhCheck that Trees can be deserialized with read only buffers.

    Non-regression test for gh-25584.
    z
clf.joblibr   r   r)Z	mmap_modez?The trees of the original and loaded classifiers are not equal.N)strjoinr   r   r  r  r  r  r  r~   r   )ZtmpdirZpickle_pathr   Z
loaded_clfr|   r|   r}   /test_tree_deserialization_from_read_only_buffer	  s    
r-  c                 C   s   t ddgddgg}t ddg}| dd|| | dd}d}tjt|d ||| W d   n1 st0    Y  dS )zhCheck that an error is raised when min_sample_split=1.

    non-regression test for issue gh-25481.
    r   r8   rN   )r   zb'min_samples_split' .* must be an int in the range \[2, inf\) or a float in the range \(0.0, 1.0\]r   N)rq   r   r   r   r   r   )r#   r^   r_   r   msgr|   r|   r}   test_min_sample_split_1_error%	  s    
r/  c                 C   s   t g dgj}t g d}tdd| d}||| |t jgg}t|t |dd g |dd }|dd }tdd| d}||| |t jgg}t|t |d	d g dS )
z=Check missing values goes to correct node during predictions.	r   r8   rL   r;   rK   r   rH      r   	r   r=   r  r=   r  r  rM   g?g@r   r8   rz  r<   Nr>   r9   )	rq   r   r   r   r   r   nanr   r  )r   r^   r_   dtcro  ZX_equalZy_equalr|   r|   r}   ;test_missing_values_best_splitter_on_equal_nodes_no_missing;	  s    r5  c                 C   s   t g dgj}t g d}t|d| d}||| |jjd }|jjd }|jj| }|jj| }||k}	|jj	| d }
|jj	| d }|
t jgg}|	rt|
| n
t|| dS )zCheck missing values go to the correct node during predictions for ExtraTree.

    Since ETC use random splits, we use different seeds to verify that the
    left/right node is chosen correctly when the splits occur.
    r0  r2  r8   rz  r   N)rq   r   r   r   r   r   rp   ro   r  rw   r   r3  r   )r   r  r^   r_   Zetrr  r  Zleft_samplesZright_samplesZ	went_leftZy_pred_leftZy_pred_rightro  r|   r|   r}   =test_missing_values_random_splitter_on_equal_nodes_no_missingU	  s    r6  r  r0   c                 C   s   d}t t jgd g d gj}t |gd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t||ddg d
S )zITest when missing values are uniquely present in a class among 3 classes.r   r7   )r   r8   rL   r;   rK   r   rH   r1  r8   rL   r   rz  r;   r1  Nrq   r   r3  r   r   r   r   r'   )r   Zmissing_values_classr^   r_   r4  r  Z
y_nan_predr|   r|   r}   /test_missing_values_best_splitter_three_classesx	  s    $
r8  c                 C   s   t t jgd g d gj}t dgd dgd  }tdd| d}||| t t jd	t jggj}||}t|g d
 dS )zMissing values spanning only one class at fit-time must make missing
    values at predict-time be classified has belonging to this class.r7   r   r8   rL   r;   r7   r:   r   r8   rI   r   rL   rz  r:   )r   r8   r   Nr7  r   r^   r_   r4  r  ro  r|   r|   r}   )test_missing_values_best_splitter_to_left	  s    
r;  c                 C   s   t t jgd g d gj}t dgd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t|g d
 dS )zMissing values and non-missing values sharing one class at fit-time
    must make missing values at predict-time be classified has belonging
    to this class.r7   r9  r8   r   rL   r   rz  rO   g333333@r  Nr7  r:  r|   r|   r}   *test_missing_values_best_splitter_to_right	  s    $
r<  c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }td
d| d}||| t t jddggj}||}t|g d dS )zNCheck behavior of missing value when there is one missing value in each class.r8   rL   r;   r:   rC   rZ   rV   r   r   r   rz  gffffff@gA@r  Nr7  r:  r|   r|   r}   >test_missing_values_best_splitter_missing_both_classes_has_nan	  s    &
r=  r   r   c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }| d
urP| |}tjtdd ||| W d
   n1 s0    Y  d
S )z4Check unsupported configurations for missing values.r8   rL   r;   r:   rC   rZ   rV   r   r   NzInput X contains NaNr   )rq   r   r3  r   r   r   r   r   )r  r   r^   r_   r|   r|   r}   test_missing_value_errors	  s    &r>  c                 C   sp   t j t j }}tj|ddddf< tj|ddddf< | ddd}||| ||}|d	k slJ dS )
z5Smoke test for poisson regression and missing values.Nr:   r   rI   r>   r5   r   r   rS   )	ra   r   r  r   rq   r3  r   r   r   )r#   r^   r_   r   ro  r|   r|   r}   test_missing_values_poisson	  s    
r?  c                  O   s$   t j| i |\}}|dk}||fS )N   )r   make_friedman1)argskwargsr^   r_   r|   r|   r}   make_friedman1_classification	  s    rD  zmake_data, Tree, tolerancegQ?gQ?gQ?sample_weight_trainr   c                 C   s  d\}}| ||d|d\}}|  }	tj|}
tj|	|
jddg|jddgd< t|	||d	\}}}}|d
krt|jd }nd}d}|||d}|j	|||d |
||}tt |||d}|	|| |
||}|| |ksJ d|d| d| dS )zFCheck that trees can deal with missing values have decent performance.)r   rC   rN   )rX   rY   noiserW   FTrU   r   r\   r  r   r   r   NrC   r   r   zscore_native_tree=z + z! should be strictly greater than )r  rq   rC  r  r3  choicer   r   r   r   r   r   r   )	make_datar#   rE  r  Z	tolerancerX   rY   r^   r_   	X_missingr   ZX_missing_trainZX_missing_testr   r   r   r   Znative_treeZscore_native_treeZtree_with_imputerZscore_tree_with_imputerr|   r|   r}   !test_missing_values_is_resilience	  s<    
 rK  zTree, expected_scoreg333333?g(\?c                 C   s   t jd}d}|j|dfd}t t |d t |d g}|jddg|dd	gd
}| 	t
}||  ||< |j|d}	t j|	|< |	|dddf< | |d}
t|
||dd }||ksJ d| d| dS )z@Check the tree learns when only the missing value is predictive.r   r  rZ   r[   rL   FTgffffff?rP   rG  Nr:   r   )ZcvzExpected CV score: z	 but got )rq   rC  r  Zstandard_normalr  rj   r   rH  r  r   boolr3  r   r  )r#   Zexpected_scorer  r   rX   r^   r_   ZX_random_maskZy_maskZX_predictiver   Ztree_cv_scorer|   r|   r}    test_missing_value_is_predictive 
  s     "

rM  zmake_data, Treec           
      C   s   t jd}d\}}| |||d\}}t j||jddg|jddgd< t |jd }d	|d
d
d< |dd}|j|||d |dd}	|	|dd
dd
d
f |dd
d  t|		||	| d
S )z=Check sample weight is correctly handled with missing values.r   )r   rC   r  FTrU   r   rG  rS   NrL   r   r   r8   )
rq   rC  r  r3  rH  r   r   r   r   r   )
rI  r#   r   rX   rY   r^   r_   r   Ztree_with_swZtree_samples_removedr|   r|   r}   test_sample_weight_non_uniform>
  s    	 

(rN  c                  C   sP   t ddtjtj} t ddtjtj}t| }t|}||ksLJ d S r  )r   r   r`   r   r   r)  r*  )Ztree1Ztree2Zpickle1Zpickle2r|   r|   r}   test_deterministic_pickle[
  s
    

rO  r^   r:   rI   c                 C   s   | dd}td}| |dd||}t|| dd|}|jj}t|dksbJ | t	|jjdd |jjdd  t
|jjdk|jjdk@ }t	|jj| d dS )	a'  Check that we properly handle missing values in regression trees using a toy
    dataset.

    The regression targeted by this test was that we were not reinitializing the
    criterion when it comes to the number of missing values. Therefore, the value
    of the critetion (i.e. MSE) was completely wrong.

    This test check that the MSE is null when there is a single sample in the leaf.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    https://github.com/scikit-learn/scikit-learn/issues/28316
    r>   r8   rI   r   r   NrL   rS   )r]  rq   r   r   r   r   rv   r   r   r   flatnonzerorp   rt   )r#   r^   r   r_   r   Ztree_refrv   
leaves_idxr|   r|   r}   'test_regression_tree_missing_values_toyh
  s    
"rR  c                 C   s   t j| }d}t j|t jddd}t j|dd d d f< || t |}t| dd	||}|j
j}t|dksJ |d S )	Nr  r   r>   r8   ir:   rk  r   )rq   rC  r  r   r;  r]  r3  r   r   r   r   rv   r   )r  r   rX   r^   r_   r   rv   r|   r|   r}   -test_regression_extra_tree_missing_values_toy
  s    

rS  c                  C   s   t jdd\} }tjd}|  }|jtjdtjd| dddgf d d		t
}tj||< t||d
d\}}}}tjg dtjd}tdddd}	|	|| ||  t|	jjdksJ t|	jjdk|	jjdk@ }
t|	jj|
 d dS )a  Check that we properly handle missing values in clasification trees using a toy
    dataset.

    The test is more involved because we use a case where we detected a regression
    in a random forest. We therefore define the seed and bootstrap indices to detect
    one of the non-frequent regression.

    Here, we check that the impurity is null or positive in the leaves.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    T)Z
return_X_yr   )r8   r7   )r   r   NrL   rK   )nr     r   )prL   Q   '   a   [   &   .      e   rU  Y   R   r  r   E      rV     I   J   3   /   k      K   n   rZ   r   h   9      r   ri  O   #   M   Z   re  r[  rU  ^   rY     rK   ]   rt  rc  rp  r1  rU  rd  m   rj     rC   rs  rk  ra  \   4   rZ   ru  rK   rK      ra  ro  r1  r1  rX  r  r   rV   r\  N   r1  rv  i   r>  r   rc  r1  f   r{  rU  r\  r8   r`  rH       ri  rq  j   rr  r   8   ro  rh  >   U   rV  rW  P   rb  ?   rI   r  T   r;   r;   L   rz  r   r;   r   iHnr   r   r>   r8   rS   )r   	load_irisrq   rC  r  r  r}  r   r  r   rL  r3  r   r   r   r   r   r   rv   rP  rp   rt   r   )r^   r_   r   rJ  maskr  r  r   r   r   rQ  r|   r|   r}   +test_classification_tree_missing_values_toy
  s,    "


r  c                  C   sL  t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< t|| j| | jjdksrJ |jdksJ tt  t| jj|j W d   n1 s0    Y  t| jjd |jd  t| j	|| j
}tj| jjtjd}d|dd< t|| j| | jjdks"J |jdks8J |jt| jj|j dS )zHTest pruning a tree with the Python caller of the Cythonized prune tree.r   r8   rk  r   r;   N)r   r   r`   r   r   rq   
atleast_1dr2  r  n_features_in_r  rj   r   rm   uint8r   r   r   AssertionErrorr'   rw   r   r$  Zpruned_treeZleave_in_subtreer|   r|   r}   test_build_pruned_tree_py
  s&    .r  c                  C   s   t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< tjtdd t|| j| W d   n1 s0    Y  dS )z8Test pruning a tree does not result in an infinite loop.r   r8   rk  r   z,Node has reached a leaf in the original treer   N)r   r   r`   r   r   rq   r  r2  r  r  r  rj   r   rm   r  r   r   r   r   r  r|   r|   r}   $test_build_pruned_tree_infinite_loop
  s    r  c                  C   s`   t jd} | jddddt j}t |gd }t d}t||d g d}t	|| d	S )
zNon-regression test for gh-30554.

    Using log2 and log in sort correctly sorts feature_values, but the tie breaking is
    different which can results in placing samples in a different order.
    ri  rS   g      $@rC   )locscaler\   r:   r>  )2r   (   rV   rZ   rC      rW     1   r   -   r   ro  r:      rH   r\  )   r8         r1  rL   r   r}  rs  rU  r  r;   !   rI   $   r[  rb  rm  r7   r@  rv  "   ,   ra  rf  rX  %   rh  rK   rZ  0   ry     N)
rq   rC  Zdefault_rngnormalr   r   r  r   r   r'   )r   ZsomeZfeature_valuesr  Zexpected_samplesr|   r|   r}   test_sort_log2_build  s    
r  )N)N)N)__doc__r  r  r  r)  rD  rf  	itertoolsr   r   r  numpyrq   r   Zjoblib.numpy_pickler   Znumpy.testingr   Zsklearnr   r   r   Zsklearn.dummyr	   Zsklearn.exceptionsr
   Zsklearn.imputer   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   r   Zsklearn.pipeliner   Zsklearn.random_projectionr   Zsklearn.treer   r   r   r   Zsklearn.tree._classesr   r   r   r   Zsklearn.tree._partitionerr   Zsklearn.tree._treer   r   r   r   r    r!   r"   r#   r  Zsklearn.utilsr$   Zsklearn.utils._testingr%   r&   r'   r(   r)   r*   Zsklearn.utils.fixesr+   r,   r-   r.   Zsklearn.utils.validationr/   r   ZREG_CRITERIONSr   r   dictr6   __annotations__updateZSPARSE_TREESr   r  r  Zy_small_regr^   r_   r   r   r  r`   rC  r  r   r~  r   r\   permr   Zload_diabetesra   Zload_digitsrb   rW   Zmake_multilabel_classificationr  r  r  ZX_sparse_posr@  Zy_randomr  ZX_sparse_mixrj   r   r~   r   r   markZparametrizer  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r!  r/  r1  r4  r7  r=  rB  rG  rK  rM  rR  rS  rV  rW  rZ  r[  r^  rb  rj  rq  rt  sortedr  intersectionru  rv  r  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r$  r&  r'  r)  r-  r/  r5  r6  r8  r;  r<  r=  r>  r?  rD  rA  rK  rM  r  r   rN  rO  r3  rR  rS  r  r  r  r  r|   r|   r|   r}   <module>   s  $	 

(

'



	
$B!;

=
H3<-4
/
		
)3
K


d
(
!*
!4!




- !/