a
    hC                     @   sJ  d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ g dg dg dg dgZejd	d
g dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfgdd Zdd Zejdd
gdd Zdd Zejd	d
g dg dg dg d gdfdg dg dg dg d gdfdg dg dg d g d gdfdg dg dg d g d gg d!fdg dg dg dg dgg d"fdg dg dg dg d gg dfgd#d$ Zejd%d&d' Zejdddgd(d) Zejdg d*d+d, Zd-d. Zejd/ed0d1d2d3 Zd4d5 Z ejd6d
g d7g d8g d9fdg d7g d:g d;fdg d<g d:g d=fgd>d? Z!ejd@d
g dAg dBg dCg dDgfdg dEg dEg dFg dGgfdg dHg dIg dJg dJgfgejdKg dLdMdN Z"ejdg d*dOdP Z#dQdR Z$ejdSdg dTfdg dUfgdVdW Z%dXdY Z&ejdZej'ej(ej)gejd[dej(ej)gejdKg dLd\d] Z*ejd^ej'ej(ej)gejdKg dLd_d` Z+dadb Z,ejdcdddedf edgD fdhdidf edgD fdjdkdf edgD fgdldm Z-ejdg d*dndo Z.dpdq Z/dS )r    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal      ?)r         @      )r   g      @r
         ?)   g      @r      z!strategy, expected, sample_weightuniform)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantile)r   r   r   r   )r   r   r   r   )r   r      r   c                 C   s0   t dd| d}|jt|d t||t d S )Nr   ordinaln_binsencodestrategysample_weight)r   fitXr   	transform)r   expectedr   est r$   k/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s    "r&   c                   C   sR   t ddt t tdgd dt t ddtjjttksNJ d S )Nr   r   r   )	r   fit_transformr    nparrayr   n_bins_dtypeintr$   r$   r$   r%   test_valid_n_bins:   s    r.   r   c                 C   s`   t jttd}td| d}d}tjt|d |jt|d W d   n1 sR0    Y  dS )z=Check that we raise an error when the wrong strategy is used.)shaper   )r   r   zK`sample_weight` was provided but it cannot be used with strategy='uniform'.matchr   N)	r)   Zoneslenr    r   pytestraises
ValueErrorr   )r   r   r#   err_msgr$   r$   r%   1test_kbinsdiscretizer_wrong_strategy_with_weights@   s    r7   c                  C   sB  t dd} t| d}d}tjt|d |t W d    n1 sH0    Y  g d} t| d}d}tjt|d |t W d    n1 s0    Y  g d} t| d}d}tjt|d |t W d    n1 s0    Y  g d	} t| d}d
}tjt|d |t W d    n1 s40    Y  d S )N)r             @r'   z:n_bins must be a scalar or array of shape \(n_features,\).r0   )r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   r;   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r)   fullr   r3   r4   r5   r(   r    )r   r#   r6   r$   r$   r%   test_invalid_n_bins_arrayL   s,    
(
(
(
r=   )r   r   r   r   r:   )r   r   r   r   )r   r   r   r   )r   r   r   r   c                 C   s~   t g dd| djt|d}t||t ttjd }|jj|fksNJ t	|j|j
D ]\}}|j|d fks\J q\d S )Nr   r   r   r   r   r   r   r   )r   r   r    r   r!   r)   r*   r/   
bin_edges_zipr+   )r   r"   r   r#   Z
n_features	bin_edgesr   r$   r$   r%   test_fit_transform_n_bins_arrayr   s    %
rB   z&ignore: Bins whose width are too smallc                  C   s   t dgdgdgdgdgdgg} tddd	d
}|j| g dd t|jd g d t|| dgdgdgdgdgdgg dS )z;Check the impact of `sample_weight` one computed quantiles.r
   r   r   r   i  i  
   r   r   r   )r   r   r   r   r   r   r   r   )r
   r   r   r           g      ?r9   N)r)   r*   r   r   r   r?   r!   r    r#   r$   r$   r%   *test_kbinsdiscretizer_effect_sample_weight   s
    "rF   c                 C   sH   t dd| d}tjg dtjd}t|}|jt|d t|| dS )z7Make sure that `sample_weight` is not changed in place.r   r   r   )r   r   r   r   r,   r   N)r   r)   r*   float64copyr   r    r   )r   r#   r   Zsample_weight_copyr$   r$   r%   /test_kbinsdiscretizer_no_mutating_sample_weight   s
    
rJ   )r   r   r   c                 C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W d    n1 sl0    Y  |j	d dksJ |
|}t|d d df t|jd  d S )Nalwaysr   r
   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r0   )warningssimplefilterr)   r*   r   r3   warnsUserWarningr   r+   r!   r   Zzerosr/   )r   r    r#   warning_messageXtr$   r$   r%   test_same_min_max   s    
"(
rR   c                  C   s   t d} tdd}tt ||  W d    n1 s>0    Y  tdd}|| dd tt ||  W d    n1 s0    Y  d S )Nr8   r   r'   r   r   )	r)   aranger   r3   r4   r5   r   reshaper!   rE   r$   r$   r%   test_transform_1d_behavior   s    

(
rU   ir   	   c                 C   sX   t g ddd}t g ddd}|d|   }tddd|}t|| d S )	N)r9         @g      @g       @g      $@r   r   )r   r   r   r   r   rC   r   r   r   r   )r)   r*   rT   r   r(   r   )rV   ZX_initZXt_expectedr    rQ   r$   r$   r%   test_numeric_stability   s
    rZ   c                  C   s   t g dddt} | t}t g dddt} | t}t|rNJ ttdd dD dd|| t g dd	dt} | t}t|sJ ttd
d dD dd|	 |	  d S )Nr>   r   rY   onehot-densec                 S   s   g | ]}t |qS r$   r)   rS   .0rV   r$   r$   r%   
<listcomp>       z'test_encode_options.<locals>.<listcomp>F)
categoriesZsparse_outputonehotc                 S   s   g | ]}t |qS r$   r\   r]   r$   r$   r%   r_      r`   T)
r   r   r    r!   spissparser   r   r(   Ztoarray)r#   ZXt_1ZXt_2ZXt_3r$   r$   r%   test_encode_options   s0    


re   z8strategy, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r8   r8   )r   r   r   r   r   r   )r   r   r   r   r   r8   )r   r   r   r   r   r   )r   r   r   r   r8   r8   c                 C   s   t g ddd}td| dd}||}t||  td| dd}||}t||  td| dd}||}t||  d S )	N)r   r   r   r   rW   rC   r   r   r   r   r   r   r   r      )r)   r*   rT   r   r(   r   Zravel)r   Zexpected_2binsZexpected_3binsZexpected_5binsr    r#   rQ   r$   r$   r%   test_nonuniform_strategies   s    


rh   zstrategy, expected_inv)      r9         r   )r         @      r   )r   rX   ri   r   )r   rX   ri   r   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)ri   r9   rj   g      )r   rk   rl   rD   )r   rX   ri   g      ?r   )r   rb   r[   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   rf   )r   r(   r    inverse_transformr   )r   r   Zexpected_invkbdrQ   Xinvr$   r$   r%   test_inverse_transform  s    "

rp   c                 C   s   t g dd d d f }td| dd}|| t ddgd d d f }||}t|jddd	 |j t|jdddg d S )
Nr   r   r   r   r8   r   rf   r
   rg   r   )Zaxisr   )	r)   r*   r   r   r!   r   maxr+   min)r   r    rn   ZX2ZX2tr$   r$   r%    test_transform_outside_fit_range<  s    

rt   c                  C   s   t g dd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgdgdgg d S )Nrq   r   r   rY   r   r   r   )r)   r*   rI   r   r(   r   rm   )r    ZX_beforer#   rQ   Z	Xt_beforero   r$   r$   r%   test_overwriteH  s    



ru   zstrategy, expected_bin_edges)r   r   r   )r   r   r   c                 C   sz   dgdgdgdgdgdgg}t d| d d}d}tjt|d || W d    n1 s\0    Y  t|jd | d S )Nr   r   )r   r   	subsample'Consider decreasing the number of bins.r0   )r   r3   rN   rO   r   r   r?   )r   Zexpected_bin_edgesr    rn   rP   r$   r$   r%   test_redundant_binsV  s    (rx   c                  C   s   t g ddd} t g d}t g ddd}tdddd	}d
}tjt|d ||  W d    n1 sz0    Y  t|j	d | t|
| | d S )N)皙?ry   ffffff?r   r   )ry   gq=
ףp?g=
ףp=?gzG?gp=
ף?rz   )r   r   r8   rC   r   r   r   rw   r0   r   )r)   r*   rT   r   r3   rN   rO   r   r   r?   r!   )r    rA   rQ   rn   rP   r$   r$   r%   !test_percentile_numeric_stabilityb  s    (r{   in_dtype	out_dtypec                 C   sr   t jt| d}td||d}|| |d ur4|}n"|d u rP|jt jkrPt j}n|j}||}|j|ksnJ d S NrG   r   )r   r   r,   )	r)   r*   r    r   r   r,   float16rH   r!   )r|   r}   r   X_inputrn   Zexpected_dtyperQ   r$   r$   r%   test_consistent_dtypeo  s    

r   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S r~   )	r)   r*   r    r   float32r   r!   rH   r   )r   r   r   Zkbd_32ZXt_32Zkbd_64ZXt_64r$   r$   r%   test_32_equal_64  s    



r   c                  C   s   t g ddd} tdddd}||  t|}|jd d ||  t|jd	 |jd	 D ]\}}t j	
|| qb|jj|jjksJ d S )
Nr	   r   r   rC   r   r   r   rv   r   )r)   r*   rT   r   r   r   
set_paramsr@   r?   Ztestingr   r/   )r    Zkbd_defaultZkbd_without_subsamplingZbin_kbd_defaultZbin_kbd_with_subsamplingr$   r$   r%   'test_kbinsdiscretizer_subsample_default  s    

r   zencode, expected_namesrb   c                 C   s.   g | ]&}t d D ]}d| dt| qqS r8   feat_rangefloatr^   col_idZbin_idr$   r$   r%   r_     s   r_   r   r[   c                 C   s.   g | ]&}t d D ]}d| dt| qqS r   r   r   r$   r$   r%   r_     s   r   c                 C   s   g | ]}d | qS r   r$   )r^   r   r$   r$   r%   r_     r`   c                 C   sz   g dg dg dg dg}t d| d|}||}dd td	D }||}|jd
 |jd kslJ t|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    )r
   r   r   )r   r   r   )r   r   r
   )r   r8   r   r8   rY   c                 S   s   g | ]}d | qS r   r$   r]   r$   r$   r%   r_     r`   z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>r   r   r   N)r   r   r!   r   Zget_feature_names_outr/   r   )r   Zexpected_namesr    rn   rQ   Zinput_featuresZoutput_namesr$   r$   r%   *test_kbinsdiscrtizer_get_feature_names_out  s    

r   c                 C   sj   t j|dd }t| d|d}|| t|}|jd d || t|j	d |j	d dd d S )	N)i r   r   iP  )r   rv   Zrandom_stater   r   g{Gz?)Zrtol)
r)   randomZRandomStateZrandom_sampler   r   r   r   r   r?   )r   Zglobal_random_seedr    Zkbd_subsamplingZkbd_no_subsamplingr$   r$   r%   test_kbinsdiscretizer_subsample  s    

r   c                  C   s  t dd d d f } t }|| } tjtdd |  W d    n1 sR0    Y  tjtdd |j| | d W d    n1 s0    Y  tj	dd$ t
d ||  W d    n1 s0    Y  tjtd	d |j| d
 W d    n1 s
0    Y  d S )NrC   z$Missing required positional argumentr0   z$Cannot use both X and Xt. Use X only)r    rQ   T)recorderrorzXt was renamed X in version 1.5)rQ   )r)   rS   r   r(   r3   r4   	TypeErrorrm   rL   catch_warningsrM   rN   FutureWarning)r    rn   r$   r$   r%   )test_KBD_inverse_transform_Xt_deprecation  s    
&,
(r   )0rL   numpyr)   r3   Zscipy.sparsesparserc   Zsklearnr   Zsklearn.preprocessingr   r   Zsklearn.utils._testingr   r   r   r   r    markZparametrizer&   r.   r7   r=   rB   filterwarningsrF   rJ   rR   rU   r   rZ   re   rh   rp   rt   ru   rx   r{   r   r   rH   r   r   r   r   r   r   r$   r$   r$   r%   <module>   s     !

&   $



	





 

		

