a
    h                     @   s   d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZmZmZ ddlmZ ddlmZ dd	 Zd
d Zdd Zdd Zdd Zdd Zdd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for CI jobs).    )partial)patchN)check_as_framecheck_pandas_dependency_messagecheck_return_X_y	normalize)assert_allclose_dense_sparsec           	      C   s,  | ddd}|j dsJ | d|jddd dd}|j|jdd  ksNJ t|j d	d
gksjJ t|jt|jksJ t|jt|j	ksJ |j	d	 }|j|jd	  }|j
|}|j	t|j|kd	 d	  }||ksJ | dddd\}}t|t|j	ksJ |j|jjks(J d S )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   Z
return_X_y)DESCR
startswithZtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   Z	data2catsZentry1categorylabelZentry2Xy r$   ^/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/datasets/tests/test_20news.pytest_20news   s"    
r&   c                 C   s\   | dd}t |d t |jks$J t |d t |jks>J t |d t |jksXJ dS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r
   r   r   r   r   N)r   r   r   r   )r   r   r$   r$   r%   test_20news_length_consistency5   s    
r(   c                 C   sj  | dd}t |jr"|jjdks&J |jjdks6J |jjd dksJJ |jjtjks\J |j	
dslJ | dd}t |jr|jjdksJ |jjd	ksJ |jjd d
ksJ |jjtjksJ |j	
dsJ t| dd}t|| | dd}t |jr|jjdksJ |jjdks*J |jjd dks@J |jjtjksTJ |j	
dsfJ d S )Ntrainr'   Zcsr)2,  ; r   r*   r   test)l  r+   r-   r
   )I  r+   r.   )spissparser   formatr   r   Zdtyper   Zfloat64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunchZ
fetch_funcr$   r$   r%   test_20news_vectorizedA   s(    



 r4   c                 C   sf   | dd}| dd}|d d d }|d d d }t |t| ttjj| dddsbJ d S )NFr   Tr   d   r   )Zaxis)r	   r   r   ZallcloseZlinalgZnormZtodense)r2   r"   ZX_ZX_normr$   r$   r%   test_20news_normalization_   s    

r6   c                    s   t d | dd}t||  |j}|jdks2J t fdd|jjD sPJ dD ]}|| v sTJ qTd| v szJ |j	j
dksJ d S )	NZpandasTZas_frame)r*   i< c                    s   g | ]}t | jqS r$   )
isinstanceZSparseDtype).0colpdr$   r%   
<listcomp>q       z(test_20news_as_frame.<locals>.<listcomp>)beginnerZ	beginnersZ	beginningZ
beginningsZbeginsZbegleyZbegoneZcategory_class)pytestZimportorskipr   framer   r
   r   Zdtypeskeysr   name)r2   r3   rA   Zexpected_featurer$   r;   r%   test_20news_as_framei   s    


	rD   c                 C   s   t |  d S )N)r   )r2   Zhide_available_pandasr$   r$   r%   test_as_frame_no_pandas   s    rE   c              
   C   s   t d}t dX}d|_d|_d}tjt|d | dd W d    n1 sR0    Y  W d    n1 sp0    Y  W d    n1 s0    Y  d S )Nzos.path.existszjoblib.loadT)r"   r#   zThe cached dataset located in)matchr7   )r   Zreturn_valuer@   Zraises
ValueError)r2   Zmock_is_existZ	mock_loaderr_msgr$   r$   r%   test_outdated_pickle   s    

rI   )__doc__	functoolsr   Zunittest.mockr   numpyr   r@   Zscipy.sparsesparser/   Z"sklearn.datasets.tests.test_commonr   r   r   Zsklearn.preprocessingr   Zsklearn.utils._testingr	   r&   r(   r4   r6   rD   rE   rI   r$   r$   r$   r%   <module>   s    
