a
    hY                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd d	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 G dd dZ2dd Z3ej4dddd Z5ej4dddd Z6ej4dd Z7ej4dd Z8ej9:ddee2gdd Z;dd  Z<d!d" Z=d#d$ Z>d%d& Z?ej9:d'd(gd(d)gfd*d+ Z@ej9:d,d-d.d/g d0fd1d2d3g d4fd5d6d7d8d9gfgd:d; ZAd<d= ZBej9:d>d?i d@dAgfdBi d@gfdCdDdEidFdGgfgdHdI ZCdJdK ZDdLdM ZEdNdO ZFdPdQ ZGej9:dRedSdTdUdVdWgfe%dXdYdZdVg fe!d[d\dZdVdWgfe"d]d]dZdVd^d_gfed`daddVg fedbdcdAdVg feedddedfdgdAdVg fgdhdi ZHej9:djeejIeJfeejIejIfeejIeJfe!ejIeJfe"ejIejIfe%ejIeJfgdkdl ZKdmdn ZLdodp ZMdqdr ZNdsdt ZOdudv ZPdwdx ZQdydz ZRd{d| ZSd}d~ ZTdd ZUdS )    N)partial)	resources)Path)dumpsloadsMock)	HTTPError)urlparse)clear_data_home
fetch_fileget_data_homeload_breast_cancerload_diabetesload_digits
load_files	load_irisload_linnerudload_sample_imageload_sample_images	load_wine)RemoteFileMetadata$_derive_folder_and_filename_from_url_fetch_remoteload_csv_dataload_gzip_compressed_csv_datacheck_as_frame)scale)Bunchc                   @   s    e Zd ZdZdd Zdd ZdS )
_DummyPathz8Minimal class that implements the os.PathLike interface.c                 C   s
   || _ d S Npath)selfr#    r%   \/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/datasets/tests/test_base.py__init__0   s    z_DummyPath.__init__c                 C   s   | j S r!   r"   )r$   r%   r%   r&   
__fspath__3   s    z_DummyPath.__fspath__N)__name__
__module____qualname____doc__r'   r(   r%   r%   r%   r&   r    -   s   r    c                 C   s   t j| rt|  d S r!   )osr#   isdirshutilrmtreer"   r%   r%   r&   _remove_dir7   s    r1   module)scopec                 c   s    t | d}|V  t| d S )NZscikit_learn_data_home_teststrmktempr1   Ztmpdir_factoryZtmp_filer%   r%   r&   	data_home<   s    r8   c                 c   s    t | d}|V  t| d S )NZscikit_learn_load_files_testr4   r7   r%   r%   r&   load_files_rootC   s    r9   c                 c   sB   t j| d}t j|dd}|d |  t|V  t| d S )NdirF)r;   delete   Hello World!
)tempfilemkdtempNamedTemporaryFilewritecloser5   r1   )r9   Ztest_category_dir1Zsample_filer%   r%   r&   test_category_dir_1J   s    

rC   c                 c   s"   t j| d}t|V  t| d S )Nr:   )r>   r?   r5   r1   )r9   Ztest_category_dir2r%   r%   r&   test_category_dir_2T   s    
rD   path_containerc                 C   s~   | d ur| |}t |d}||ks&J tj|s6J | d urF| |}t|d tj|r`J t |d}tj|szJ d S )N)r8   )r   r-   r#   existsr   )rE   r8   r%   r%   r&   test_data_home[   s    


rG   c                 C   s>   t | }t|jdksJ t|jdks,J |jd u s:J d S )Nr   )r   len	filenamestarget_namesDESCR)r9   resr%   r%   r&   test_default_empty_load_fileso   s    rM   c                 C   sN   t |}t|jdksJ t|jdks,J |jd u s:J |jdgksJJ d S )N      r=   )r   rH   rI   rJ   rK   datarC   rD   r9   rL   r%   r%   r&   test_default_load_filesv   s
    rR   c                 C   sp   t j| t j }t|d|gdd}t|jdks<J t|j	dksNJ |j
dks\J |jdgkslJ d S )Ntestutf-8)description
categoriesencodingrN   zHello World!
)r-   r#   abspathsplitseppopr   rH   rI   rJ   rK   rP   )rC   rD   r9   categoryrL   r%   r%   r&   .test_load_files_w_categories_desc_and_encoding~   s    
r]   c                 C   sT   t |dd}t|jdksJ t|jdks0J |jd u s>J |dd u sPJ d S )NF)Zload_contentrN   rO   rP   )r   rH   rI   rJ   rK   getrQ   r%   r%   r&   test_load_files_wo_load_content   s
    r_   allowed_extensionsz.txtz.jsonc                    sn   | d    d}fdd|D }|D ]}|d q*t|  d}t fdd|D t|jksjJ dS )	z;Check the behaviour of `allowed_extension` in `load_files`.sub)z	file1.txtz
file2.jsonz
file3.jsonzfile4.mdc                    s   g | ]} | qS r%   r%   .0f)dr%   r&   
<listcomp>       z6test_load_files_allowed_extensions.<locals>.<listcomp>s   hellor`   c                    s   g | ]}|j  v rt|qS r%   )suffixr5   )rc   prh   r%   r&   rf      rg   N)mkdirwrite_bytesr   setrI   )Ztmp_pathr`   filespathsrj   rL   r%   )r`   re   r&   "test_load_files_allowed_extensions   s    rp   zHfilename, expected_n_samples, expected_n_features, expected_target_nameszwine_data.csv      )Zclass_0Zclass_1Zclass_2iris.csv      )ZsetosaZ
versicolorZ	virginicazbreast_cancer.csv9     Z	malignantZbenignc                 C   sV   t | \}}}|jd |ks J |jd |ks2J |jd |ksDJ tj|| d S )Nr   rN   )r   shapenptestingassert_array_equal)filenameZexpected_n_samplesZexpected_n_featuresZexpected_target_namesactual_dataZactual_targetZactual_target_namesr%   r%   r&   test_load_csv_data   s
    r~   c                  C   s   d} d}t | d}t | |d}t|dks.J t|dks>J tj|d |d  tj|d |d  tj|d	 |d	  |d
 dsJ d S )Nrs   ziris.rstdata_file_namer   descr_file_nameru      r   rN   rO   z.. _iris_dataset:)r   rH   ry   rz   r{   
startswith)r   r   Zres_without_descrZres_with_descrr%   r%   r&   test_load_csv_data_with_descr   s    
r   z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz  
   diabetes_target.csv.gzzdigits.csv.gz	delimiter,  A   c                 C   s&   t | fi |}|jt|ks"J d S r!   )r   rx   tuple)r|   kwargsZexpected_shaper}   r%   r%   r&   "test_load_gzip_compressed_csv_data   s    	r   c                  C   sB   d} d}t | d}t | |d\}}tj|| |ds>J d S )Nr   zdiabetes.rstr   r   z.. _diabetes_dataset:)r   ry   rz   r{   r   )r   r   Zexpected_datar}   descrr%   r%   r&   -test_load_gzip_compressed_csv_data_with_descr   s    

r   c                  C   s   zt  } t| jdksJ t| jdks,J | j}t|d ddd d f tjg dtjdksfJ t|d ddd d f tjg dtjdksJ | jsJ W n t	y   t
d Y n0 d S )NrO   r   )         )dtyperN   )rO      rr   3Could not load sample images, PIL is not available.)r   rH   imagesrI   ry   allarrayuint8rK   ImportErrorwarningswarn)rL   r   r%   r%   r&   test_load_sample_images   s    44r   c                  C   sJ   z(t d} | jdksJ | jdks&J W n tyD   td Y n0 d S )Nz	china.jpgr   )i  i  r   r   )r   r   rx   r   r   r   )chinar%   r%   r&   test_load_sample_image   s    r   c                  C   sn   t dd} | jjdksJ | jjs*J dt| jdks<J | jsFJ t  }tj	j
t| jd |jdd d	S )
zTest to check that we load a scaled version by default but that we can
    get an unscaled version when setting `scaled=False`.F)Zscaledr   r   r   r   gT5@g-C6?)ZatolN)r   rP   rx   targetsizerH   feature_namesrK   ry   rz   Zassert_allcloser   )Zdiabetes_rawZdiabetes_defaultr%   r%   r&   test_load_diabetes_raw   s    

r   zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)rv   rw   )rv   rO   Tr|   )rq   rr   )rq   r   )rt   ru   )rt   )   r   Zdata_filenameZtarget_filenamer   )r   )r   @   )r   	   )Zn_class)Q  r   )r   c                    s   |   t  tsJ  jj|ks$J  jj|ks4J t drTt j|d ksTJ |d urnt j|ksnJ |r| j	s|J |rd v sJ t
 fdd|D sJ d S )Nr   rN   data_modulec                    s.   g | ]&}| v o(t  d   |   qS )r   )r   rn   is_filerb   bunchr%   r&   rf   /  s   ztest_loader.<locals>.<listcomp>)
isinstancer   rP   rx   r   hasattrrH   r   rJ   rK   r   )loader_funcZ
data_shapeZtarget_shapeZn_targetZ	has_descrrI   r%   r   r&   test_loader  s"    


r   z%loader_func, data_dtype, target_dtypec                 C   s   |  }t || ||d d S )N)Zexpected_data_dtypeZexpected_target_dtyper   )r   Z
data_dtypeZtarget_dtypeZdefault_resultr%   r%   r&   test_toy_dataset_frame_dtype7  s    r   c                  C   s2   t dd} tt| }d|_|d |jks.J d S )Nx)r   y)r   r   r   r   r   Zbunch_from_pklr%   r%   r&   test_loads_dumps_bunchL  s    
r   c                  C   sf   t dd} d| jd< tt| }|jdks.J |d dks>J d|_|jdksRJ |d dksbJ d S )Noriginal)keyzset from __dict__r   changed)r   __dict__r   r   r   r   r%   r%   r&   8test_bunch_pickle_generated_with_0_16_and_read_with_0_17S  s    

r   c                  C   s   t  } dt| v sJ d S )NrP   )r   r;   )rP   r%   r%   r&   test_bunch_dirh  s    r   c                  C   s   d} t jt| d ddlm} W d   n1 s40    Y  d} t jt| d ddlm} W d   n1 sr0    Y  dS )zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problemmatchr   )load_bostonNzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_function)pytestraisesr   sklearn.datasetsr   r   )msgr   r   r%   r%   r&   test_load_boston_errorn  s    *r   c              	   C   s   d}t d|d}tt|dddt dd}| d| tjtd	d
}tj	tdd
 t
|ddd W d   n1 sz0    Y  |jdksJ |D ]}t|jd| ksJ qt|dksJ W d   n1 s0    Y  dS )z'Check retry mechanism in _fetch_remote.z8https://scikit-learn.org/this_file_does_not_exist.tar.gzZinvalid_fileN  	Not Found)urlcoder   hdrsfpZside_effect"sklearn.datasets._base.urlretrievezRetry downloadingr   zHTTP Error 404r   r   )Z	n_retriesdelayru   zRetry downloading from url: )r   r   r	   ioBytesIOsetattrr   warnsUserWarningr   r   
call_countr5   messagerH   )monkeypatchr   Zinvalid_remote_fileurlretrieve_mockrecordrr%   r%   r&   1test_fetch_remote_raise_warnings_with_invalid_urlz  s    ,r   c                  C   s   t d\} }| dksJ |dks$J t d\} }| dks<J |dksHJ t d\} }| dks`J |dkslJ t d\} }| dksJ |d	ksJ t d
\} }| dksJ |d	ksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dks$J t d\} }| dks>J |dksLJ t d\} }| dksfJ |dkstJ t d\} }| dksJ |dksJ t d\} }| dksJ |d	ksJ tjtdd t d W d    n1 s0    Y  d S )Nzhttps://example.com/file.tar.gzexample.comzfile.tar.gzu2   https://example.com/نمونه نماینده.datau   نمونه-نماینده.dataz)https://example.com/path/to-/.file.tar.gzzexample.com/path_tozhttps://example.com/Zdownloaded_filezhttps://example.comz2https://example.com/path/@to/data.json?param=valuez	data.jsonz4https://example.com/path/@@to._/-_.data.json.#anchorz"https://example.com//some_file.txtzsome_file.txtzhttp://example/../some_file.txtZexamplez'https://example.com/!.'.,/some_file.txtz+https://example.com/a/!.'.,/b/some_file.txtzexample.com/a_bzhttps://example.com/!.'.,zInvalid URLr   z
https:/../)r   r   r   
ValueError)folderr|   r%   r%   r&   (test_derive_folder_and_filename_from_url  sp    r   c                    s    fdd}t |dS )Nc                    sH   t  }t| jd}||  s4t| ddd d t|| | d S )N/r   r   )r   r
   r#   striprF   r	   r/   copy)r   Z
local_pathZserver_root	file_pathserver_sider%   r&   _urlretrieve_mock  s
    z,_mock_urlretrieve.<locals>._urlretrieve_mockr   r   )r   r   r%   r   r&   _mock_urlretrieve  s    r   c              	   C   s  t |}|d }|  |d }d}|j|dd |d }|  |d }|jddd |d	 }|  t|}| d
| | dt|d td}	|	|d d ksJ |	jdd|ksJ td}	|	|d d d ksJ |	jdd|dksJ t	d}
t
tJ t
j|
d tddd W d    n1 s>0    Y  W d    n1 s^0    Y  |d d }t| |d gksJ d S )Nr   
data.jsonl{"a": 1, "b": 2}
rT   rW   Z	subfolderzother_file.txtzSome important text data.r8   r   z$sklearn.datasets._base.get_data_home)Zreturn_valuehttps://example.com/data.jsonlr   z,https://example.com/subfolder/other_file.txtzERetry downloading from url: https://example.com/subfolder/invalid.txtr   z)https://example.com/subfolder/invalid.txtr   )r   )r   rk   
write_textr   r   r   r   	read_textreescaper   r   r	   r   sortediterdir)r   tmpdirr   	data_fileserver_dataZserver_subfolderZother_data_filer8   r   fetched_file_pathexpected_warning_msgZlocal_subfolderr%   r%   r&   test_fetch_file_using_data_home  sR    

Fr   c                 C   s  | d}t|d }d}|j|dd | d}t|}| d| td|d	}||d ksbJ |jdd|ksvJ |jd
ksJ td|d	}||d ksJ |jdd|ksJ |jd
ksJ |  td|d	}||d ksJ |jdd|ksJ |jdks
J d S )Nr   r   r   rT   r   client_sider   r   r   rN   rO   )	rk   r   r   r   r   r   r   r   unlink)r   r   r   r   r   r   r   r   r%   r%   r&   test_fetch_file_without_sha256  s:    

r   c              	   C   s  | d}t|d }d}|j|dd t|  }| d}t|}| d| t	d||d	}||d ksvJ |j
dd|ksJ |jd
ksJ t	d||d	}||d ksJ |j
dd|ksJ |jd
ksJ |jddd d| d}	tj|	dV t	d||d	}||d ks J |j
dd|ks6J |jdksFJ W d    n1 s\0    Y  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ |  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ t	d|d}||d ksJ |j
dd|ks.J |jdks>J d}
d}td| d|
 d}tjt|dL tj|d t	d||
d	 W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr   r   r   rT   r   r   r   r   )r   sha256rN   zcorrupted contentszQSHA256 checksum of existing local file data.jsonl \(.*\) differs from expected \(z9\): re-downloading from https://example.com/data.jsonl \.r   rO   r   r   Zdeadbabecafebeefzdiffers from expectedz#The SHA256 checksum of data.jsonl (z) differs from expected (z).)rk   r   r   hashlibr   
read_bytes	hexdigestr   r   r   r   r   r   r   r   r   r   r   OSError)r   r   r   r   r   Zexpected_sha256r   r   r   Zexpected_msgZnon_matching_sha256r   Zexpected_error_msgr%   r%   r&   test_fetch_file_with_sha256?  s    

0r  )Vr   r   r-   r   r/   r>   r   	functoolsr   	importlibr   pathlibr   pickler   r   Zunittest.mockr   urllib.errorr	   urllib.parser
   numpyry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.datasets._baser   r   r   r   r   Z"sklearn.datasets.tests.test_commonr   Zsklearn.preprocessingr   Zsklearn.utilsr   r    r1   Zfixturer8   r9   rC   rD   markZparametrizerG   rM   rR   r]   r_   rp   r~   r   r   r   r   r   r   r   Zfloat64intr   r   r   r   r   r   r   r   r   r   r  r%   r%   r%   r&   <module>   s   8





	







	






G4(