a
    hK                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddl m!Z!m"Z"m#Z# dZ$dZ%G dd dZ&eeddZdd Z'ej()ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfd0dd0id1d%dfgej()d2d3d4gej()d5dd6gd7d8 Z*ej()ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfgej()d2d3d4gd9d: Z+ej()dg d;d<d= Z,ej()d2d3d4gd>d? Z-ej()d2d3d4gd@dA Z.ej()d2d3d4gej()dBdCdCdDggdEdF Z/ej()dg dGej()d2d3d4gdHdI Z0ej()dg dJej()d2d3d4gdKdL Z1dMdN Z2ej3dOdPdQdR Z4ej3dOdPdSdT Z5ej()dUg dVej()d5dd6gdWdX Z6ej()dYd2dZid[fd\dZid]fgd^d_ Z7ej()d`ddadbdadadbd6d4dbd6dadbgdcdd Z8ej(9deej()dYd2d4idffd\didgfd4ddhdgfgdidj Z:ej(9deej()dkdldmgdndo Z;dpdq Z<ej()d5dd6gdrds Z=ej()d5dd6gdtdu Z>ej()d5dd6gej()d2d3d4gdvdw Z?ej()d5dd6gej()dxddyiddzdd{gd|d} Z@ej()d5dd6gej()d~dyd+dzieAdfddddgdeAdfd0d0d6deAdfdddddeAdfdddd6deAdfddddeBdfddddgdeBdfgej()d2d3d4gdd ZCej()ddddd{eAdfdddeAdfdddd{eAdfi eAdfgdd ZDej()d5dd6gdd ZEej()d5dd6gdd ZFej()d5dd6gdd ZGdd ZHej()d5dd6gdd ZIej()ddd6gdd ZJdd ZKdd ZLej()d5dd6gdd ZMej()dg ddd ZNdd ZOej()d5dd6gej()d2ddd ZPdd ZQdd ZRdd ZSdS )zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_OPENML_PREFIX_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTc                   @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec                 C   s   || _ || _d S N)datais_gzip)selfr   r    r   ^/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/datasets/tests/test_openml.py__init__'   s    z_MockHTTPResponse.__init__c                 C   s   | j |S r   )r   read)r   amtr   r   r   r   +   s    z_MockHTTPResponse.readc                 C   s   | j   d S r   )r   closer   r   r   r   r   .   s    z_MockHTTPResponse.closec                 C   s   | j rddiS i S )NzContent-Encodinggzipr   r   r   r   r   info1   s    z_MockHTTPResponse.infoc                 C   s
   t | jS r   )iterr   r   r   r   r   __iter__6   s    z_MockHTTPResponse.__iter__c                 C   s   | S r   r   r   r   r   r   	__enter__9   s    z_MockHTTPResponse.__enter__c                 C   s   dS )NFr   )r   exc_typeexc_valexc_tbr   r   r   __exit__<   s    z_MockHTTPResponse.__exit__N)r   )
__name__
__module____qualname__r   r   r   r    r"   r#   r'   r   r   r   r   r   &   s   
r   )	data_homec                    s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr| tjjd| d S )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/zhttps://api.openml.org/data/v1/z-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutput)path_suffixr   r   
_file_nameU   s8    	
z4_monkey_patch_webbased_functions.<locals>._file_namec           	         s   |  |sJ  | |}t| }|dj}|r^r^t| }t|dW  d    S |d}t| }t|dW  d    S W d    n1 s0    Y  d S )NrbTF)
startswithr   filesopenr   r   r   )	r3   has_gzip_headerexpected_prefixr4   data_file_namedata_file_pathffpdecompressed_f)r7   data_modulegzip_responseread_fnr   r   _mock_urlopen_sharedk   s    

z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                    s    | |ddS N.jsonr3   r<   r=   r4   r   r3   r<   )rF   url_prefix_data_descriptionr   r   _mock_urlopen_data_descriptionz   s    zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                    s    | |ddS rG   r   rJ   )rF   url_prefix_data_featuresr   r   _mock_urlopen_data_features   s    zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                    s    | |ddS )Nz.arffrI   r   rJ   )rF   url_prefix_download_datar   r   _mock_urlopen_download_data   s    zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac           	         s  |  sJ  | d}t| }|d2}|d}| d}t|}W d    n1 sh0    Y  d|v rtd ddd t	 d|df}|rt	| }t
|dW  d    S |d}t	| }t
|d	W  d    S W d    n1 s0    Y  d S )
NrH   r8   zutf-8error  Simulated mock errorr3   codemsghdrsrA   TF)r9   r   r:   r;   r   decodejsonloadsr   r   r   )	r3   r<   r>   r?   r@   rB   Z	decoded_sZ	json_datarA   )r7   rC   rE   url_prefix_data_listr   r   _mock_urlopen_data_list   s$    

(
zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                    sv   |   }| ddk}|r*||S |r>||S |rR||S |rf ||S td| d S )NzAccept-encodingr   zUnknown mocking URL pattern: %s)get_full_url
get_headerr9   
ValueError)requestargskwargsr3   r<   )rL   rN   r\   rP   rK   rM   r[   rO   r   r   _mock_urlopen   s    







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)r   r;   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrD   rc   r   )r7   rL   rN   r\   rP   rF   rC   rD   r6   rE   rK   rM   r[   rO   r    _monkey_patch_webbased_functionsG   s     rm   z9data_id, dataset_params, n_samples, n_features, n_targets=   rl            iris)nameversion      &   Zanneal1        cpu鍞     H      _  
      rs   zadult-census  M   ZMiceProtein  i  parser	liac-arffpandasrD   Fc           
      C   s
  t d}t| ||d tf dd|d|}	t|	jd |ksDJ t|	tsRJ t|	j|j	sdJ |	jj
||| fks|J t|	j|j	sJ |	jj
||fksJ |dkrt|	j|jsJ |	jj
|fksJ n&t|	j|j	sJ |	jj
||fksJ |	jdu sJ dS )	zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rD   TFas_framecacher   idrq   N)pytestimportorskiprm   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchrl   dataset_params	n_samples
n_features	n_targetsr   rD   pdbunchr   r   r   test_fetch_openml_as_frame_true   s*    (
r   c                 C   s   t d t| |dd tf dd|d|}t|jd |ksDJ t|tsRJ |jdu s`J t|j	t
jsrJ |j	j||fksJ t|jt
jsJ |dkr|jj|fksJ n|jj||fksJ t|jtsJ dS )	znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   Nrq   )r   r   rm   r   r   r   r   r   r   r   npZndarrayr   r   r   dict)r   rl   r   r   r   r   r   r   r   r   r    test_fetch_openml_as_frame_false  s&    $
r   )rn   r   r   c           
         s   t dt| |dd t|dddd}t|dddd}|j|j }  fdd}||}j|  |j|j }j|j	   fd	d
}||}	j|	 dS )z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   rl   r   r   r   c                    s,    | j  }jj|r$| |jS | S d S r   )rs   apitypesis_numeric_dtypeastypedtypeZseriesZpandas_series)data_pandasr   r   r   convert_numerical_dtypes]  s    
zFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesc                    sJ    | j  }jj|r$| |jS t|jjrB| j	|jj
S | S d S r   )rs   r   r   r   r   r   r   CategoricalDtypecatZrename_categoriesr   r   )frame_pandasr   r   r   (convert_numerical_and_categorical_dtypesq  s    
zVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypesN)
r   r   rm   r   r   applytestingassert_frame_equalr   feature_names)
r   rl   Z
bunch_liacbunch_pandasZ	data_liacr   Zdata_liac_with_fixed_dtypesZ
frame_liacr   Zframe_liac_with_fixed_dtypesr   )r   r   r   r   $test_fetch_openml_consistency_parserE  s2    


r   c                 C   s\   t d d}t| |dd t|dd|d}t|dd|d}t|j|j t|j|j dS )z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   rn   Tr   Fr   N)r   r   rm   r   r   r   r   r   )r   r   rl   Zbunch_as_frame_trueZbunch_as_frame_falser   r   r   -test_fetch_openml_equivalence_array_dataframe  s"    
r   c                 C   s|  t d}|jjj}d}d}d}d}|g d}tjgd }	g d}
d	}t| |d
 t|d
d|d}|j	}|j
}|j}t||jsJ t|j|	ksJ |j|ksJ t|j|
ksJ t|j|
ksJ |j|gksJ t||jsJ |j|ksJ |j|ksJ |j|ksJ |jjs,J t||js>J |j|ksNJ t|j|	|g ksjJ |jjsxJ dS )z>Check fetching on a numerical only dataset with string labels.r   rn   ro   rp   )ro   )ro      )zIris-setosazIris-versicolorzIris-virginicarp   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   Zfloat64rm   r   r   r   r   r   r   alldtypesr   columnsr   Ztarget_namesr   r   rs   indexZ	is_unique)r   r   r   r   rl   Z
data_shapeZtarget_shapeZframe_shapeZtarget_dtypeZdata_dtypesZ
data_namesZtarget_namer   r   r   r   r   r   r   test_fetch_openml_iris_pandas  sJ    

r   target_columnr   r   c                 C   s   t d}d}t| |d t|dd||d}t|dd|d}|j|j|j t|tr|j	|j
j|| |jjdksJ n |j
j|ksJ |jjdksJ d	S )
z@Check that we can force the target to not be the default target.r   rn   TF)rl   r   r   r   r   r   )ro      r   N)r   r   rm   r   r   r   r   r   listZassert_index_equalr   r   Indexr   r   rs   )r   r   r   r   rl   Zbunch_forcing_targetZbunch_defaultr   r   r   !test_fetch_openml_forcing_targets  s0    

r   )rn   ru   rx   r|   r   c                 C   s   t d}t| |dd t|ddd|d}t|ddd|d\}}|j|j| t||jrn|j	|j
| n|j|j
| dS )z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Frl   r   r   
return_X_yr   N)r   r   rm   r   r   r   r   r   r   assert_series_equalr   )r   rl   r   r   r   Xyr   r   r   .test_fetch_openml_equivalence_frame_return_X_y  s(    

r   )rn   rx   r|   r   c                 C   s\   t d t| |dd t|ddd|d}t|ddd|d\}}t|j| t|j| dS )z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   rm   r   r   r   r   )r   rl   r   r   r   r   r   r   r   .test_fetch_openml_equivalence_array_return_X_y  s$    

r   c                 C   sf   t d d}t| |dd d}t||ddd}t||ddd}|jjjdksRJ |jjd	ksbJ d
S )z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   r@   ON)r   r   rm   r   r   r   kind)r   rl   r   Zbunch_liac_arffr   r   r   r   $test_fetch_openml_difference_parsers(  s$    
r   module)scopec                   C   s0   g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typeZsteelcarbonZhardnesstemper_rolling	conditionformabilityZstrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   Zthickwidthr1   oilZborepackingr   )vendorZMYCTZMMINZMMAXZCACHZCHMINZCHMAXr   )NZ Mean_Acc1298_Mean_Mem40_CentroidZMean_Acc1298_Mean_Mem40_RolloffZMean_Acc1298_Mean_Mem40_FluxZMean_Acc1298_Mean_Mem40_MFCC_0ZMean_Acc1298_Mean_Mem40_MFCC_1ZMean_Acc1298_Mean_Mem40_MFCC_2ZMean_Acc1298_Mean_Mem40_MFCC_3ZMean_Acc1298_Mean_Mem40_MFCC_4ZMean_Acc1298_Mean_Mem40_MFCC_5ZMean_Acc1298_Mean_Mem40_MFCC_6ZMean_Acc1298_Mean_Mem40_MFCC_7ZMean_Acc1298_Mean_Mem40_MFCC_8ZMean_Acc1298_Mean_Mem40_MFCC_9ZMean_Acc1298_Mean_Mem40_MFCC_10ZMean_Acc1298_Mean_Mem40_MFCC_11ZMean_Acc1298_Mean_Mem40_MFCC_12ZMean_Acc1298_Std_Mem40_CentroidZMean_Acc1298_Std_Mem40_RolloffZMean_Acc1298_Std_Mem40_FluxZMean_Acc1298_Std_Mem40_MFCC_0ZMean_Acc1298_Std_Mem40_MFCC_1ZMean_Acc1298_Std_Mem40_MFCC_2ZMean_Acc1298_Std_Mem40_MFCC_3ZMean_Acc1298_Std_Mem40_MFCC_4ZMean_Acc1298_Std_Mem40_MFCC_5ZMean_Acc1298_Std_Mem40_MFCC_6ZMean_Acc1298_Std_Mem40_MFCC_7ZMean_Acc1298_Std_Mem40_MFCC_8ZMean_Acc1298_Std_Mem40_MFCC_9ZMean_Acc1298_Std_Mem40_MFCC_10ZMean_Acc1298_Std_Mem40_MFCC_11ZMean_Acc1298_Std_Mem40_MFCC_12ZStd_Acc1298_Mean_Mem40_CentroidZStd_Acc1298_Mean_Mem40_RolloffZStd_Acc1298_Mean_Mem40_FluxZStd_Acc1298_Mean_Mem40_MFCC_0ZStd_Acc1298_Mean_Mem40_MFCC_1ZStd_Acc1298_Mean_Mem40_MFCC_2ZStd_Acc1298_Mean_Mem40_MFCC_3ZStd_Acc1298_Mean_Mem40_MFCC_4ZStd_Acc1298_Mean_Mem40_MFCC_5ZStd_Acc1298_Mean_Mem40_MFCC_6ZStd_Acc1298_Mean_Mem40_MFCC_7ZStd_Acc1298_Mean_Mem40_MFCC_8ZStd_Acc1298_Mean_Mem40_MFCC_9ZStd_Acc1298_Mean_Mem40_MFCC_10ZStd_Acc1298_Mean_Mem40_MFCC_11ZStd_Acc1298_Mean_Mem40_MFCC_12ZStd_Acc1298_Std_Mem40_CentroidZStd_Acc1298_Std_Mem40_RolloffZStd_Acc1298_Std_Mem40_FluxZStd_Acc1298_Std_Mem40_MFCC_0ZStd_Acc1298_Std_Mem40_MFCC_1ZStd_Acc1298_Std_Mem40_MFCC_2ZStd_Acc1298_Std_Mem40_MFCC_3ZStd_Acc1298_Std_Mem40_MFCC_4ZStd_Acc1298_Std_Mem40_MFCC_5ZStd_Acc1298_Std_Mem40_MFCC_6ZStd_Acc1298_Std_Mem40_MFCC_7ZStd_Acc1298_Std_Mem40_MFCC_8ZStd_Acc1298_Std_Mem40_MFCC_9ZStd_Acc1298_Std_Mem40_MFCC_10ZStd_Acc1298_Std_Mem40_MFCC_11ZStd_Acc1298_Std_Mem40_MFCC_12ZBH_LowPeakAmpZBH_LowPeakBPMZBH_HighPeakAmpZBH_HighPeakBPMZBH_HighLowRatioZBHSUM1ZBHSUM2ZBHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)ageZ	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NZDYRK1A_NZITSN1_NZBDNF_NZNR1_NZNR2A_NZpAKT_NZpBRAF_NZ	pCAMKII_NZpCREB_NZpELK_NZpERK_NZpJNK_NZPKCA_NZpMEK_NZpNR1_NZpNR2A_NZpNR2B_NZpPKCAB_NZpRSK_NZAKT_NZBRAF_NZCAMKII_NZCREB_NZELK_NZERK_NZGSK3B_NZJNK_NZMEK_NZTRKA_NZRSK_NZAPP_NZ
Bcatenin_NZSOD1_NZMTOR_NZP38_NZpMTOR_NZDSCR1_NZAMPKA_NZNR2B_NZpNUMB_NZRAPTOR_NZTIAM1_NZpP70S6_NNUMB_NZP70S6_NZpGSK3B_NZpPKCG_NZCDK5_NZS6_NZADARB1_NZAcetylH3K9_NZRRP1_NZBAX_NZARC_NZERBB4_NZnNOS_NZTau_NZGFAP_NZGluR3_NZGluR4_NZIL1B_NZP3525_NZpCASP9_NZPSD95_NZSNCA_NZUbiquitin_NZpGSK3B_Tyr216_NZSHH_NZBAD_NBCL2_NZpS6_NZpCFOS_NZSYP_NZ	H3AcK18_NZEGR1_NZH3MeK4_NZCaNA_Nr   )ZpclassZsurvivedrs   sexr   ZsibspZparchticketfarecabinembarkedboatbody	home.destrn   ru   rx   r|   r   r   r   r   r   r   r   r   datasets_column_namesG  s    )PP r   c                   C   s`   i ddddddddddddddddddddddddddddi i i dd	id
ddddddddS )Nrv   	   ru   rp   r      )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rz   i  rq   i  i7  i  i4  )r   r   r   r   r   r   r   r   r   r   r   r   r   datasets_missing_values:  sT    r  zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))rn   r   rq   rp   r   )rn   r   rq   rp   r   )ru   r   !   r   r   )ru   r   r  ru   rp   )rx   r   rq   rz   r   )rx   r   rq   r   rz   )r|   r   r   r~   r   )r|   r   r   E   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   rq   r   r   )r   r   rq   r   r   )r   r   r   r   r   )r   r   r   r   r   c	                    s   t d}	|	jjj t| ||d t|dd|d}
|
j}t fdd|j	D }tdd |j	D }td	d |j	D }||ksJ ||ksJ ||ksJ |j
 || ksJ |   }| D ]$\}}|| |d
}||ksJ qdS )zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   c                    s   g | ]}t | r|qS r   )r   .0r   r   r   r   
<listcomp>      z5test_fetch_openml_types_inference.<locals>.<listcomp>c                 S   s   g | ]}|j d kr|qS )r@   r   r  r   r   r   r    r	  c                 S   s   g | ]}|j d kr|qS )ir
  r  r   r   r   r    r	  r   N)r   r   r   r   r   rm   r   r   r1   r   r   tolistisnasumto_dictitemsget)r   rl   r   Zexpected_n_categoriesZexpected_n_floatsZexpected_n_intsrD   r   r  r   r   r   Zn_categoriesZn_floatsZn_intsZframe_feature_to_n_nanrs   Z	n_missingZexpected_missingr   r  r   !test_fetch_openml_types_inferencek  s.    (

r  zparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                 C   sT   d}t | |d tjt|d" tf d|i| W d    n1 sF0    Y  d S )Nr   Tmatchrl   )rm   r   raisesr_   r   r   paramserr_msgrl   r   r   r   &test_fetch_openml_validation_parameter  s    r  r  auto)r   r   c                 C   s   d}zt d W nb tyr   t| |d d}tjt|d" tf d|i| W d   n1 sd0    Y  Y n
0 tddS )	z=Check that we raise the proper errors when we require pandas.r   Z!test_fetch_openml_requires_pandasTz:requires pandas to be installed. Alternatively, explicitlyr  rl   Nz.This test requires pandas to not be installed.)r   ImportErrorrm   r   r  r   r   )r   r  rl   r  r   r   r   'test_fetch_openml_requires_pandas_error  s    6r  z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                 C   s`   t d d}t| |d t jt|d$ tf |dd| W d   n1 sR0    Y  dS )ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)rl   r   N)r   r   rm   r  r_   r   r  r   r   r   #test_fetch_openml_sparse_arff_error  s    
r  zdata_id, data_type)rn   	dataframe)r  sparsec                 C   sN   t d}t| |d t|ddd}|dkr2|jntjj}t|j	|sJJ dS )z&Check the auto mode of `fetch_openml`.r   Tr  F)rl   r   r   r   N)
r   r   rm   r   r   scipyr!  Z
csr_matrixr   r   )r   rl   Z	data_typer   r   klassr   r   r   test_fetch_openml_auto_mode  s
    

r$  c              	   C   s   t d d}t| |d d}t jt|dJ tdd  t|ddd	d
 W d   n1 s^0    Y  W d   n1 s|0    Y  dS )z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)Zworking_memoryFr   r   N)r   r   rm   warnsUserWarningr   r   )r   rl   rV   r   r   r   :test_convert_arff_data_dataframe_warning_low_memory_pandas  s    
r'  c                 C   s`   d}d}t | || td}tjt|d  t|dddd W d   n1 sR0    Y  dS )	z\Check that a warning is raised when multiple versions exist and no version is
    requested.rn   rr   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )rs   r   r   r   N)rm   r/   escaper   r%  r&  r   )r   rD   rl   Z	data_namerV   r   r   r   ,test_fetch_openml_iris_warn_multiple_version(  s    	r)  c                 C   sT   d}d}d}d}t | || t||dddd}|jj||fksBJ |jdu sPJ dS )z/Check that we can get a dataset without target.rn   Nro   r   Fr   rl   r   r   r   r   )rm   r   r   r   r   )r   rD   rl   r   Zexpected_observationsZexpected_featuresr   r   r   r   test_fetch_openml_no_targetC  s    r+  c                 C   sb   t d d}t| ||d t|dd|d}|jjd }|jd   sNJ t|j	g d d	S )
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTrl   r   r   r   r   )ZFEMALEZMALE_N)
r   r   rm   r   r   r   r  anyr   r   )r   rD   r   rl   ZpenguinsZ	cat_dtyper   r   r   test_missing_values_pandasW  s    
r/  r     glass2)rl   rs   rt   c                 C   s~   d}t | || d}tjt|d& tf dddd|}W d   n1 sN0    Y  |jjdkshJ |jd	 d
kszJ dS )z;Check that we raise a warning when the dataset is inactive.r0  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r   r   Z40675)rm   r   r%  r&  r   r   r   r   )r   rD   r   rl   rV   r1  r   r   r   test_fetch_openml_inactivem  s    
$r3  z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )rl   r   z1Can only handle homogeneous multi-target datasets)rl   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer   )rl   r   r   zTarget column 'family'Z	undefinedz(Could not find target_column='undefined'c                 C   sp   t | || |dds |dkr*td tj||d$ tf d|d| W d    n1 sb0    Y  d S )Nr   Tr   r  F)r   r   )rm   r  r   r   r  r   )r   rD   rl   r  err_typer  r   r   r   r   test_fetch_openml_error  s
    2
r5  zparams, err_type, err_msgr   rt   zCThe 'version' parameter of fetch_openml must be an int in the rangeZnAmE)rl   rs   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 C   s@   t j||d tf i |  W d    n1 s20    Y  d S )Nr  )r   r  r   )r  r4  r  r   r   r   )test_fetch_openml_raises_illegal_argument  s    r6  c                 C   s^  d}d}d}t | || d}||}tjt|d" t||dddd W d    n1 s\0    Y  d	}||}tjt|d" t||dddd W d    n1 s0    Y  d}||}tjt|d& t||d
gdddd W d    n1 s0    Y  d	}||}tjt|d& t||d
gdddd W d    n1 sP0    Y  d S )Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.ZMouseIDr  Fr   r*  ZGenotyper   )rm   formatr   r%  r&  r   )r   rD   rl   Zexpected_row_id_msgZexpected_ignore_msgZ
target_colrV   r   r   r   test_warn_ignore_attribute  sX    
$
$
$
r8  c                 C   sV   d}t | || d}tjt|d  t|dddd W d    n1 sH0    Y  d S )Nrq   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r,  rm   r   r%  r&  r   r   rD   rl   rV   r   r   r   test_dataset_with_openml_error  s
    r;  c                 C   sV   d}t | || d}tjt|d  t|dddd W d    n1 sH0    Y  d S )Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r,  r9  r:  r   r   r    test_dataset_with_openml_warning  s
    r<  c                 C   s   t d d}t| |dd |dddd}tf i |}tf i |dddii}td	d
 |jd jjD srJ tdd
 |jd jjD rJ dS )zACheck that we can overwrite the default parameters of `read_csv`.r   6  Frl   rD   Tr   Zread_csv_kwargsskipinitialspacec                 s   s   | ]}| d V  qdS  Nr9   r  r   r   r   r   	<genexpr>0  s   zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>r   c                 s   s   | ]}| d V  qdS r@  rB  rC  r   r   r   rD  3  s   N)	r   r   rm   r   r   r   r   r   r.  )r   rl   common_paramsZadult_without_spacesZadult_with_spacesr   r   r   3test_fetch_openml_overwrite_default_params_read_csv  s(    
	rF  c           	      C   st   d}t | || tjjj|}t|d}t||}t	||}t
j|sRJ t||}| | kspJ d S )Nrn   scikit_learn_data)rm   rh   ri   rj   
_DATA_FILEr7  strmkdirr
   r	   ospathisfiler   )	r   rD   tmpdirrl   openml_pathcache_directoryZ	response1locationZ	response2r   r   r   test_open_openml_url_cache=  s    


rR  write_to_diskc                    s   d}t jjj|}t|d}t||  fdd}| t jjd| t	j
tdd t|| W d    n1 sz0    Y  tj rJ d S )Nrn   rG  c                    sD   r8t  d}|d W d    n1 s.0    Y  tdd S )Nw Invalid request)r;   writer_   )r`   ra   rb   r@   rQ  rS  r   r   rc   U  s    (z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenrd   rV  r  )rh   ri   rj   rH  r7  rI  rJ  r	   rg   r   r  r_   r
   rK  rL  exists)r   rN  rS  rl   rO  rP  rc   r   rX  r   'test_open_openml_url_unlinks_local_pathN  s    
(rZ  c                    s   d}t jjj|}t| d}t|| t	tj
  t d}|d W d    n1 sh0    Y  t|| fdd}d}tjt|d | }W d    n1 s0    Y  |d	ksJ d S )
Nrn   rG  rT  rU  c                      s   t j rtddS )NzFile exist!rq   )rK  rL  rY  	Exceptionr   rQ  r   r   
_load_datam  s    z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading filer  rq   )rh   ri   rj   rH  r7  rI  rJ  r	   rK  makedirsrL  dirnamer;   rW  r   r   r%  RuntimeWarning)rN  rl   rO  rP  r@   r]  Zwarn_msgresultr   r\  r   test_retry_with_clean_cachec  s    
($rb  c                 C   sp   d}t jjj|}t| d}t||dd }d}tj	t
|d |  W d    n1 sb0    Y  d S )Nrn   rG  c                   S   s   t d ddd t dd S )NrR   rS   rT   r   r   r   r   r   r   r]    s    z:test_retry_with_clean_cache_http_error.<locals>._load_datarS   r  )rh   ri   rj   rH  r7  rI  rJ  r   r   r  r   )rN  rl   rO  rP  r]  	error_msgr   r   r   &test_retry_with_clean_cache_http_errorz  s    
re  c           
      C   s   dd }d}t |d}t| || t|d|dddd\}}| tjjd	| t|d|dddd\}}	tj	
|| tj	
||	 d S )
Nc                 _   s   t d|   d S )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)r_   r]   r`   ra   rb   r   r   r   _mock_urlopen_raise  s
    z4test_fetch_openml_cache.<locals>._mock_urlopen_raisern   rG  TFr   )rl   r   r+   r   r   r   rd   )rI  rJ  rm   r   rg   rh   ri   rj   r   r   r   )
r   rD   rN  rg  rl   rP  Z	X_fetchedZ	y_fetchedZX_cachedZy_cachedr   r   r   test_fetch_openml_cache  s.    
	
rh  zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                    sX  |s|dkrt d d}t| |d td d|  }d}t|| }|d  |d8}	t|	d}
t|
	 }d	|t
|d
 < W d   n1 s0    Y  t d}|| W d   n1 s0    Y  tjjj fdd}| tjjd| t t$}tjj|d||d W d   n1 s:0    Y  |dsTJ dS )z/Check that the checksum is working as expected.r   ru   Tr,   r-   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr8   %   rq   Nwbc                    s`   |   }|drTt d}| }W d    n1 s:0    Y  tt|ddS | S d S )Nzdata/v1/download/1666876r8   Tr   )r]   endswithr;   r   r   r   )r`   ra   rb   r3   r@   Zcorrupted_dataZcorrupt_copy_pathZmocked_openml_urlr   r   swap_file_mock  s    
&z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockrd   Fr,  Z1666876)r   r   rm   re   r   r:   r;   r   	bytearrayr   r1   GzipFilerW  rh   ri   rj   rd   rg   r  r_   r   r  )r   r   r   rN  r   rl   Zoriginal_data_moduleZoriginal_data_file_nameZoriginal_data_pathZ	orig_fileZ	orig_gzipr   Zmodified_gziprm  excr   rl  r   !test_fetch_openml_verify_checksum  s,    
.(
	&rq  c              	   C   s   dd }|  tjjd| d}tjttdt	|  dd\}tj
tdd t|d d	d
 W d    n1 sr0    Y  t|dksJ W d    n1 s0    Y  d S )Nc                 _   s   t d ddd t dd S )Ni  Simulated network errorrT   rc  rf  r   r   r   _mock_urlopen_network_error  s    zPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_errorrd   zinvalid-urlz+A network error occurred while downloading z. Retrying...r  rr  r   )delayr   )rg   rh   ri   rj   r   r%  r&  r/   r(  r   r  r   r
   r1   )r   rs  Zinvalid_openml_urlrecordr   r   r   +test_open_openml_url_retry_on_network_error  s"    
,rv  )r   r   c                 C   sh   |dkrt d d}t| || tjj|dd|d}|dusBJ |d jdksTJ d|d	 vsdJ dS )
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr,  Nr   )e      Zanimalr   )r   r   rm   rh   ri   r   r   )r   rD   r   rl   Zdatasetr   r   r   &test_fetch_openml_with_ignored_feature   s    
rz  c                 C   s  t d}d}t| |dd dd|d}tf ddi|}tf ddi|}|j|j|j |jjd		 rtJ |jj
d		 rJ tf dd
d|}tf dd
d|}|j|jd |jd  |jd jd		 rJ |jd j
d		 rJ dS )zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   Fr>  Tr   r   rl   r   r   'r   )r   r   r   N)r   r   rm   r   r   r   r   rI  r9   r.  rk  r   )r   r   rl   rE  Zmice_pandasZmice_liac_arffr   r   r   test_fetch_openml_strip_quotes  s(    
r}  c                 C   sj   t d}d}t| |dd dd|d}tf ddi|}tf ddi|}|j|jd	 |jd	  d
S )zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   r=  Fr>  Tr{  r   r   r   N)r   r   rm   r   r   r   r   r   r   rl   rE  Zadult_pandasZadult_liac_arffr   r   r   $test_fetch_openml_leading_whitespace3  s    
r  c                 C   sb   t d}d}t| |dd dd|d}tf ddi|}tf ddi|}|j|j|j d	S )
zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  Fr>  Tr{  r   r   N)r   r   rm   r   r   r   r   r~  r   r   r   &test_fetch_openml_quotechar_escapecharE  s    
r  )T__doc__r   rY   rK  r/   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   Zscipy.sparser"  rh   r   Zsklearn.datasetsr   Zfetch_openml_origZsklearn.datasets._openmlr   r	   r
   r   Zsklearn.utilsr   Z$sklearn.utils._optional_dependenciesr   Zsklearn.utils._testingr   r   r   re   rf   r   rm   markZparametrizer   r   r   r   r   r   r   r   r   Zfixturer   r  r  r  r  filterwarningsr  r$  r'  r)  r+  r/  r3  r_   KeyErrorr5  r6  r8  r;  r<  rF  rR  rZ  rb  re  rh  rq  rv  rz  r}  r  r  r   r   r   r   <module>   s  {+'
?

/

 s

00
	









-




1

 


"	
.