a
    h>                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ dd Zejdedd Zejdedd Zejdedd Zejdedd Z ejdedd Z!dd Z"ejdddd Z#dd Z$d d! Z%eejdeejddd"d# Z&eejded$d% Z'd&d' Z(ejddejded(d) Z)ejddejded*d+ Z*ejded,d- Z+ejded.d/ Z,ejded0d1 Z-ejded2d3 Z.d4d5 Z/d6d7 Z0ejd8g d9ejded:d; Z1ejded<d= Z2ejd>d?d@dA Z3ejd>d?dBdC Z4dS )D    N)StringIO)assert_array_equal)
block_diag)psi)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)NotFittedError)assert_allcloseassert_almost_equalassert_array_almost_equal!if_safe_multiprocessing_with_blas)CSR_CONTAINERSc                 C   s6   d}t jd|td}|g| }t| }| |}||fS )N   )r   r   )dtype)npfullintr   )csr_containern_componentsblockblocksX r   g/var/www/html/assistant/venv/lib/python3.9/site-packages/sklearn/decomposition/tests/test_online_lda.py_build_sparse_array   s    
r   r   c                 C   sR   t | \}}d| }t|||dd}t|dd}||}||}t|| d S )Ng      ?r   )r   Zdoc_topic_priorZtopic_word_priorrandom_stater   r   )r   r   fit_transformr   )r   r   r   Zpriorlda_1lda_2Ztopic_distr_1Ztopic_distr_2r   r   r   test_lda_default_prior_params$   s    

r!   c                 C   s|   t jd}t| \}}t|dd|d}|| g d}|jD ]6}t| dd  d d d }t	t
||v s@J q@d S )Nr      batch)r   evaluate_everylearning_methodr   )r   r"      )r         )         r   randomRandomStater   r   fitcomponents_setargsorttuplesortedr   rngr   r   ldacorrect_idx_grps	componenttop_idxr   r   r   test_lda_fit_batch6   s    

r>   c                 C   s~   t jd}t| \}}t|ddd|d}|| g d}|jD ]6}t| dd  d d d }t	t
||v sBJ qBd S )	Nr         $@r"   online)r   learning_offsetr$   r%   r   r&   r-   r.   r/   r8   r   r   r   test_lda_fit_onlineJ   s    

rB   c           	      C   s   t jd}t| \}}t|dd|d}tdD ]}|| q0g d}|jD ]6}t|	 dd  d d d }t
t||v sNJ qNd S )	Nr   r?   d   r   rA   total_samplesr   r   r&   r-   r.   r   r0   r1   r   r   rangeZpartial_fitr3   r4   r5   r6   r7   	r   r9   r   r   r:   ir;   cr=   r   r   r   test_lda_partial_fit_   s    
rK   c                 C   s~   t jd}t| \}}t|d|d}||  g d}|jD ]6}t|	 dd  d d d }t
t||v sBJ qBd S )Nr   r#   r   r%   r   r&   r-   r.   )r   r0   r1   r   r   r2   toarrayr3   r4   r5   r6   r7   r8   r   r   r   test_lda_dense_inputt   s    
rN   c                  C   s   t jd} | jddd}d}t|| d}||}|dk sDJ tt j|dd	t 	|j
d  |j|d
d}t|||jdd	d d t jf   d S )Nr   r)      
   sizer   r   g        r"   ZaxisF)	normalize)r   r0   r1   randintr   r   anyr   sumonesshape	transformnewaxis)r9   r   r   r:   X_transZX_trans_unnormalizedr   r   r   test_lda_transform   s    
 r^   method)r@   r#   c                 C   sL   t jd}|jddd}td| |d}||}||}t||d d S )Nr   rQ   )2   rP   rR   r)   rL   r(   )r   r0   r1   rV   r   r   r[   r   )r_   r9   r   r:   ZX_fitr]   r   r   r   test_lda_fit_transform   s    

ra   c                  C   sR   t dd} t }d}tjt|d ||  W d    n1 sD0    Y  d S )N)r)   rQ         z^Negative values in data passedmatch)r   r   r   pytestraises
ValueErrorr2   )r   r:   regexr   r   r   test_lda_negative_input   s
    ri   c                  C   s`   t jd} | jddd}t }d}tjt|d || W d    n1 sR0    Y  d S )Nr   r(   rO   rR   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rc   )	r   r0   r1   rV   r   re   rf   r	   
perplexity)r9   r   r:   rh   r   r   r   test_lda_no_component_error   s    rk   c           	      C   s~   t |\}}tjd}t|d| d|d}|| g d}|jD ]6}t| dd  d d d }t	t
||v sBJ qBd S )Nr   r'   r"   )r   n_jobsr%   r$   r   r&   r-   r.   )r   r   r0   r1   r   r2   r3   r4   r5   r6   r7   )	r_   r   r   r   r9   r:   r;   rJ   r=   r   r   r   test_lda_multi_jobs   s    

rm   c           	      C   s   t jd}t| \}}t|ddd|d}tdD ]}|| q2g d}|jD ]6}t|	 dd  d d d }t
t||v sPJ qPd S )	Nr   r'         @   )r   rl   rA   rE   r   r&   r-   r.   rF   rH   r   r   r   test_lda_partial_fit_multi_jobs   s    
rp   c                  C   s   t jd} | dd}| dd}t jjd|dfd}t|dd| d	}|| | jd|d
 |fd}tjtdd |	|| W d    n1 s0    Y  | jd||d
 fd}tjtdd |	|| W d    n1 s0    Y  d S )Nr   r   r*   rQ   r(   rR   rn   rP   rD   r"   zNumber of samplesrc   zNumber of topics)
r   r0   r1   rV   r   r2   re   rf   rg   Z_perplexity_precomp_distr)r9   r   Z	n_samplesr   r:   Zinvalid_n_samplesZinvalid_n_componentsr   r   r   test_lda_preplexity_mismatch   s"    
*rq   c           
      C   s   t |\}}t|d| ddd}t|d| ddd}|| |j|dd}|| |j|dd}||kslJ |j|dd}|j|dd}	||	ksJ d S )	Nr"   rC   r   r   max_iterr%   rE   r   rQ   FZsub_samplingT)r   r   r2   rj   )
r_   r   r   r   r   r    perp_1perp_2Zperp_1_subsamplingZperp_2_subsamplingr   r   r   test_lda_perplexity   s.    

rw   c                 C   sh   t |\}}t|d| ddd}t|d| ddd}|| ||}|| ||}||ksdJ d S )Nr"   rC   r   rr   rQ   )r   r   r   score)r_   r   r   r   r   r    Zscore_1Zscore_2r   r   r   test_lda_score  s(    



ry   c                 C   sN   t | \}}t|ddddd}|| ||}|| }t|| d S )Nr"   r#   rC   r   rr   )r   r   r2   rj   rM   r   )r   r   r   r:   ru   rv   r   r   r   test_perplexity_input_format:  s    

rz   c                 C   sd   t | \}}t|ddd}|| |j|dd}||}td|t|j  }t	|| d S )NrQ   r   )r   rs   r   Frt   rb   )
r   r   r2   rj   rx   r   exprX   datar   )r   r   r   r:   Zperplexity_1rx   Zperplexity_2r   r   r   test_lda_score_perplexityL  s    

r}   c                 C   sF   t | \}}t|ddddd}|| |j}||}t|| d S )Nr"   r#   r   )r   rs   r%   r   r$   )r   r   r2   Zbound_rj   r   )r   r   r   r:   Zperplexity1Zperplexity2r   r   r   test_lda_fit_perplexity[  s    

r~   c                 C   sR   t d}|| |fD ]6}tdd|}t|jjddt |jjd  qdS )z+Test LDA on empty document (all-zero rows).)r)   r(   i  )rs   r   rT   r"   N)	r   Zzerosr   r2   r   r3   rX   rY   rZ   )r   Zr   r:   r   r   r   test_lda_empty_docsr  s    
r   c               	   C   s   t ddd} t | }t| d| t|t t| tt |  dd | dd} tt	| t| tt j| dd	d
d
t j
f  ddd d
S )z9Test Cython version of Dirichlet expectation calculation.irQ   i'  r   gҶOɃ;)atolrC   r"   rT   Ngdy=gA:)>)Zrtolr   )r   ZlogspaceZ
empty_liker   r
   r{   r   rX   Zreshaper   r\   )xZexpectationr   r   r   test_dirichlet_expectation}  s    
&&r   c                 C   s   t |\}}t|dd| |dd}t }tj| }	t_z|| W |	t_n|	t_0 | d}
| d}||
kszJ ||ksJ d S )Nr   r#   r   )r   rs   r%   verboser$   r   
rj   )r   r   r   sysstdoutr2   getvaluecount)r   r$   expected_linesexpected_perplexitiesr   r   r   r:   outZold_outZn_linesZn_perplexityr   r   r   check_verbosity  s$    r   z;verbose,evaluate_every,expected_lines,expected_perplexities))Fr"   r   r   )Fr   r   r   )Tr   r   r   )Tr"   r   r   )Tr'   r   r"   c                 C   s   t | |||| d S )N)r   )r   r$   r   r   r   r   r   r   test_verbosity  s    
r   c                 C   s@   t | \}}t|d|}| }tdd t|D | dS )z6Check feature names out for LatentDirichletAllocation.)r   c                 S   s   g | ]}d | qS )Zlatentdirichletallocationr   ).0rI   r   r   r   
<listcomp>      z.test_lda_feature_names_out.<locals>.<listcomp>N)r   r   r2   Zget_feature_names_outr   rG   )r   r   r   r:   namesr   r   r   test_lda_feature_names_out  s    r   r%   )r#   r@   c                 C   s^   t jd}|jddj|dd}tdd| d}|| |jj|ksJJ |j	j|ksZJ dS )	z2Check data type preservation of fitted attributes.r   rO   rR   F)copyr)   r   r   r%   N)
r   r0   r1   uniformastyper   r2   r3   r   Zexp_dirichlet_component_)r%   Zglobal_dtyper9   r   r:   r   r   r   test_lda_dtype_match  s    
r   c                 C   st   t j|}|jdd}|t j}td|| d|}td|| d|}t|j	|j	 t|
||
| dS )z>Check numerical consistency between np.float32 and np.float64.rO   rR   r)   r   N)r   r0   r1   r   r   Zfloat32r   r2   r
   r3   r[   )r%   Zglobal_random_seedr9   ZX64ZX32Zlda_64Zlda_32r   r   r   test_lda_numerical_consistency  s    r   )5r   ior   numpyr   re   Znumpy.testingr   Zscipy.linalgr   Zscipy.specialr   Zsklearn.decompositionr   Z&sklearn.decomposition._online_lda_fastr   r   Zsklearn.exceptionsr	   Zsklearn.utils._testingr
   r   r   r   Zsklearn.utils.fixesr   r   markZparametrizer!   r>   rB   rK   rN   r^   ra   ri   rk   rm   rp   rq   rw   ry   rz   r}   r~   r   r   r   r   r   r   r   r   r   r   r   <module>   s~   





	







