a
    h                     @  sz   d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dl	m
Z
 er\d dlZd dlmZ eeZG dd de
ZdS )    )annotationsN)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                	      sp   e Zd ZdZddddd	d
ddd fddZdddd	d	ddddZddddddZed
dddZ  Z	S ) MSEEvaluatorFromDataFrameu  
    Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

    Args:
        dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
            Columns are the respective language codes::

            [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
             {'en': 'My second sentence', ...}]
        teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
        combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
            First entry in a tuple is the source language. The sentence in the respective language will be fetched from
            the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
            will be fetched from the dataframe and passed to the student model
        batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
        name (str, optional): The name of the evaluator. Defaults to "".
        write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
            current truncation dimension. Defaults to None.
        TNzlist[dict[str, str]]r   zlist[tuple[str, str]]intstrboolz
int | None)	dataframeteacher_modelcombinations
batch_sizename	write_csvtruncate_dimc                   s*  t    || _|| _|| _|r(d| }d| d | _ddg| _d| _|| _|| _	i | _
td t }| jD ]\}	}
g }g }|D ]N}||	  dkr||
  dkr|||	  |||	  |||
  q||f| j
|	|
f< | j|	 d	|
  qnt|}| ||}d
d t||D | _d S )N_Zmse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddingsr   -c                 S  s   i | ]\}}||qS  r   ).0sentZembr   r   v/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.py
<dictcomp>U       z6MSEEvaluatorFromDataFrame.__init__.<locals>.<dictcomp>)super__init__r   r   r   csv_filecsv_headersZprimary_metricr   r   dataloggerinfosetstripaddappendlistembed_inputszipteacher_embeddings)selfr   r   r   r   r   r   r   Zall_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowZall_src_embeddings	__class__r   r   r   *   s6    



 z"MSEEvaluatorFromDataFrame.__init__z
str | Nonezdict[str, float])modeloutput_pathr   r   returnc              
     sx  |   g } jD ]\}} j||f \}}	t fdd|D }
t ||	}|
| d  }|d9 }|| t	d j
 d| d| d t	d	|d
 q|d urB jrBtj| j}tj|}t|d|rdnddd>}t|}|s| j |||g|  W d    n1 s80    Y  dt|  i} | j
} |||| |S )Nc                   s   g | ]} j | qS r   )r,   )r   r   r-   r   r   
<listcomp>`   r   z6MSEEvaluatorFromDataFrame.__call__.<locals>.<listcomp>   d   zMSE evaluation on z dataset - r   :zMSE (*100):	Z4fr   awzutf-8)newlinemodeencodingr   )evalr   r"   npZasarrayr*   meanr(   r#   r$   r   r   ospathjoinr    isfileopencsvwriterwriterowr!   itemZprefix_name_to_metricsZ store_metrics_in_model_card_data)r-   r6   r7   r   r   Z
mse_scoresr.   r/   r0   r1   Zsrc_embeddingsZtrg_embeddingsZmseZcsv_pathZoutput_file_existsfrL   Zmetricsr   r9   r   __call__W   s.    
 
2z"MSEEvaluatorFromDataFrame.__call__zstr | list[str] | np.ndarrayz
np.ndarray)r6   	sentencesr8   c                 K  s   |j |f| jd| jd|S )NT)r   Zconvert_to_numpyr   )encoder   r   )r-   r6   rQ   kwargsr   r   r   r*   z   s    z&MSEEvaluatorFromDataFrame.embed_inputs)r8   c                 C  s   dS )NzKnowledge Distillationr   r9   r   r   r   description   s    z%MSEEvaluatorFromDataFrame.description)r   r   TN)Nr5   r5   )
__name__
__module____qualname____doc__r   rP   r*   propertyrT   __classcell__r   r   r3   r   r      s        . #r   )
__future__r   rK   loggingrF   typingr   numpyrD   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Z)sentence_transformers.SentenceTransformerr   	getLoggerrU   r#   r   r   r   r   r   <module>   s   
