from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator

if TYPE_CHECKING:
    import numpy as np

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class MSEEvaluator(SentenceEvaluator):
    """
    Computes the mean squared error (x100) between the computed sentence embedding
    and some target sentence embedding.

    The MSE is computed between ||teacher.encode(source_sentences) - student.encode(target_sentences)||.

    For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
    and target_sentences are in a different language like German, Chinese, Spanish...
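
    The reported metric is this squared difference averaged over all sentences and
    embedding dimensions, multiplied by 100; the returned ``negative_mse`` is its
    negation, so that higher values are better. A minimal sketch of the computation,
    assuming ``teacher_emb`` and ``student_emb`` are hypothetical parallel numpy
    matrices of shape ``(num_sentences, dim)``::

        import numpy as np

        # Hypothetical embedding matrices from teacher and student models
        teacher_emb = np.random.rand(8, 384).astype(np.float32)
        student_emb = np.random.rand(8, 384).astype(np.float32)

        mse = ((teacher_emb - student_emb) ** 2).mean() * 100
        negative_mse = -mse  # what the evaluator returns as its primary metric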

    Args:
        source_sentences (List[str]): Source sentences to embed with the teacher model.
        target_sentences (List[str]): Target sentences to embed with the student model.
        teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
            dimension. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import MSEEvaluator
            from datasets import load_dataset

            # Load a model
            student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
            teacher_model = SentenceTransformer('all-mpnet-base-v2')

            # Load any dataset with some texts
            dataset = load_dataset("sentence-transformers/stsb", split="validation")
            sentences = dataset["sentence1"] + dataset["sentence2"]

            # Given source and target sentences, the MSEEvaluator computes the MSE
            # between the teacher and student embeddings.
            mse_evaluator = MSEEvaluator(
                source_sentences=sentences,
                target_sentences=sentences,
                teacher_model=teacher_model,
                name="stsb-dev",
            )
            results = mse_evaluator(student_model)
            '''
            MSE evaluation (lower = better) on the stsb-dev dataset:
            MSE (*100):  0.805045
            '''
            print(mse_evaluator.primary_metric)
            # => "stsb-dev_negative_mse"
            print(results[mse_evaluator.primary_metric])
            # => -0.8050452917814255
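
        The example above uses the same sentences as source and target, measuring
        how closely the student reproduces the teacher. In the multilingual
        distillation setting, the two lists are parallel translations instead.
        A minimal sketch with hypothetical parallel data::

            # English sources for the teacher, German targets for the student
            en_sentences = ["The cat sits outside", "A man is playing guitar"]
            de_sentences = ["Die Katze sitzt draußen", "Ein Mann spielt Gitarre"]

            mse_evaluator = MSEEvaluator(
                source_sentences=en_sentences,
                target_sentences=de_sentences,
                teacher_model=teacher_model,
                name="en-de-dev",
            )
            results = mse_evaluator(student_model)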
    """

    def __init__(
        self,
        source_sentences: list[str],
        target_sentences: list[str],
        teacher_model=None,
        show_progress_bar: bool = False,
        batch_size: int = 32,
        name: str = "",
        write_csv: bool = True,
        truncate_dim: int | None = None,
    ):
        super().__init__()
        self.target_sentences = target_sentences
        self.show_progress_bar = show_progress_bar
        self.batch_size = batch_size
        self.name = name
        self.truncate_dim = truncate_dim

        self.csv_file = "mse_evaluation_" + name + "_results.csv"
        self.csv_headers = ["epoch", "steps", "MSE"]
        self.write_csv = write_csv
        self.primary_metric = "negative_mse"
        # The teacher embeddings are computed once here and reused on every call;
        # only the student (target) embeddings are recomputed per evaluation.
        self.source_embeddings = self.embed_inputs(teacher_model, source_sentences)

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        target_embeddings = self.embed_inputs(model, self.target_sentences)

        mse = ((self.source_embeddings - target_embeddings) ** 2).mean()
        mse *= 100

        logger.info(f"MSE evaluation (lower = better) on the {self.name} dataset{out_txt}:")
        logger.info(f"MSE (*100):\t{mse:4f}")

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)

                writer.writerow([epoch, steps, mse])

        # Return the negative score, as SentenceTransformers maximizes the metric.
        metrics = {"negative_mse": -mse}
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        return model.encode(
            sentences,
            show_progress_bar=self.show_progress_bar,
            batch_size=self.batch_size,
            convert_to_numpy=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    @property
    def description(self) -> str:
        return "Knowledge Distillation"

    def get_config_dict(self):
        config_dict = {}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict