a
    hU                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ d dlmZ ertd dl	mZ d dlmZ eeZG dd	 d	eZdS )
    )annotationsN)TYPE_CHECKING)SentenceEvaluator)pytorch_cos_sim)Tensor)SentenceTransformerc                
      sh   e Zd ZdZddddd	d
dddd fddZdddd	d	ddddZddddddZdd Z  ZS )TranslationEvaluatora	  
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    The labels need to indicate the similarity between the sentences.

    Args:
        source_sentences (List[str]): List of sentences in the source language.
        target_sentences (List[str]): List of sentences in the target language.
        show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
        batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
        name (str): The name of the evaluator. Defaults to an empty string.
        print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
            current truncation dimension will be used. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
    F    TNz	list[str]boolintstrz
int | None)source_sentencestarget_sentencesshow_progress_bar
batch_sizenameprint_wrong_matches	write_csvtruncate_dimc	           	        s   t    || _|| _|| _|| _|| _|| _|| _t	| jt	| jksLJ |rXd| }d| d | _
g d| _|| _d| _d S )N_Ztranslation_evaluationz_results.csv)epochstepsZsrc2trgZtrg2srcmean_accuracy)super__init__r   r   r   r   r   r   r   lencsv_filecsv_headersr   Zprimary_metric)	selfr   r   r   r   r   r   r   r   	__class__ q/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/evaluation/TranslationEvaluator.pyr   H   s    

zTranslationEvaluator.__init__r   z
str | Nonezdict[str, float])modeloutput_pathr   r   returnc                 C  s  |dkr0|dkrd| }q4d| d| d}nd}| j d urP|d| j  d7 }td	| j d
| d t| || j}t| || j}t	||
   }d}	d}
tt|D ]}t|| }||kr|	d7 }	q| jrtd|d|d| td| j|  td| j| d|| | dd td| j| d|| | dd t|| }t|dd dd}|d d D ](\}}td|d|dd| j|  qzq|j}tt|D ]&}t|| }||kr|
d7 }
q|	t| }|
t| }td|d d td|d d |d ur| jrtj|| j}tj|}t|d|rbd nd!d"d#>}t|}|s|| j  |||||g W d    n1 s0    Y  |||| d$ d%}| !|| j}| "|||| |S )&Nr$   z after epoch z
 in epoch z after z stepsr
   z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:r      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                 S  s   | d S )Nr*   r"   )xr"   r"   r#   <lambda>       z/TranslationEvaluator.__call__.<locals>.<lambda>T)keyreverse   	zAccuracy src2trg: d   z.2fzAccuracy trg2src: awzutf-8)newlinemodeencoding   )Zsrc2trg_accuracyZtrg2src_accuracyr   )#r   loggerinfor   torchstackembed_inputsr   r   r   detachcpunumpyranger   npZargmaxr   print	enumeratesortedTr   ospathjoinr   isfileopencsvwriterwriterowr   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r   r%   r&   r   r   Zout_txtZembeddings1Zembeddings2Zcos_simsZcorrect_src2trgZcorrect_trg2srciZmax_idxresultsidxZscoreZacc_src2trgZacc_trg2srcZcsv_pathZoutput_file_existsfrM   Zmetricsr"   r"   r#   __call__f   sd    

$$$

2
zTranslationEvaluator.__call__zstr | list[str] | np.ndarrayzlist[Tensor])r%   	sentencesr'   c                 K  s"   |j |f| j| jd| jd|S )NF)r   r   Zconvert_to_numpyr   )encoder   r   r   )r   r%   rT   kwargsr"   r"   r#   r=      s    z!TranslationEvaluator.embed_inputsc                 C  s   i }| j d ur| j |d< |S )Nr   )r   )r   Zconfig_dictr"   r"   r#   get_config_dict   s    

z$TranslationEvaluator.get_config_dict)Fr	   r
   FTN)Nr$   r$   )	__name__
__module____qualname____doc__r   rS   r=   rW   __classcell__r"   r"   r    r#   r      s   5      " Fr   )
__future__r   rL   loggingrG   typingr   r@   rB   r;   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Zsentence_transformers.utilr   r   Z)sentence_transformers.SentenceTransformerr   	getLoggerrX   r9   r   r"   r"   r"   r#   <module>   s   
