from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Literal

import numpy as np
from sklearn.metrics import average_precision_score, matthews_corrcoef

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.readers import InputExample
from sentence_transformers.similarity_functions import SimilarityFunction
from sentence_transformers.util import (
    pairwise_cos_sim,
    pairwise_dot_score,
    pairwise_euclidean_sim,
    pairwise_manhattan_sim,
)

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class BinaryClassificationEvaluator(SentenceEvaluator):
    """
    Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and
    dissimilar sentences.
    The metrics are cosine similarity, dot score, and Euclidean and Manhattan distances.
    The returned score is the accuracy with the specified metric.

    The results are written in a CSV. If a CSV already exists, then values are appended.

    The labels need to be 0 for dissimilar pairs and 1 for similar pairs.

    Args:
        sentences1 (List[str]): The first column of sentences.
        sentences2 (List[str]): The second column of sentences.
        labels (List[int]): labels[i] is the label for the pair (sentences1[i], sentences2[i]). Must be 0 or 1.
        name (str, optional): Name for the output. Defaults to "".
        batch_size (int, optional): Batch size used to compute embeddings. Defaults to 32.
        show_progress_bar (bool, optional): If true, prints a progress bar. Defaults to False.
        write_csv (bool, optional): Write results to a CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        similarity_fn_names (Optional[List[Literal["cosine", "dot", "euclidean", "manhattan"]]], optional): The similarity functions to use. If not specified, defaults to the ``similarity_fn_name`` attribute of the model. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import BinaryClassificationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with two text columns and a class label column (https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
            eval_dataset = load_dataset("sentence-transformers/quora-duplicates", "pair-class", split="train[-1000:]")

            # Initialize the evaluator
            binary_acc_evaluator = BinaryClassificationEvaluator(
                sentences1=eval_dataset["sentence1"],
                sentences2=eval_dataset["sentence2"],
                labels=eval_dataset["label"],
                name="quora_duplicates_dev",
            )
            results = binary_acc_evaluator(model)
            '''
            Binary Accuracy Evaluation of the model on the quora_duplicates_dev dataset:
            Accuracy with Cosine-Similarity:             81.60  (Threshold: 0.8352)
            F1 with Cosine-Similarity:                   75.27  (Threshold: 0.7715)
            Precision with Cosine-Similarity:            65.81
            Recall with Cosine-Similarity:               87.89
            Average Precision with Cosine-Similarity:    76.03
            Matthews Correlation with Cosine-Similarity: 62.48
            '''
            print(binary_acc_evaluator.primary_metric)
            # => "quora_duplicates_dev_cosine_ap"
            print(results[binary_acc_evaluator.primary_metric])
            # => 0.760277070888393
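
    If more than one similarity function is given via ``similarity_fn_names``, each
    metric is reported once per function (e.g. ``cosine_ap``, ``dot_ap``), aggregated
    ``max_*`` metrics are added on top, and the primary metric becomes ``max_ap``.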
    """

    def __init__(
        self,
        sentences1: list[str],
        sentences2: list[str],
        labels: list[int],
        name: str = "",
        batch_size: int = 32,
        show_progress_bar: bool = False,
        write_csv: bool = True,
        truncate_dim: int | None = None,
        similarity_fn_names: list[Literal["cosine", "dot", "euclidean", "manhattan"]] | None = None,
    ):
        super().__init__()
        self.sentences1 = sentences1
        self.sentences2 = sentences2
        self.labels = labels
        self.truncate_dim = truncate_dim
        self.similarity_fn_names = similarity_fn_names or []

        assert len(self.sentences1) == len(self.sentences2)
        assert len(self.sentences1) == len(self.labels)
        for label in labels:
            assert label == 0 or label == 1

        self.write_csv = write_csv
        self.name = name
        self.batch_size = batch_size
        if show_progress_bar is None:
            show_progress_bar = (
                logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG
            )
        self.show_progress_bar = show_progress_bar

        self.csv_file = "binary_classification_evaluation" + ("_" + name if name else "") + "_results.csv"
        self.csv_headers = ["epoch", "steps"]
        self._append_csv_headers(self.similarity_fn_names)

    def _append_csv_headers(self, similarity_fn_names: list[str]) -> None:
        metrics = ["accuracy", "accuracy_threshold", "f1", "precision", "recall", "f1_threshold", "ap", "mcc"]

        for v in similarity_fn_names:
            for m in metrics:
                self.csv_headers.append(f"{v}_{m}")

    @classmethod
    def from_input_examples(cls, examples: list[InputExample], **kwargs):
        sentences1 = []
        sentences2 = []
        scores = []

        for example in examples:
            sentences1.append(example.texts[0])
            sentences2.append(example.texts[1])
            scores.append(example.label)
        return cls(sentences1, sentences2, scores, **kwargs)

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        """
        Compute the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The model to evaluate.
            output_path (str, optional): Path to save the evaluation results CSV file. Defaults to None.
            epoch (int, optional): The epoch number. Defaults to -1.
            steps (int, optional): The number of steps. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
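                The metric keys are prefixed with the evaluator ``name`` and the
                similarity function, e.g. ``quora_duplicates_dev_cosine_ap``.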
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"Binary Accuracy Evaluation of the model on the {self.name} dataset{out_txt}:")

        if not self.similarity_fn_names:
            self.similarity_fn_names = [model.similarity_fn_name]
            self._append_csv_headers(self.similarity_fn_names)

        scores = self.compute_metrices(model)

        file_output_data = [epoch, steps]
        for header_name in self.csv_headers:
            if header_name.count("_") > 0:
                sim_fct, metric = header_name.split("_", maxsplit=1)
                if sim_fct in scores:
                    file_output_data.append(scores[sim_fct][metric])

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                with open(csv_path, newline="", mode="w", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(self.csv_headers)
                    writer.writerow(file_output_data)
            else:
                with open(csv_path, newline="", mode="a", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(file_output_data)

        # Flatten the nested {similarity_fn: {metric: value}} dict into {"{fn}_{metric}": value}
        metrics = {
            f"{short_name}_{metric}": value
            for short_name, values in scores.items()
            for metric, value in values.items()
        }
        if len(self.similarity_fn_names) > 1:
            metrics.update(
                {
                    f"max_{metric}": max(scores[short_name][metric] for short_name in scores)
                    for metric in scores["cosine"]
                }
            )
            self.primary_metric = "max_ap"
        else:
            self.primary_metric = f"{self.similarity_fn_names[0]}_ap"

        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics)
        return metrics

tj
j	dd dd	d
tjj	dd ddd
tjj	dd ddd
i}t| j}i }| jD ]n}	||	 }
|
d ||   }|
d }|
d }| |||\}}| |||\}}}}t|||rjdnd }|r||kn||k}t||}td| d|d dd|dd td| d|d dd|dd td | d!|d d td"| d#|d d td$| d%|d d td&| d'|d dd( ||||||||d)||	< q|S )*Nc                 S  s   i | ]\}}||qS r1   r1   )rR   sentZembr1   r1   r2   rW      rZ   zBBinaryClassificationEvaluator.compute_metrices.<locals>.<dictcomp>c                   s   g | ]} | qS r1   r1   rR   rn   Zemb_dictr1   r2   
<listcomp>   rZ   zBBinaryClassificationEvaluator.compute_metrices.<locals>.<listcomp>c                   s   g | ]} | qS r1   r1   ro   rp   r1   r2   rq      rZ   c                 S  s
   t | |S rX   )r
   xyr1   r1   r2   <lambda>   rZ   z@BinaryClassificationEvaluator.compute_metrices.<locals>.<lambda>zCosine-SimilarityT)score_fnr   greater_is_betterc                 S  s
   t | |S rX   )r   rr   r1   r1   r2   ru      rZ   zDot-Productc                 S  s
   t | |S rX   )r   rr   r1   r1   r2   ru      rZ   zManhattan-DistanceFc                 S  s
   t | |S rX   )r   rr   r1   r1   r2   ru      rZ   zEuclidean-Distancerv   rw   r   r   rF   zAccuracy with z:             d   z.2fz	(Threshold: z.4frI   zF1 with z:                   zPrecision with z:            zRecall with z:               zAverage Precision with z:    zMatthews Correlation with z: 
)r5   r6   r7   r:   r8   r9   r;   r<   )listsetr   r   	TypeErrorembed_inputszipr	   ZCOSINErV   ZDOT_PRODUCTZ	MANHATTANZ	EUCLIDEANnpasarrayr   r   detachcpunumpyfind_best_acc_and_thresholdfind_best_f1_and_thresholdr   r   r%   r]   )r-   rG   	sentencesZembeddings1Zembeddings2Z
embeddingsZsimilarity_fnsr   Zoutput_scoresr^   Zsimilarity_fnrD   rw   r   accZacc_thresholdr7   r8   r9   r:   r;   Zpredicted_labelsr<   r1   rp   r2   r_      st    
&&z.BinaryClassificationEvaluator.compute_metriceszstr | list[str] | np.ndarrayz
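    # All encoding goes through this helper so that batch size, progress bar,
    # numpy conversion, and the optional embedding truncation are applied uniformly.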
    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        return model.encode(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=self.show_progress_bar,
            convert_to_numpy=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    @staticmethod
    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
        assert len(scores) == len(labels)
        rows = list(zip(scores, labels))
        # Sort so the most "similar" scores come first; one pass can then evaluate the
        # accuracy of every candidate threshold between adjacent sorted scores.
        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        max_acc = 0
        best_threshold = -1

        positive_so_far = 0
        remaining_negatives = sum(np.array(labels) == 0)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            if label == 1:
                positive_so_far += 1
            else:
                remaining_negatives -= 1

            acc = (positive_so_far + remaining_negatives) / len(labels)
            if acc > max_acc:
                max_acc = acc
                best_threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return max_acc, best_threshold

tt |d D ]}|| \}}|d7 }|dkr|	d7 }	|	dkrx|	| }|	|
 }d| | ||  }||krx|}|}|}|| d ||d  d  d }qx||||fS )Nc                 S  s   | d S r   r1   r   r1   r1   r2   ru   [  rZ   zJBinaryClassificationEvaluator.find_best_f1_and_threshold.<locals>.<lambda>r   r   r   r   )r$   r   r   rz   r~   r   r   r   )rD   r   r   r   Zbest_f1Zbest_precisionZbest_recall	thresholdZnextractZncorrectZtotal_num_duplicatesr   r   r.   r8   r9   r7   r1   r1   r2   r   R  s2    

"z8BinaryClassificationEvaluator.find_best_f1_and_thresholdc                 C  s   i }| j d ur| j |d< |S )Nr   )r   )r-   Zconfig_dictr1   r1   r2   get_config_dictv  s    

z-BinaryClassificationEvaluator.get_config_dict)r   r   FTNN)NrF   rF   )__name__
__module____qualname____doc__r#   r,   classmethodrE   rm   r_   r}   staticmethodr   r   r   __classcell__r1   r1   r/   r2   r      s(   >      $& HI#r   )
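

# Minimal usage sketch, mirroring the class docstring example above. It assumes the
# `datasets` package is installed and that the model and dataset can be fetched from
# the Hugging Face Hub; it is illustrative rather than part of the library itself.
if __name__ == "__main__":
    from datasets import load_dataset

    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-mpnet-base-v2")
    eval_dataset = load_dataset("sentence-transformers/quora-duplicates", "pair-class", split="train[-1000:]")
    evaluator = BinaryClassificationEvaluator(
        sentences1=eval_dataset["sentence1"],
        sentences2=eval_dataset["sentence2"],
        labels=eval_dataset["label"],
        name="quora_duplicates_dev",
    )
    results = evaluator(model)
    print(evaluator.primary_metric)           # e.g. "quora_duplicates_dev_cosine_ap"
    print(results[evaluator.primary_metric])  # e.g. 0.76...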