from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Literal

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.readers import InputExample
from sentence_transformers.similarity_functions import SimilarityFunction
from sentence_transformers.util import (
    pairwise_cos_sim,
    pairwise_dot_score,
    pairwise_euclidean_sim,
    pairwise_manhattan_sim,
)

if TYPE_CHECKING:
    import numpy as np

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class TripletEvaluator(SentenceEvaluator):
    """
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example).
    Checks if ``similarity(sentence, positive_example) > similarity(sentence, negative_example) + margin``.

    Args:
        anchors (List[str]): Sentences to check similarity to. (e.g. a query)
        positives (List[str]): List of positive sentences
        negatives (List[str]): List of negative sentences
        main_similarity_function (Union[str, SimilarityFunction], optional):
            The similarity function to use. If not specified, use cosine similarity,
            dot product, Euclidean, and Manhattan similarity. Defaults to None.
        margin (Union[float, Dict[str, float]], optional): Margins for various similarity metrics.
            If a float is provided, it will be used as the margin for all similarity metrics.
            If a dictionary is provided, the keys should be 'cosine', 'dot', 'manhattan', and 'euclidean'.
            The value specifies the minimum margin by which the negative sample should be further from
            the anchor than the positive sample. Defaults to None.
        name (str): Name for the output. Defaults to "".
        batch_size (int): Batch size used to compute embeddings. Defaults to 16.
        show_progress_bar (bool): If true, prints a progress bar. Defaults to False.
        write_csv (bool): Write results to a CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to.
            `None` uses the model's current truncation dimension. Defaults to None.
        similarity_fn_names (List[str], optional): List of similarity function names to evaluate.
            If not specified, evaluate using the ``model.similarity_fn_name``.
            Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TripletEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with (anchor, positive, negative) triplets
            dataset = load_dataset("sentence-transformers/all-nli", "triplet", split="dev")

            # Initialize the TripletEvaluator using anchors, positives, and negatives
            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                name="all_nli_dev",
            )
            results = triplet_evaluator(model)
            '''
            TripletEvaluator: Evaluating the model on the all_nli_dev dataset:
            Accuracy Cosine Similarity:        95.60%
            '''
            print(triplet_evaluator.primary_metric)
            # => "all_nli_dev_cosine_accuracy"
            print(results[triplet_evaluator.primary_metric])
            # => 0.956
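
        The ``margin`` and ``similarity_fn_names`` arguments can also be set
        explicitly; the margin values below are purely illustrative::

            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                margin={"cosine": 0.05, "dot": 0.0, "euclidean": 0.0, "manhattan": 0.0},
                similarity_fn_names=["cosine", "dot"],
                name="all_nli_dev",
            )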
    """

    def __init__(
        self,
        anchors: list[str],
        positives: list[str],
        negatives: list[str],
        main_similarity_function: str | SimilarityFunction | None = None,
        margin: float | dict[str, float] | None = None,
        name: str = "",
        batch_size: int = 16,
        show_progress_bar: bool = False,
        write_csv: bool = True,
        truncate_dim: int | None = None,
        similarity_fn_names: list[Literal["cosine", "dot", "euclidean", "manhattan"]] | None = None,
        main_distance_function: str | SimilarityFunction | None = "deprecated",
    ):
        super().__init__()
        self.anchors = anchors
        self.positives = positives
        self.negatives = negatives
        self.name = name
        self.truncate_dim = truncate_dim

        assert len(self.anchors) == len(self.positives)
        assert len(self.anchors) == len(self.negatives)

        if main_distance_function != "deprecated" and main_similarity_function is None:
            main_similarity_function = main_distance_function
            logger.warning(
                "The 'main_distance_function' parameter is deprecated. Please use 'main_similarity_function' "
                "instead. 'main_distance_function' will be removed in a future release."
            )

        self.main_similarity_function = (
            SimilarityFunction(main_similarity_function) if main_similarity_function else None
        )
        self.similarity_fn_names = similarity_fn_names or []

        # The margin may be a single float (applied to every similarity function) or a
        # per-function dictionary; missing keys default to 0.
        if margin is None:
            self.margin = {"cosine": 0, "dot": 0, "manhattan": 0, "euclidean": 0}
        elif isinstance(margin, (float, int)):
            self.margin = {"cosine": margin, "dot": margin, "manhattan": margin, "euclidean": margin}
        elif isinstance(margin, dict):
            self.margin = {**{"cosine": 0, "dot": 0, "manhattan": 0, "euclidean": 0}, **margin}
        else:
            raise ValueError(
                "`margin` should be a float or a dictionary with keys 'cosine', 'dot', 'manhattan', and 'euclidean'"
            )

        self.batch_size = batch_size
        if show_progress_bar is None:
            show_progress_bar = (
                logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG
            )
        self.show_progress_bar = show_progress_bar

        self.csv_file = "triplet_evaluation" + ("_" + name if name else "") + "_results.csv"
        self.csv_headers = ["epoch", "steps"]
        self.write_csv = write_csv
        self._append_csv_headers(self.similarity_fn_names)

    def _append_csv_headers(self, similarity_fn_names):
        for fn_name in similarity_fn_names:
            self.csv_headers.append(f"accuracy_{fn_name}")

    @classmethod
    def from_input_examples(cls, examples: list[InputExample], **kwargs):
        anchors = []
        positives = []
        negatives = []

        for example in examples:
            anchors.append(example.texts[0])
            positives.append(example.texts[1])
            negatives.append(example.texts[2])
        return cls(anchors, positives, negatives, **kwargs)
    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"TripletEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        embeddings_anchors = self.embed_inputs(model, self.anchors)
        embeddings_positives = self.embed_inputs(model, self.positives)
        embeddings_negatives = self.embed_inputs(model, self.negatives)

        if not self.similarity_fn_names:
            self.similarity_fn_names = [model.similarity_fn_name]
            self._append_csv_headers(self.similarity_fn_names)

        similarity_functions = {
            "cosine": lambda anchors, positives, negatives: (
                pairwise_cos_sim(anchors, positives),
                pairwise_cos_sim(anchors, negatives),
            ),
            "dot": lambda anchors, positives, negatives: (
                pairwise_dot_score(anchors, positives),
                pairwise_dot_score(anchors, negatives),
            ),
            "manhattan": lambda anchors, positives, negatives: (
                pairwise_manhattan_sim(anchors, positives),
                pairwise_manhattan_sim(anchors, negatives),
            ),
            "euclidean": lambda anchors, positives, negatives: (
                pairwise_euclidean_sim(anchors, positives),
                pairwise_euclidean_sim(anchors, negatives),
            ),
        }

        metrics = {}
        for fn_name in self.similarity_fn_names:
            if fn_name in similarity_functions:
                positive_scores, negative_scores = similarity_functions[fn_name](
                    embeddings_anchors, embeddings_positives, embeddings_negatives
                )
                # A triplet counts as correct if the positive is more similar to the anchor
                # than the negative is, by at least the configured margin.
                accuracy = (positive_scores > negative_scores + self.margin[fn_name]).float().mean().item()
                metrics[f"{fn_name}_accuracy"] = accuracy
                logger.info(f"Accuracy {fn_name.capitalize()} Similarity:\t{accuracy:.2%}")

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                with open(csv_path, newline="", mode="w", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(self.csv_headers)
                    writer.writerow([epoch, steps] + list(metrics.values()))
            else:
                with open(csv_path, newline="", mode="a", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow([epoch, steps] + list(metrics.values()))

        if len(self.similarity_fn_names) > 1:
            metrics["max_accuracy"] = max(metrics.values())

        if self.main_similarity_function:
            self.primary_metric = {
                SimilarityFunction.COSINE: "cosine_accuracy",
                SimilarityFunction.DOT_PRODUCT: "dot_accuracy",
                SimilarityFunction.EUCLIDEAN: "euclidean_accuracy",
                SimilarityFunction.MANHATTAN: "manhattan_accuracy",
            }.get(self.main_similarity_function)
        else:
            if len(self.similarity_fn_names) > 1:
                self.primary_metric = "max_accuracy"
            else:
                self.primary_metric = f"{self.similarity_fn_names[0]}_accuracy"

        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        return model.encode(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=self.show_progress_bar,
            convert_to_numpy=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    def get_config_dict(self):
        config_dict = {}
        # Only report non-default settings.
        if self.margin != {"cosine": 0, "dot": 0, "manhattan": 0, "euclidean": 0}:
            config_dict["margin"] = self.margin
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict