from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Callable

import numpy as np
import torch
import tqdm
from sklearn.metrics import average_precision_score, ndcg_score

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.util import cos_sim

if TYPE_CHECKING:
    from torch import Tensor

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class RerankingEvaluator(SentenceEvaluator):
    """
    This class evaluates a SentenceTransformer model for the task of re-ranking.

    Given a query and a list of documents, it computes the score [query, doc_i] for all possible
    documents and sorts them in decreasing order. Then, MRR@10, NDCG@10 and MAP are computed to measure the quality of the ranking.

    Args:
        samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:

            - 'query': The search query.
            - 'positive': A list of positive (relevant) documents.
            - 'negative': A list of negative (irrelevant) documents.
        at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import RerankingEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer("all-MiniLM-L6-v2")

            # Load a dataset with queries, positives, and negatives
            eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation")

            samples = [
                {
                    "query": sample["query"],
                    "positive": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if is_selected],
                    "negative": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if not is_selected],
                }
                for sample in eval_dataset
            ]

            # Initialize the evaluator
            reranking_evaluator = RerankingEvaluator(
                samples=samples,
                name="ms-marco-dev",
            )
            results = reranking_evaluator(model)
            '''
            RerankingEvaluator: Evaluating the model on the ms-marco-dev dataset:
            Queries: 9706      Positives: Min 1.0, Mean 1.1, Max 5.0   Negatives: Min 1.0, Mean 7.1, Max 9.0
            MAP: 56.07
            MRR@10: 56.70
            NDCG@10: 67.08
            '''
            print(reranking_evaluator.primary_metric)
            # => ms-marco-dev_ndcg@10
            print(results[reranking_evaluator.primary_metric])
            # => 0.6708042171399308
    
    """

    def __init__(
        self,
        samples: list[dict[str, str | list[str]]],
        at_k: int = 10,
        name: str = "",
        write_csv: bool = True,
        similarity_fct: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = cos_sim,
        batch_size: int = 64,
        show_progress_bar: bool = False,
        use_batched_encoding: bool = True,
        truncate_dim: int | None = None,
        mrr_at_k: int | None = None,
    ) -> None:
        super().__init__()
        self.samples = samples
        self.name = name
        if mrr_at_k is not None:
            logger.warning(f"The `mrr_at_k` parameter has been deprecated; please use `at_k={mrr_at_k}` instead.")
            self.at_k = mrr_at_k
        else:
            self.at_k = at_k
        self.similarity_fct = similarity_fct
        self.batch_size = batch_size
        self.show_progress_bar = show_progress_bar
        self.use_batched_encoding = use_batched_encoding
        self.truncate_dim = truncate_dim

        if isinstance(self.samples, dict):
            self.samples = list(self.samples.values())

        # Drop samples that lack either a positive or a negative document
        self.samples = [
            sample for sample in self.samples if len(sample["positive"]) > 0 and len(sample["negative"]) > 0
        ]

        self.csv_file = "RerankingEvaluator" + ("_" + name if name else "") + f"_results_@{self.at_k}.csv"
        self.csv_headers = ["epoch", "steps", "MAP", f"MRR@{self.at_k}", f"NDCG@{self.at_k}"]
        self.write_csv = write_csv
        self.primary_metric = f"ndcg@{self.at_k}"

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        """
        Evaluates the model on the dataset and returns the evaluation metrics.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to evaluate.
            output_path (str, optional): The output path to write the results. Defaults to None.
            epoch (int, optional): The current epoch number. Defaults to -1.
            steps (int, optional): The current step number. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"RerankingEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        scores = self.compute_metrices(model)
        mean_ap = scores["map"]
        mean_mrr = scores["mrr"]
        mean_ndcg = scores["ndcg"]

        # Log statistics about the number of positives and negatives per query
        num_positives = [len(sample["positive"]) for sample in self.samples]
        num_negatives = [len(sample["negative"]) for sample in self.samples]

        logger.info(
            f"Queries: {len(self.samples)} \t Positives: Min {np.min(num_positives):.1f}, "
            f"Mean {np.mean(num_positives):.1f}, Max {np.max(num_positives):.1f} \t "
            f"Negatives: Min {np.min(num_negatives):.1f}, Mean {np.mean(num_negatives):.1f}, "
            f"Max {np.max(num_negatives):.1f}"
        )
        logger.info(f"MAP: {mean_ap * 100:.2f}")
        logger.info(f"MRR@{self.at_k}: {mean_mrr * 100:.2f}")
        logger.info(f"NDCG@{self.at_k}: {mean_ndcg * 100:.2f}")

        # Append the results to the CSV file, writing the header first if the file is new
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)
                writer.writerow([epoch, steps, mean_ap, mean_mrr, mean_ndcg])

        metrics = {
            "map": mean_ap,
            f"mrr@{self.at_k}": mean_mrr,
            f"ndcg@{self.at_k}": mean_ndcg,
        }
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics
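
    # Note: prefix_name_to_metrics() prepends the evaluator name to every metric
    # key, e.g. "ms-marco-dev_map", "ms-marco-dev_mrr@10" and "ms-marco-dev_ndcg@10",
    # and the same prefix applies to `primary_metric` (see the class docstring example).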

    def compute_metrices(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        return (
            self.compute_metrices_batched(model)
            if self.use_batched_encoding
            else self.compute_metrices_individual(model)
        )

    def compute_metrices_batched(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        # Embed all queries and all documents together in large batches for speed
        all_query_embs = self.embed_inputs(
            model,
            [sample["query"] for sample in self.samples],
            encode_fn_name="query",
            show_progress_bar=self.show_progress_bar,
        )

        all_docs = []
        for sample in self.samples:
            all_docs.extend(sample["positive"])
            all_docs.extend(sample["negative"])

        all_docs_embs = self.embed_inputs(
            model,
            all_docs,
            encode_fn_name="document",
            show_progress_bar=self.show_progress_bar,
        )

        # Compute the scores of each query against its own documents
        query_idx, docs_idx = 0, 0
        for instance in self.samples:
            query_emb = all_query_embs[query_idx]
            query_idx += 1

            num_pos = len(instance["positive"])
            num_neg = len(instance["negative"])
            docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg]
            docs_idx += num_pos + num_neg

            if num_pos == 0 or num_neg == 0:
                continue

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Indices sorted by decreasing score
            pred_scores = pred_scores.cpu().tolist()

            is_relevant = [1] * num_pos + [0] * num_neg

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and AP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def compute_metrices_individual(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        ZSamples)disabledescr^   r   r   r   rb   Fr_   ra   rc   re   )tqdmr   r   r2   r   rf   r   rh   ri   rj   rk   rl   rm   r   rn   r   r   rK   rM   )r6   r:   ro   rp   rq   rr   r^   r   r   docsrx   rs   ru   rv   rw   ry   rz   r{   rW   rX   rY   r#   r#   r$   r]   !  s<    




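
    # Dispatch note for embed_inputs() below: encode_fn_name selects between
    # model.encode (None), model.encode_query ("query") and model.encode_document
    # ("document"), so models with dedicated query/document prompts use them.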

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        encode_fn_name: str | None = None,
        show_progress_bar: bool | None = None,
        **kwargs,
    ) -> Tensor:
        if encode_fn_name is None:
            encode_fn = model.encode
        elif encode_fn_name == "query":
            encode_fn = model.encode_query
        elif encode_fn_name == "document":
            encode_fn = model.encode_document
        return encode_fn(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=show_progress_bar,
            convert_to_tensor=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    def get_config_dict(self):
        config_dict = {"at_k": self.at_k}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict
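

# Minimal usage sketch (illustrative only): assumes the "all-MiniLM-L6-v2"
# checkpoint can be downloaded; the two toy samples are invented for the example.
if __name__ == "__main__":
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")
    toy_samples = [
        {
            "query": "What is the capital of France?",
            "positive": ["Paris is the capital and largest city of France."],
            "negative": ["Berlin is the capital of Germany.", "The Loire is a river in France."],
        },
        {
            "query": "Who wrote Hamlet?",
            "positive": ["Hamlet is a tragedy written by William Shakespeare."],
            "negative": ["Hamlet is a play set in Denmark."],
        },
    ]
    evaluator = RerankingEvaluator(samples=toy_samples, name="toy")
    results = evaluator(model)
    print(results)  # Expect keys like "toy_map", "toy_mrr@10" and "toy_ndcg@10"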