a
    h1                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ erdd dlmZ eeZG dd	 d	e
ZdS )
    )annotationsN)defaultdict)TYPE_CHECKING)SentenceEvaluator)paraphrase_mining)SentenceTransformerc                      sl   e Zd ZdZd dddddddddddddd fddZd!ddddddddZedd Zdd Z  Z	S )"ParaphraseMiningEvaluatora  
    Given a large set of sentences, this evaluator performs paraphrase (duplicate) mining and
    identifies the pairs with the highest similarity. It compare the extracted paraphrase pairs
    with a set of gold labels and computes the F1 score.

    Args:
        sentences_map (Dict[str, str]): A dictionary that maps sentence-ids to sentences.
            For example, sentences_map[id] => sentence.
        duplicates_list (List[Tuple[str, str]], optional): A list with id pairs [(id1, id2), (id1, id5)]
            that identifies the duplicates / paraphrases in the sentences_map. Defaults to None.
        duplicates_dict (Dict[str, Dict[str, bool]], optional): A default dictionary mapping [id1][id2]
            to true if id1 and id2 are duplicates. Must be symmetric, i.e., if [id1][id2] => True,
            then [id2][id1] => True. Defaults to None.
        add_transitive_closure (bool, optional): If true, it adds a transitive closure,
            i.e. if dup[a][b] and dup[b][c], then dup[a][c]. Defaults to False.
        query_chunk_size (int, optional): To identify the paraphrases, the cosine-similarity between
            all sentence-pairs will be computed. As this might require a lot of memory, we perform
            a batched computation. query_chunk_size sentences will be compared against up to
            corpus_chunk_size sentences. In the default setting, 5000 sentences will be grouped
            together and compared up-to against 100k other sentences. Defaults to 5000.
        corpus_chunk_size (int, optional): The corpus will be batched, to reduce the memory requirement.
            Defaults to 100000.
        max_pairs (int, optional): We will only extract up to max_pairs potential paraphrase candidates.
            Defaults to 500000.
        top_k (int, optional): For each query, we extract the top_k most similar pairs and add it to a sorted list.
            I.e., for one sentence we cannot find more than top_k paraphrases. Defaults to 100.
        show_progress_bar (bool, optional): Output a progress bar. Defaults to False.
        batch_size (int, optional): Batch size for computing sentence embeddings. Defaults to 16.
        name (str, optional): Name of the experiment. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to.
            `None` uses the model's current truncation dimension. Defaults to None.

    Example:
        ::

            from datasets import load_dataset
            from sentence_transformers.SentenceTransformer import SentenceTransformer
            from sentence_transformers.evaluation import ParaphraseMiningEvaluator

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load the Quora Duplicates Mining dataset
            questions_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "questions", split="dev")
            duplicates_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "duplicates", split="dev")

            # Create a mapping from qid to question & a list of duplicates (qid1, qid2)
            qid_to_questions = dict(zip(questions_dataset["qid"], questions_dataset["question"]))
            duplicates = list(zip(duplicates_dataset["qid1"], duplicates_dataset["qid2"]))

            # Initialize the paraphrase mining evaluator
            paraphrase_mining_evaluator = ParaphraseMiningEvaluator(
                sentences_map=qid_to_questions,
                duplicates_list=duplicates,
                name="quora-duplicates-dev",
            )
            results = paraphrase_mining_evaluator(model)
            '''
            Paraphrase Mining Evaluation of the model on the quora-duplicates-dev dataset:
            Number of candidate pairs: 250564
            Average Precision: 56.51
            Optimal threshold: 0.8325
            Precision: 52.76
            Recall: 59.19
            F1: 55.79
            '''
            print(paraphrase_mining_evaluator.primary_metric)
            # => "quora-duplicates-dev_average_precision"
            print(results[paraphrase_mining_evaluator.primary_metric])
            # => 0.5650940787776353
    NF  順   d       Tzdict[str, str]zlist[tuple[str, str]] | Nonez!dict[str, dict[str, bool]] | Noneboolintstrz
int | None)sentences_mapduplicates_listduplicates_dictadd_transitive_closurequery_chunk_sizecorpus_chunk_size	max_pairstop_kshow_progress_bar
batch_sizename	write_csvtruncate_dimc                   s  t    g | _g | _| D ] \}}| j| | j| q|| _|	| _|
| _|| _	|| _
|| _|| _|| _|d ur||n
tdd | _|d ur|D ]4\}}||v r||v rd| j| |< d| j| |< q|r| | j| _t }| jD ]X}| j| D ]H}||v r||v r| j| | s*| j| | r|tt||g qqt|| _|r\d| }d| d | _g d| _|| _d| _d S )	Nc                   S  s   t tS )N)r   r    r   r   v/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/evaluation/ParaphraseMiningEvaluator.py<lambda>}       z4ParaphraseMiningEvaluator.__init__.<locals>.<lambda>T_Zparaphrase_mining_evaluationz_results.csv)epochsteps	precisionrecallf1	thresholdaverage_precisionr*   )super__init__	sentencesidsitemsappendr   r   r   r   r   r   r   r   r   
duplicatesr   setaddtuplesortedlentotal_num_duplicatescsv_filecsv_headersr   Zprimary_metric)selfr   r   r   r   r   r   r   r   r   r   r   r   r   idZsentenceid1id2Zpositive_key_pairskey1key2	__class__r   r    r,   \   sR    



z"ParaphraseMiningEvaluator.__init__r   z
str | Nonezdict[str, float])modeloutput_pathr$   r%   returnc                 C  s&  |dkr0|dkrd| }q4d| d| d}nd}| j d urP|d| j  d7 }td	| j d
| d t|| j| j| j| j| j	| j
| j| j d	}tdtt|  d }}d}	d }
 }}d}tt|D ]}|| \}}}| j| }| j| }|d7 }| j| | s| j| | r|d7 }|| }|| j }d| | ||  }||7 }||
kr|}
|}|}|| d |t|d t|d  d  d }	q|| j }td|d d td|	d td|d d td|d d td|
d dd |d ur| jrtj|| j}tj|st|dddd>}t|}|| j ||||||
|	|g W d    n1 s0    Y  nTt|dddd2}t|}||||||
|	|g W d    n1 s0    Y  ||
|||	d}| || j}| |||| |S )NrB   z after epoch z
 in epoch z after z stepsr   z (truncated to )z1Paraphrase Mining Evaluation of the model on the z dataset:)r   r   r   r   r   r   r   zNumber of candidate pairs: r         zAverage Precision: r   z.2fzOptimal threshold: z.4fzPrecision: zRecall: zF1: 
wzutf-8)newlinemodeencodinga)r*   r(   r&   r'   r)   ) r   loggerinfor   r   r-   r   r   r   r   r   r   r   r6   ranger.   r1   r7   minr   ospathjoinr8   isfileopencsvwriterwriterowr9   Zprefix_name_to_metricsZ store_metrics_in_model_card_data)r:   rC   rD   r$   r%   Zout_txtZ
pairs_listZ	n_extractZ	n_correctr)   Zbest_f1Zbest_recallZbest_precisionr*   idxZscoreijr<   r=   r&   r'   r(   Zcsv_pathfrZ   Zmetricsr   r   r    __call__   s    



0

:
8z"ParaphraseMiningEvaluator.__call__c                 C  s   t  }t|  D ]}||vrt  }|| t| | }t|dkrr|d}||vr:|| || |  q:t|}tt|d D ]`}t|d t|D ]H}d| ||  || < d| ||  || < |||  |||  qqq| S )Nr   rH   T)r2   listkeysr3   r6   popextendrR   )graphZnodes_visitedrO   Zconnected_subgraph_nodesZneighbor_nodes_queuenoder]   r^   r   r   r    r      s&    


z0ParaphraseMiningEvaluator.add_transitive_closurec                 C  s*   | j | j| jd}| jd ur&| j|d< |S )N)r   r   r   r   )r   r   r   r   )r:   Zconfig_dictr   r   r    get_config_dict  s    

z)ParaphraseMiningEvaluator.get_config_dict)NNFr	   r
   r   r   Fr   r   TN)NrB   rB   )
__name__
__module____qualname____doc__r,   r`   staticmethodr   rg   __classcell__r   r   r@   r    r      s&   L            ,A W
r   )
__future__r   rY   loggingrT   collectionsr   typingr   Z2sentence_transformers.evaluation.SentenceEvaluatorr   Zsentence_transformers.utilr   Z)sentence_transformers.SentenceTransformerr   	getLoggerrh   rP   r   r   r   r   r    <module>   s   
