a
    ãÀh|  ã                   @  sZ   d dl mZ d dlmZ d dlmZmZ d dlmZ ddl	m	Z	m
Z
 G dd„ dejƒZd	S )
é    )Úannotations)ÚIterable)ÚTensorÚnn)ÚSentenceTransformeré   )ÚBatchHardTripletLossÚ$BatchHardTripletLossDistanceFunctionc                      sd   e Zd Zejdfddddœ‡ fdd„Zdd	d	d
œdd„Zd	d	d	dœdd„Zeddœdd„ƒZ	‡  Z
S )ÚBatchAllTripletLossé   r   ÚfloatÚNone)ÚmodelÚmarginÚreturnc                   s    t ƒ  ¡  || _|| _|| _dS )a¦  
        BatchAllTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class.

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class SiameseDistanceMetric contains
                pre-defined metrics that can be used.
            margin: Negative samples should be at least margin further
                apart from the anchor than the positive.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per labels class.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.GROUP_BY_LABEL`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that each batch contains 2+ examples per label class.

        Relations:
            * :class:`BatchHardTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
            * :class:`BatchHardSoftMarginTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
              Also, it does not require setting a margin.
            * :class:`BatchSemiHardTripletLoss` uses only semi-hard triplets, valid triplets, rather than all possible, valid triplets.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchAllTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        N)ÚsuperÚ__init__Úsentence_embedderÚtriplet_marginÚdistance_metric)Úselfr   r   r   ©Ú	__class__© úl/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/losses/BatchAllTripletLoss.pyr      s    J
zBatchAllTripletLoss.__init__zIterable[dict[str, Tensor]]r   )Úsentence_featuresÚlabelsr   c                 C  s   |   |d ¡d }|  ||¡S )Nr   Zsentence_embedding)r   Úbatch_all_triplet_loss)r   r   r   Úrepr   r   r   Úforward\   s    zBatchAllTripletLoss.forward)r   Ú
embeddingsr   c           
      C  sx   |   |¡}| d¡}| d¡}|| | j }t |¡}| ¡ | }d||dk < ||dk }| d¡}	| ¡ |	d  }|S )a]  Build the triplet loss over a batch of embeddings.
        We generate all the valid triplets and average the loss over the positive ones.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
            margin: margin for triplet loss
            squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                     If false, output is the pairwise euclidean distance matrix.
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
        é   r   r   g¼‰Ø—²Òœ<)r   Z	unsqueezer   r   Zget_triplet_maskr   ÚsizeÚsum)
r   r   r    Zpairwise_distZanchor_positive_distZanchor_negative_distZtriplet_lossÚmaskZvalid_tripletsZnum_positive_tripletsr   r   r   r   `   s    




z*BatchAllTripletLoss.batch_all_triplet_lossÚstr)r   c                 C  s   dS )Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
r   )r   r   r   r   Úcitation‹   s    zBatchAllTripletLoss.citation)Ú__name__Ú
__module__Ú__qualname__r	   Zeucledian_distancer   r   r   Úpropertyr&   Ú__classcell__r   r   r   r   r
      s   üO+r
   N)Ú
__future__r   Úcollections.abcr   Ztorchr   r   Z)sentence_transformers.SentenceTransformerr   r   r	   ÚModuler
   r   r   r   r   Ú<module>   s
   