from __future__ import annotations

from collections.abc import Iterable

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util


class MegaBatchMarginLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        positive_margin: float = 0.8,
        negative_margin: float = 0.3,
        use_mini_batched_version: bool = True,
        mini_batch_size: int = 50,
    ) -> None:
        """
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.
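
        The per-triplet quantity that both ``forward`` implementations minimize can be sketched as::

            loss = relu(positive_margin - cos(anchor, positive)) + relu(cos(anchor, negative) - negative_margin)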

        Args:
            model: SentenceTransformer model
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor for the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
                from datasets import Dataset

                train_batch_size = 250
                train_mini_batch_size = 32

                model = SentenceTransformer('all-MiniLM-L6-v2')
                train_dataset = Dataset.from_dict({
                    "anchor": [f"This is sentence number {i}" for i in range(500)],
                    "positive": [f"This is sentence number {i}" for i in range(1, 501)],
                })
                loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    per_device_train_batch_size=train_batch_size,
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        """
        super().__init__()
        self.model = model
        self.positive_margin = positive_margin
        self.negative_margin = negative_margin
        self.mini_batch_size = mini_batch_size
        self.forward = self.forward_mini_batched if use_mini_batched_version else self.forward_non_mini_batched

    def forward_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        anchor, positive = sentence_features
        feature_names = list(anchor.keys())

        # Embed all positives once without gradients; they are only needed to mine hard negatives
        with torch.no_grad():
            self.model.eval()
            all_positive_emb = self.model(positive)["sentence_embedding"].detach()
            self.model.train()

        diagonal_matrix = torch.eye(len(all_positive_emb), len(all_positive_emb), device=all_positive_emb.device)

        # Iterate over the triplets (anchor, positive, hardest_negative) in smaller mini-batches
        for start_idx in range(0, len(all_positive_emb), self.mini_batch_size):
            end_idx = start_idx + self.mini_batch_size
            anchor_emb = self.model({key: anchor[key][start_idx:end_idx] for key in anchor})["sentence_embedding"]

            # For each anchor, find the hardest in-batch negative. Subtracting 2 on the diagonal pushes the
            # anchor's own positive below -1, so it can never be selected by the max() operation
            hard_negative_features = {key: [] for key in feature_names}
            with torch.no_grad():
                cos_scores = util.pytorch_cos_sim(anchor_emb, all_positive_emb)
                negative_scores = cos_scores - 2 * diagonal_matrix[start_idx:end_idx]
                negatives_max, negatives_ids = torch.max(negative_scores, dim=1)

            for hard_negative_id in negatives_ids:
                for key in feature_names:
                    hard_negative_features[key].append(positive[key][hard_negative_id])

            for key in feature_names:
                hard_negative_features[key] = torch.stack(hard_negative_features[key])

            # Compute differentiable positive and (hard) negative embeddings
            positive_emb = self.model({key: positive[key][start_idx:end_idx] for key in positive})["sentence_embedding"]
            negative_emb = self.model(hard_negative_features)["sentence_embedding"]

            assert anchor_emb.shape == positive_emb.shape
            assert anchor_emb.shape == negative_emb.shape

            # Margin loss on the cosine similarities of the triplet
            pos_cosine = F.cosine_similarity(anchor_emb, positive_emb)
            neg_cosine = F.cosine_similarity(anchor_emb, negative_emb)
            losses = F.relu(self.positive_margin - pos_cosine) + F.relu(neg_cosine - self.negative_margin)
            losses = losses.mean()

            # Backpropagate every mini-batch except the last one; the last loss is backpropagated by the trainer
            if end_idx < len(all_positive_emb):
                losses.backward()

        return losses
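
    # Illustrative margin arithmetic (hypothetical numbers, not from the paper): with the default margins
    # positive_margin=0.8 and negative_margin=0.3, a triplet with cos(anchor, positive)=0.9 and
    # cos(anchor, negative)=0.2 contributes relu(0.8 - 0.9) + relu(0.2 - 0.3) = 0.0, whereas
    # cos(anchor, positive)=0.6 and cos(anchor, negative)=0.5 contributes relu(0.8 - 0.6) + relu(0.5 - 0.3) = 0.4.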

    def forward_non_mini_batched(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        reps = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
        embeddings_a, embeddings_b = reps

        cos_scores = util.pytorch_cos_sim(embeddings_a, embeddings_b)
        positive_scores = torch.diagonal(cos_scores)
        # Remove the positive scores along the diagonal so they are never selected as hardest negatives
        negative_scores = cos_scores - 2 * torch.eye(*cos_scores.shape, device=cos_scores.device)
        negatives_max, _ = torch.max(negative_scores, dim=1)
        losses = F.relu(self.positive_margin - positive_scores) + F.relu(negatives_max - self.negative_margin)
        return losses.mean()

    @property
    def citation(self) -> str:
        return """
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
"""
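

# A minimal, self-contained sketch (illustrative only, not part of the library API) of the in-batch
# hard-negative mining idea implemented above: for every anchor, pick the most similar positive in the
# batch that is not its own paired positive. The toy sizes below are hypothetical.
if __name__ == "__main__":
    anchors = F.normalize(torch.randn(6, 8), dim=1)    # 6 anchor embeddings of dimension 8
    positives = F.normalize(torch.randn(6, 8), dim=1)  # 6 paired positive embeddings
    scores = anchors @ positives.T                      # cosine similarities, since rows are unit-norm
    scores = scores - 2 * torch.eye(6)                  # push each pair's own positive below -1
    hardest_negative_ids = scores.argmax(dim=1)         # hardest in-batch negative per anchor
    print(hardest_negative_ids)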