a
    ãÀh½  ã                   @  sV   d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 e 
e¡ZG dd„ deƒZdS )aA  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

In particular, you can pass "group_by_label" to `batch_sampler` in the `SentenceTransformerTrainingArguments` class.
é    )ÚannotationsN)ÚIterableDataset)ÚInputExamplec                      s<   e Zd ZdZdddddœ‡ fdd	„Zd
d„ Zdd„ Z‡  ZS )ÚSentenceLabelDatasetaU  
    This dataset can be used for some specific Triplet Losses like BATCH_HARD_TRIPLET_LOSS which requires
    multiple examples with the same label in a batch.

    It draws n consecutive, random and unique samples from one label at a time. This is repeated for each label.

    Labels with fewer than n unique samples are ignored.
    This also applied to drawing without replacement, once less than n samples remain for a label, it is skipped.

    This *DOES NOT* check if there are more labels than the batch is large or if the batch size is divisible
    by the samples drawn per label.
    é   Fzlist[InputExample]ÚintÚbool)ÚexamplesÚsamples_per_labelÚwith_replacementc           	        sì   t ƒ  ¡  || _i }|D ](}|j|vr0g ||j< ||j  |¡ qg | _g | _d}| ¡ D ]<\}}t|ƒ| jkrZ| j 	|¡ | j t| jƒ¡ |d7 }qZt
 |¡| _|| _t
j | j¡ t dt|ƒ› dt| jƒ› d| j› d|› d	¡ dS )	aŠ  
        Creates a LabelSampler for a SentenceLabelDataset.

        Args:
            examples (List[InputExample]): A list of InputExamples.
            samples_per_label (int, optional): The number of consecutive, random, and unique samples drawn per label.
                The batch size should be a multiple of samples_per_label. Defaults to 2.
            with_replacement (bool, optional): If True, each sample is drawn at most once (depending on the total number
                of samples per label). If False, one sample can be drawn in multiple draws, but not multiple times in
                the same drawing. Defaults to False.
        r   é   zSentenceLabelDataset: z examples, from which z8 examples could be used (those labels appeared at least z	 times). z different labels found.N)ÚsuperÚ__init__r
   ÚlabelÚappendÚgrouped_inputsÚgroups_right_borderÚitemsÚlenÚextendÚnpÚarangeÚlabel_ranger   ÚrandomÚshuffleÚloggerÚinfo)	Úselfr	   r
   r   Zlabel2exZexampleZ
num_labelsr   Zlabel_examples©Ú	__class__© úo/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/datasets/SentenceLabelDataset.pyr   %   s.    


ÿ
(ÿzSentenceLabelDataset.__init__c                 #  s  d}d}i ‰ |t | jƒk r| j| ‰ˆˆ vr8tƒ ˆ ˆ< ˆdkrDdn| jˆd  }| jˆ }| jrpt ||¡}n‡ ‡fdd„t ||¡D ƒ}t |ƒ| jkrÖtj	j
|| jddD ]&}|d7 }ˆ ˆ  |¡ | j| V  q®|d7 }|t | jƒkrd}i ‰ tj	 | j¡ qd S )Nr   r   c                   s   g | ]}|ˆ ˆ vr|‘qS r    r    )Ú.0Úi©Úalready_seenr   r    r!   Ú
<listcomp>`   ó    z1SentenceLabelDataset.__iter__.<locals>.<listcomp>F)Úreplace)r   r   r   Úsetr   r   r   r   r
   r   ÚchoiceÚaddr   )r   Z	label_idxÚcountZleft_borderZright_borderZ	selectionZelement_idxr    r$   r!   Ú__iter__Q   s,    


zSentenceLabelDataset.__iter__c                 C  s
   t | jƒS )N)r   r   )r   r    r    r!   Ú__len__n   s    zSentenceLabelDataset.__len__)r   F)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r-   r.   Ú__classcell__r    r    r   r!   r      s   ,r   )r2   Ú
__future__r   ÚloggingÚnumpyr   Ztorch.utils.datar   Zsentence_transformers.readersr   Ú	getLoggerr/   r   r   r    r    r    r!   Ú<module>   s   

