a
    h=                     @  sV   d Z ddlmZ ddlZddlZddlZddlmZ G dd dZG dd	 d	eZ	dS )
a@  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

Instead, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
    )annotationsN   )InputExamplec                   @  s6   e Zd ZdZddddejdddfdd	Zdd
dZdS )STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
           
      C  s:   || _ || _|| _|| _|| _|| _|| _|| _|	| _d S )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score
selfr	   r   r   r
   r   r   r   r   r    r   g/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/readers/STSDataReader.py__init__   s    zSTSDataReader.__init__c              	   C  s   t j| j|}|dr*tj|dddn
t|dd}tj|| j	| j
d}g }t|D ]~\}}t|| j }	| jr|	| j | j| j  }	|| j }
|| j }|t|t| |
|g|	d |dkrXt||krX qqXW d	   n1 s0    Y  |S )
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )ZguidZtextslabelr   N)ospathjoinr	   endswithgzipopencsvreaderr   r   	enumeratefloatr
   r   r   r   r   r   appendr   strlen)r   filenameZmax_examplesfilepathZfIndataZexamplesidrowZscores1s2r   r   r   get_examples0   s$    


 $zSTSDataReader.get_examplesN)r   )__name__
__module____qualname____doc__r!   
QUOTE_NONEr   r/   r   r   r   r   r      s   
r   c                      s4   e Zd ZdZddddejdddf fdd		Z  ZS )
STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r         r   Tr   c
           
        s"   t  j|||||||||	d	 d S )N)	r	   r   r   r
   r   r   r   r   r   )superr   r   	__class__r   r   r   N   s    zSTSBenchmarkDataReader.__init__)r0   r1   r2   r3   r!   r4   r   __classcell__r   r   r9   r   r5   I   s   r5   )
r3   
__future__r   r!   r   r    r   r   r5   r   r   r   r   <module>   s   
5