a
    h                     @   sf   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 eeZG d	d
 d
Zd
gZdS )zTokenization classes for RAG.    N)Optional   )BatchEncoding)logging   )	RagConfigc                
   @   s   e Zd Zdd Zdd Zedd Zdd Zd	d
 Zdd Z	dd Z
dd Zdee eee  ee ee eee eedddZdS )RagTokenizerc                 C   s   || _ || _| j | _d S N)question_encoder	generatorcurrent_tokenizer)selfr
   r    r   d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/rag/tokenization_rag.py__init__   s    zRagTokenizer.__init__c                 C   sb   t j|rtd| dt j|dd t j|d}t j|d}| j| | j| d S )NzProvided path (z#) should be a directory, not a fileT)exist_okquestion_encoder_tokenizergenerator_tokenizer)	ospathisfile
ValueErrormakedirsjoinr
   save_pretrainedr   )r   Zsave_directoryZquestion_encoder_pathZgenerator_pathr   r   r   r   #   s    zRagTokenizer.save_pretrainedc                 K   sZ   ddl m} |dd }|d u r*t|}|j||jdd}|j||jdd}| ||dS )N   )AutoTokenizerconfigr   )r   Z	subfolderr   )r
   r   )Zauto.tokenization_autor   popr   from_pretrainedr
   r   )clsZpretrained_model_name_or_pathkwargsr   r   r
   r   r   r   r   r   ,   s    
zRagTokenizer.from_pretrainedc                 O   s   | j |i |S r	   )r   r   argsr!   r   r   r   __call__>   s    zRagTokenizer.__call__c                 O   s   | j j|i |S r	   )r   batch_decoder"   r   r   r   r%   A   s    zRagTokenizer.batch_decodec                 O   s   | j j|i |S r	   )r   decoder"   r   r   r   r&   D   s    zRagTokenizer.decodec                 C   s   | j | _d S r	   )r
   r   r   r   r   r   _switch_to_input_modeG   s    z"RagTokenizer._switch_to_input_modec                 C   s   | j | _d S r	   )r   r   r'   r   r   r   _switch_to_target_modeJ   s    z#RagTokenizer._switch_to_target_modeNlongestT)	src_texts	tgt_texts
max_lengthmax_target_lengthpaddingreturn_tensors
truncationreturnc              	   K   s   t dt |d u r| jj}| |fd||||d|}	|d u rD|	S |d u rT| jj}| f |d||||d|}
|
d |	d< |	S )Nu4  `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more detailsT)add_special_tokensr0   r-   r/   r1   )Ztext_targetr3   r0   r/   r-   r1   Z	input_idslabels)warningswarnFutureWarningr   Zmodel_max_length)r   r+   r,   r-   r.   r/   r0   r1   r!   Zmodel_inputsr4   r   r   r   prepare_seq2seq_batchM   sB    		z"RagTokenizer.prepare_seq2seq_batch)NNNr*   NT)__name__
__module____qualname__r   r   classmethodr   r$   r%   r&   r(   r)   liststrr   intboolr   r8   r   r   r   r   r      s2   	
      
r   )__doc__r   r5   typingr   Ztokenization_utils_baser   utilsr   Zconfiguration_ragr   Z
get_loggerr9   loggerr   __all__r   r   r   r   <module>   s   
_