a
    h                      @   s   d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZmZmZ dd	lmZ dd
lmZ G dd deddZG dd de	ZdgZdS )z%
Speech processor class for Wav2Vec2
    N)contextmanager)OptionalUnion   )ProcessingKwargsProcessorMixinUnpack)
AudioInputPreTokenizedInput	TextInput   )Wav2Vec2FeatureExtractor)Wav2Vec2CTCTokenizerc                   @   s   e Zd Zi ZdS )Wav2Vec2ProcessorKwargsN)__name__
__module____qualname__	_defaults r   r   l/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/wav2vec2/processing_wav2vec2.pyr      s   r   F)totalc                       s   e Zd ZdZdZdZ fddZe fddZde	e
eeee eef  ee d	d
dZdd Zedd Zedd Z  ZS )Wav2Vec2Processora  
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
    processor.

    [`Wav2Vec2Processor`] offers all the functionalities of [`Wav2Vec2FeatureExtractor`] and [`PreTrainedTokenizer`].
    See the docstring of [`~Wav2Vec2Processor.__call__`] and [`~Wav2Vec2Processor.decode`] for more information.

    Args:
        feature_extractor (`Wav2Vec2FeatureExtractor`):
            An instance of [`Wav2Vec2FeatureExtractor`]. The feature extractor is a required input.
        tokenizer ([`PreTrainedTokenizer`]):
            An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
    r   ZAutoTokenizerc                    s    t  || | j| _d| _d S )NF)super__init__feature_extractorcurrent_processor_in_target_context_manager)selfr   	tokenizer	__class__r   r   r   3   s    zWav2Vec2Processor.__init__c              	      sx   zt  j|fi |W S  ttfyr   td| j dt tj|fi |}t	j|fi |}| ||d Y S 0 d S )NzLoading a tokenizer inside a   from a config that does not include a `tokenizer_class` attribute is deprecated and will be removed in v5. Please add `'tokenizer_class': 'Wav2Vec2CTCTokenizer'` attribute to either your `config.json` or `tokenizer_config.json` file to suppress this warning: )r   r   )
r   from_pretrainedOSError
ValueErrorwarningswarnr   FutureWarningr   r   )clsZpretrained_model_name_or_pathkwargsr   r   r   r   r   r!   8   s    	z!Wav2Vec2Processor.from_pretrainedN)audiotextr(   c           	      K   s   d|v rt d |d}|du r4|du r4td| jtfd| jji|}| jrz| j	|fi |d |d |d S |dur| j
|fi |d }|dur| j|fi |d }|du r|S |du r|S |d	 |d
< |S dS )a*  
        This method forwards all arguments to [`Wav2Vec2FeatureExtractor.__call__`] and/or
        [`PreTrainedTokenizer.__call__`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.__call__`] and [`PreTrainedTokenizer.__call__`] are called.

        Args:
            audio (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
                An audio input is passed to [`Wav2Vec2FeatureExtractor.__call__`].
            text (`str`, `List[str]`, *optional*):
                A text input is passed to [`PreTrainedTokenizer.__call__`].


        Returns:
            This method returns the results of each `call` method. If both are used, the output is a dictionary containing the results of both.
        Z
raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.NzAYou need to specify either an `audio` or `text` input to process.Ztokenizer_init_kwargsZaudio_kwargsZtext_kwargsZcommon_kwargs	input_idslabels)r$   r%   popr#   Z_merge_kwargsr   r   Zinit_kwargsr   r   r   )	r   r)   r*   ZimagesZvideosr(   Zoutput_kwargsinputs	encodingsr   r   r   __call__K   s@    

zWav2Vec2Processor.__call__c                 O   s   | j r| jj|i |S |dd}|dd}t|dkrP|d }|dd }|durr| jj|g|R i |}|dur| jj|fi |}|du r|S |du r|S |d |d< |S dS )ag  
        This method operates on batches of extracted features and/or tokenized text. It forwards all arguments to
        [`Wav2Vec2FeatureExtractor.pad`] and/or [`PreTrainedTokenizer.pad`] depending on the input modality and returns their outputs. If both modalities are passed, [`Wav2Vec2FeatureExtractor.pad`] and [`PreTrainedTokenizer.pad`] are called.

        Args:
            input_features:
                When the first argument is a dictionary containing a batch of tensors, or the `input_features` argument is present, it is passed to [`Wav2Vec2FeatureExtractor.pad`].
            labels:
                When the `label` argument is present, it is passed to [`PreTrainedTokenizer.pad`].

        Returns:
            This method returns the results of each `pad` method. If both are used, the output is a dictionary containing the results of both.
        input_featuresNr,   r   r   r+   )r   r   padr-   lenr   r   )r   argsr(   r1   r,   r   r   r   r2      s"    zWav2Vec2Processor.padc                 C   s   | j j}|dg S )Nr,   )r   model_input_names)r   Zfeature_extractor_input_namesr   r   r   r5      s    z#Wav2Vec2Processor.model_input_namesc                 c   s0   t d d| _| j| _dV  | j| _d| _dS )z
        Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning
        Wav2Vec2.
        z`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r$   r%   r   r   r   r   )r   r   r   r   as_target_processor   s    z%Wav2Vec2Processor.as_target_processor)NNNN)r   r   r   __doc__Zfeature_extractor_classZtokenizer_classr   classmethodr!   r	   r   r   strlistr   r
   r   r   r0   r2   propertyr5   r   r6   __classcell__r   r   r   r   r   !   s(       8%
r   )r7   r$   
contextlibr   typingr   r   Zprocessing_utilsr   r   r   Ztokenization_utils_baser	   r
   r   Zfeature_extraction_wav2vec2r   Ztokenization_wav2vec2r   r   r   __all__r   r   r   r   <module>   s     