a
    hz"                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZ dd	lmZ d
dlmZ eeZG dd de	ddZG dd de
ZdgZdS )zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)Union   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)logging   )AutoTokenizerc                
   @   s*   e Zd Zdddddddddd	i dZdS )InstructBlipProcessorKwargsTFr   )	add_special_tokenspaddingZstrideZreturn_overflowing_tokensZreturn_special_tokens_maskZreturn_offsets_mappingZreturn_token_type_idsZreturn_lengthverbose)text_kwargsimages_kwargsN)__name__
__module____qualname__	_defaults r   r   t/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/instructblip/processing_instructblip.pyr   !   s   r   F)totalc                       s   e Zd ZdZg dZdZdZdZd fdd	Zde	e
eeee ee f ee edd	d
Zedd Z fddZe fddZ  ZS )InstructBlipProcessora  
    Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

    Args:
        image_processor (`BlipImageProcessor`):
            An instance of [`BlipImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):"
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizer)ZBlipImageProcessorZBlipImageProcessorFastr   Nc                    sP   t |ds.tdddd| _|j| jgdd n|j| _|| _t ||| d S )Nimage_tokenz<image>FT)
normalizedZspecial)Zspecial_tokens)hasattrr	   r    Z
add_tokensnum_query_tokenssuper__init__)selfr   r   r   r#   kwargs	__class__r   r   r%   J   s    
zInstructBlipProcessor.__init__)imagestextr'   returnc                    s  |du r|du rt d| jtfd| jji|}|d dd}i }|durvt|trb|g}n t|tst|d tst d| j	|fi |d }	|	d|d	< |	d
|d< |d 
ddur|d d  | j8  < | j|fi |d }
|durl| jj| j }d|d d< d|d d< d|d d< | j|fi |d  |
D ]" fdd|
 D |
< qH||
 |dur| j|fi |d }|| t||d}|S )a  
        This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Args:
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`TextInput`, `PreTokenizedInput`, `list[TextInput]`, `list[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
        Nz,You have to specify at least images or text.Ztokenizer_init_kwargsr   return_tensorsr   zAInvalid input text. Please provide a string, or a list of stringsZ	input_idsqformer_input_idsZattention_maskqformer_attention_mask
max_lengthFr   r   Z
truncationc                    s   g | ]}  | qS r   r   ).0sampleZimage_text_encodingkr   r   
<listcomp>       z2InstructBlipProcessor.__call__.<locals>.<listcomp>r   )Ztensor_type)
ValueErrorZ_merge_kwargsr   r   Zinit_kwargspop
isinstancestrlistr   getr#   r    contentupdater   r   )r&   r*   r+   ZaudioZvideosr'   Zoutput_kwargsr-   encodingZqformer_text_encodingZtext_encodingZimage_tokensZimage_encodingr   r3   r   __call__T   sH    


 


zInstructBlipProcessor.__call__c                 C   s$   | j j}| jj}ddg}|| | S )Nr.   r/   )r   model_input_namesr   )r&   Ztokenizer_input_namesZimage_processor_input_namesZqformer_input_namesr   r   r   rA      s    z'InstructBlipProcessor.model_input_namesc                    s   t j|rtd| dt j|dd t j|d}| j| d| jv }|r^| j	d t
 j|fi |}|r|  jdg7  _|S )NzProvided path (z#) should be a directory, not a fileT)exist_okr   )ospathisfiler7   makedirsjoinr   save_pretrained
attributesremover$   )r&   Zsave_directoryr'   Zqformer_tokenizer_pathZqformer_presentoutputsr(   r   r   rH      s    
z%InstructBlipProcessor.save_pretrainedc                    s>   t  j|fi |}t|tr&|d }tj|dd}||_|S )Nr   r   )Z	subfolder)r$   from_pretrainedr9   tupler   r   )clsZpretrained_model_name_or_pathr'   	processorr   r(   r   r   rL      s    
z%InstructBlipProcessor.from_pretrained)N)NNNN)r   r   r   __doc__rI   Zimage_processor_classZtokenizer_classZqformer_tokenizer_classr%   r   r   r   r
   r;   r   r   r   r@   propertyrA   rH   classmethodrL   __classcell__r   r   r(   r   r   2   s*       C
r   )rP   rC   typingr   Zimage_processing_utilsr   Zimage_utilsr   Zprocessing_utilsr   r   r   Ztokenization_utils_baser	   r
   r   utilsr   autor   Z
get_loggerr   loggerr   r   __all__r   r   r   r   <module>   s   
 