"""Fast Image processor class for OWLv2."""

import warnings
from typing import Optional, Union

from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs
from ...image_transforms import group_images_by_shape, reorder_images
from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension, ImageInput, PILImageResampling, SizeDict
from ...processing_utils import Unpack
from ...utils import TensorType, auto_docstring, is_torch_available, is_torchvision_available, is_torchvision_v2_available
from ..owlvit.image_processing_owlvit_fast import OwlViTImageProcessorFast


if is_torch_available():
    import torch

if is_torchvision_v2_available():
    from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
    from torchvision.transforms import functional as F


class Owlv2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
    """
    do_pad (`bool`, *optional*, defaults to `True`):
        Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
        method. If `True`, padding will be applied to the bottom and right of the image with grey pixels.
    """

    do_pad: Optional[bool]
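

# Usage note (illustrative): `do_pad` defaults to `True` on the processor class below and
# can also be overridden per call, e.g. `processor(images=image, do_pad=False, return_tensors="pt")`.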


@auto_docstring
class Owlv2ImageProcessorFast(OwlViTImageProcessorFast):
    resample = PILImageResampling.BILINEAR
    image_mean = OPENAI_CLIP_MEAN
    image_std = OPENAI_CLIP_STD
    size = {"height": 960, "width": 960}
    rescale_factor = 1 / 255
    do_resize = True
    do_rescale = True
    do_normalize = True
    do_pad = True
    valid_kwargs = Owlv2FastImageProcessorKwargs
    crop_size = None
    do_center_crop = None

    def __init__(self, **kwargs: Unpack[Owlv2FastImageProcessorKwargs]):
        BaseImageProcessorFast.__init__(self, **kwargs)

    def preprocess(self, images: ImageInput, **kwargs: Unpack[Owlv2FastImageProcessorKwargs]) -> BatchFeature:
        return BaseImageProcessorFast.preprocess(self, images, **kwargs)

    def _pad_images(self, images: "torch.Tensor", constant_value: float = 0.5) -> "torch.Tensor":
        """
        Pad a batch of images with a constant value to a square of side max(height, width).
        """
        height, width = images.shape[-2:]
        size = max(height, width)
        pad_bottom = size - height
        pad_right = size - width
        padding = (0, 0, pad_right, pad_bottom)
        padded_image = F.pad(images, padding, fill=constant_value)
        return padded_image

    def pad(
        self, images: list["torch.Tensor"], disable_grouping: Optional[bool], constant_value: float = 0.5
    ) -> list["torch.Tensor"]:
        # Group same-shaped images so padding can be applied to stacked batches at once.
        grouped_images, grouped_images_index = group_images_by_shape(images, disable_grouping=disable_grouping)
        processed_images_grouped = {}
        for shape, stacked_images in grouped_images.items():
            stacked_images = self._pad_images(stacked_images, constant_value=constant_value)
            processed_images_grouped[shape] = stacked_images

        processed_images = reorder_images(processed_images_grouped, grouped_images_index)
        return processed_images
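
    # Illustrative example of the padding above (numbers not from the original file):
    # a stacked batch of shape (N, 3, 480, 640) becomes (N, 3, 640, 640); pad_right is 0
    # because width is already the longer side, and the 160 extra bottom rows are filled
    # with constant_value=0.5, i.e. mid-grey for images already rescaled to [0, 1].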

zOwlv2ImageProcessorFast.pad)imager5   anti_aliasingr.   c                 K   s  |j |jf}|j}t|dd |jt||j }|r|du r`|d d jdd}nLt|t	| }t
|dk rtdn t
|dk|dk@ rtd t
|dkr|}	n:dtd|   d }
tj||
d |
d f| d	}	n|}	tj|	|j |jfd
d}|S )az  
        Resize an image as per the original implementation.

        Args:
            image (`Tensor`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary containing the height and width to resize the image to.
            anti_aliasing (`bool`, *optional*, defaults to `True`):
                Whether to apply anti-aliasing when downsampling the image.
            anti_aliasing_sigma (`float`, *optional*, defaults to `None`):
                Standard deviation for Gaussian kernel when downsampling the image. If `None`, it will be calculated
                automatically.
        r   N   r   )minzFAnti-aliasing standard deviation must be greater than or equal to zerozWAnti-aliasing standard deviation greater than zero but not down-sampling along all axesr   )sigmaF)r5   Z	antialias)r$   r%   r1   torchZtensortoZdeviceclampZ
atleast_1dZ	ones_likeany
ValueErrorwarningswarnceilintr3   Zgaussian_blurtolistresize)r)   rA   r5   rB   Zanti_aliasing_sigmar&   Zoutput_shapeZinput_shapeZfactorsfilteredZkernel_sizesoutr!   r!   r"   rP   w   s,    ,
zOwlv2ImageProcessorFast.resizezF.InterpolationMode)r*   	do_resizer5   interpolationr   
do_rescalerescale_factordo_normalize
image_mean	image_stdr9   return_tensorsr.   c              	   K   s"  t ||d\}}i }| D ]$\}}| |||d|	|
}|||< qt||}|r^| j||d}t ||d\}}i }| D ](\}}|rz| j|||tjd}|||< qzt||}t ||d\}}i }| D ]$\}}| |d|||	|
}|||< qt||}|rtj	|ddn|}t
d|i|dS )Nr:   F)rA   r5   rT   Zinput_data_formatr   )dimZpixel_values)dataZtensor_type)r   r;   Zrescale_and_normalizer	   r4   rP   r   ZFIRSTrF   stackr   )r)   r*   rS   r5   rT   r   rU   rV   rW   rX   rY   r9   rZ   r&   r<   r=   r>   r1   r?   r@   Zresized_images_groupedZresized_stackZresized_imagesr!   r!   r"   _preprocess   sD    






z#Owlv2ImageProcessorFast._preprocess)r,   )r,   )TN)&r   r   r   r   ZBILINEARZresampler


__all__ = ["Owlv2ImageProcessorFast"]