import math
from collections.abc import Iterable
from typing import TYPE_CHECKING, Optional, Union

from ...image_processing_base import BatchFeature
from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs
from ...image_transforms import group_images_by_shape, reorder_images
from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, PILImageResampling, SizeDict
from ...utils import (
    TensorType,
    auto_docstring,
    is_torch_available,
    is_torchvision_available,
    is_torchvision_v2_available,
    requires_backends,
)
from ..beit.image_processing_beit_fast import BeitImageProcessorFast


if TYPE_CHECKING:
    from ...modeling_outputs import DepthEstimatorOutput

if is_torch_available():
    import torch

if is_torchvision_available():
    if is_torchvision_v2_available():
        from torchvision.transforms.v2 import functional as F
    else:
        from torchvision.transforms import functional as F


def get_resize_output_image_size(
    input_image: "torch.Tensor",
    output_size: Union[int, Iterable[int]],
    keep_aspect_ratio: bool,
    multiple: int,
) -> SizeDict:
    def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
        # Snap `val` to the nearest multiple, staying within [min_val, max_val].
        x = round(val / multiple) * multiple

        if max_val is not None and x > max_val:
            x = math.floor(val / multiple) * multiple

        if x < min_val:
            x = math.ceil(val / multiple) * multiple

        return x

    input_height, input_width = input_image.shape[-2:]
    output_height, output_width = output_size

    # Determine the rescaling factor for each dimension.
    scale_height = output_height / input_height
    scale_width = output_width / input_width

    if keep_aspect_ratio:
        # Scale as little as possible: apply the factor closest to 1 to both dimensions.
        if abs(1 - scale_width) < abs(1 - scale_height):
            scale_height = scale_width
        else:
            scale_width = scale_height

    new_height = constrain_to_multiple_of(scale_height * input_height, multiple=multiple)
    new_width = constrain_to_multiple_of(scale_width * input_width, multiple=multiple)

    return SizeDict(height=new_height, width=new_width)
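

# Worked example of the sizing rule above: for a 500x600 input and
# output_size=(384, 384) with keep_aspect_ratio=True and multiple=32, the
# height factor (384/500 = 0.768) is closer to 1 than the width factor
# (384/600 = 0.64), so it is applied to both dimensions; snapping each side to
# a multiple of 32 then yields SizeDict(height=384, width=448).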


class DPTFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
    """
    ensure_multiple_of (`int`, *optional*, defaults to 1):
        If `do_resize` is `True`, the image is resized to a size that is a multiple of this value. Can be overridden
        by `ensure_multiple_of` in `preprocess`.
    do_pad (`bool`, *optional*, defaults to `False`):
        Whether to apply center padding. This was introduced in the DINOv2 paper, which uses the model in
        combination with DPT.
    size_divisor (`int`, *optional*):
        If `do_pad` is `True`, pads the image dimensions to be divisible by this value. This was introduced in the
        DINOv2 paper, which uses the model in combination with DPT.
    keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
        If `True`, the image is resized to the largest possible size such that the aspect ratio is preserved. Can
        be overridden by `keep_aspect_ratio` in `preprocess`.
    do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
        Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
        is used for background, and background itself is not included in all classes of a dataset (e.g.
        ADE20k). The background label will be replaced by 255.
    """

    ensure_multiple_of: Optional[int]
    size_divisor: Optional[int]
    do_pad: Optional[bool]
    keep_aspect_ratio: Optional[bool]
    do_reduce_labels: Optional[bool]


@auto_docstring
class DPTImageProcessorFast(BeitImageProcessorFast):
    resample = PILImageResampling.BICUBIC
    image_mean = IMAGENET_STANDARD_MEAN
    image_std = IMAGENET_STANDARD_STD
    size = {"height": 384, "width": 384}
    do_resize = True
    do_rescale = True
    do_normalize = True
    do_reduce_labels = False
    rescale_factor = 1 / 255
    ensure_multiple_of = 1
    keep_aspect_ratio = False
    do_pad = False
    size_divisor = None
    do_center_crop = None
    crop_size = None
    valid_kwargs = DPTFastImageProcessorKwargs

    def resize(
        self,
        image: "torch.Tensor",
        size: SizeDict,
        interpolation: Optional["F.InterpolationMode"] = None,
        antialias: bool = True,
        ensure_multiple_of: Optional[int] = 1,
        keep_aspect_ratio: bool = False,
    ) -> "torch.Tensor":
        """
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image, e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing when resizing the image.
            ensure_multiple_of (`int`, *optional*):
                If `do_resize` is `True`, the image is resized to a size that is a multiple of this value.
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, and `do_resize` is `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.

        Returns:
            `torch.Tensor`: The resized image.
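
        Illustrative example (a sketch, assuming a random `(3, 500, 600)` tensor;
        the expected shape follows from the sizing logic in
        `get_resize_output_image_size`):

        ```python
        >>> import torch
        >>> processor = DPTImageProcessorFast()
        >>> image = torch.rand(3, 500, 600)
        >>> size = SizeDict(height=384, width=384)
        >>> processor.resize(image, size, keep_aspect_ratio=True, ensure_multiple_of=32).shape
        torch.Size([3, 384, 448])
        ```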
        """
        if not size.height or not size.width:
            raise ValueError(f"The size dictionary must contain the keys 'height' and 'width'. Got {size.keys()}")

        output_size = get_resize_output_image_size(
            image,
            output_size=(size.height, size.width),
            keep_aspect_ratio=keep_aspect_ratio,
            multiple=ensure_multiple_of,
        )
        return super().resize(image, size=output_size, interpolation=interpolation, antialias=antialias)

    def pad_image(self, image: "torch.Tensor", size_divisor: int = 1) -> "torch.Tensor":
        """
        Center pad a batch of images to be a multiple of `size_divisor`.

        Args:
            image (`torch.Tensor`):
                Image to pad. Can be a batch of images of dimensions (N, C, H, W) or a single image of dimensions (C, H, W).
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
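
        Illustrative example (worked out from `_get_pad` below): with
        `size_divisor=32`, a `(3, 500, 600)` image is padded to `(3, 512, 608)`,
        i.e. 6 rows of padding on top and bottom and 4 columns on the left and
        right.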
        """
        height, width = image.shape[-2:]

        def _get_pad(size, size_divisor):
            # Split the padding needed to reach the next multiple evenly on both sides.
            new_size = math.ceil(size / size_divisor) * size_divisor
            pad_size = new_size - size
            pad_size_left = pad_size // 2
            pad_size_right = pad_size - pad_size_left
            return pad_size_left, pad_size_right

        pad_top, pad_bottom = _get_pad(height, size_divisor)
        pad_left, pad_right = _get_pad(width, size_divisor)
        padding = (pad_left, pad_top, pad_right, pad_bottom)
        return F.pad(image, padding)

    def _preprocess(
        self,
        images: list["torch.Tensor"],
        do_reduce_labels: bool,
        do_resize: bool,
        size: SizeDict,
        interpolation: Optional["F.InterpolationMode"],
        do_center_crop: bool,
        crop_size: SizeDict,
        do_rescale: bool,
        rescale_factor: float,
        do_normalize: bool,
        image_mean: Optional[Union[float, list[float]]],
        image_std: Optional[Union[float, list[float]]],
        keep_aspect_ratio: bool,
        ensure_multiple_of: Optional[int],
        do_pad: bool,
        size_divisor: Optional[int],
        disable_grouping: Optional[bool],
        return_tensors: Optional[Union[str, TensorType]],
        **kwargs,
    ) -> BatchFeature:
        if do_reduce_labels:
            images = self.reduce_label(images)

        # Group images by shape so each group can be resized in a single batched call.
        grouped_images, grouped_images_index = group_images_by_shape(images, disable_grouping=disable_grouping)
        resized_images_grouped = {}
        for shape, stacked_images in grouped_images.items():
            if do_resize:
                stacked_images = self.resize(
                    image=stacked_images,
                    size=size,
                    interpolation=interpolation,
                    ensure_multiple_of=ensure_multiple_of,
                    keep_aspect_ratio=keep_aspect_ratio,
                )
            resized_images_grouped[shape] = stacked_images
        resized_images = reorder_images(resized_images_grouped, grouped_images_index)

        # Regroup: resizing may have produced new shapes (e.g. with keep_aspect_ratio).
        grouped_images, grouped_images_index = group_images_by_shape(resized_images, disable_grouping=disable_grouping)
        processed_images_grouped = {}
        for shape, stacked_images in grouped_images.items():
            if do_center_crop:
                stacked_images = self.center_crop(stacked_images, crop_size)
            if do_pad:
                stacked_images = self.pad_image(stacked_images, size_divisor)
            stacked_images = self.rescale_and_normalize(
                stacked_images, do_rescale, rescale_factor, do_normalize, image_mean, image_std
            )
            processed_images_grouped[shape] = stacked_images

        processed_images = reorder_images(processed_images_grouped, grouped_images_index)
        processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images

        return BatchFeature(data={"pixel_values": processed_images})

    def post_process_depth_estimation(
        self,
        outputs: "DepthEstimatorOutput",
        target_sizes: Optional[Union[TensorType, list[tuple[int, int]], None]] = None,
    ) -> list[dict[str, TensorType]]:
        """
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
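
        Illustrative example (a sketch with random tensors standing in for real
        model outputs):

        ```python
        >>> import torch
        >>> from transformers.modeling_outputs import DepthEstimatorOutput
        >>> processor = DPTImageProcessorFast()
        >>> outputs = DepthEstimatorOutput(predicted_depth=torch.rand(2, 96, 96))
        >>> results = processor.post_process_depth_estimation(outputs, target_sizes=[(384, 384), (384, 384)])
        >>> results[0]["predicted_depth"].shape
        torch.Size([384, 384])
        ```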
        """
        requires_backends(self, "torch")

        predicted_depth = outputs.predicted_depth

        if (target_sizes is not None) and (len(predicted_depth) != len(target_sizes)):
            raise ValueError(
                "Make sure that you pass in as many target sizes as the batch dimension of the predicted depth"
            )

        results = []
        target_sizes = [None] * len(predicted_depth) if target_sizes is None else target_sizes
        for depth, target_size in zip(predicted_depth, target_sizes):
            if target_size is not None:
                # Bicubically upsample the (H, W) depth map to the requested size.
                depth = torch.nn.functional.interpolate(
                    depth.unsqueeze(0).unsqueeze(1), size=target_size, mode="bicubic", align_corners=False
                ).squeeze()

            results.append({"predicted_depth": depth})

        return results


__all__ = ["DPTImageProcessorFast"]
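

# Illustrative usage (hypothetical parameter values): a DINOv2-style setup, as
# described in `DPTFastImageProcessorKwargs`, could be configured as
#
#     processor = DPTImageProcessorFast(do_pad=True, size_divisor=32, keep_aspect_ratio=True, ensure_multiple_of=32)
#     batch = processor(images=pil_image, return_tensors="pt")  # BatchFeature with "pixel_values"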