"""PyTorch ResNet model."""

import math
from typing import Optional

import torch
import torch.utils.checkpoint
from torch import Tensor, nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, logging
from ...utils.backbone_utils import BackboneMixin
from .configuration_resnet import ResNetConfig


logger = logging.get_logger(__name__)
eeeeed fddZeeddd	Z  ZS )ResNetConvLayerr   r   relu)in_channelsout_channelskernel_sizestride
activationc                    sP   t    tj|||||d dd| _t|| _|d urBt| nt | _	d S )N   F)r   r   paddingbias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr	   Identityr   )selfr   r   r   r   r   	__class__ f/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/resnet/modeling_resnet.pyr   *   s    
zResNetConvLayer.__init__inputreturnc                 C   s"   |  |}| |}| |}|S N)r!   r#   r   r%   r+   hidden_stater(   r(   r)   forward4   s    


zResNetConvLayer.forward)r   r   r   )	__name__
__module____qualname__intstrr   r   r0   __classcell__r(   r(   r&   r)   r   )   s
    

r   c                       s6   e Zd ZdZed fddZeedddZ  ZS )ResNetEmbeddingszO


class ResNetEmbeddings(nn.Module):
    """
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    """

    def __init__(self, config: ResNetConfig):
        super().__init__()
        self.embedder = ResNetConvLayer(
            config.num_channels, config.embedding_size, kernel_size=7, stride=2, activation=config.hidden_act
        )
        self.pooler = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.num_channels = config.num_channels

    def forward(self, pixel_values: Tensor) -> Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values matches the one set in the configuration."
            )
        embedding = self.embedder(pixel_values)
        embedding = self.pooler(embedding)
        return embedding


class ResNetShortCut(nn.Module):
    """
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    """

    def __init__(self, in_channels: int, out_channels: int, stride: int = 2):
        super().__init__()
        self.convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
        self.normalization = nn.BatchNorm2d(out_channels)

    def forward(self, input: Tensor) -> Tensor:
        hidden_state = self.convolution(input)
        hidden_state = self.normalization(hidden_state)
        return hidden_state


class ResNetBasicLayer(nn.Module):
    """
    A classic ResNet's residual layer composed of two `3x3` convolutions.
    """

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, activation: str = "relu"):
        super().__init__()
        should_apply_shortcut = in_channels != out_channels or stride != 1
        self.shortcut = (
            ResNetShortCut(in_channels, out_channels, stride=stride) if should_apply_shortcut else nn.Identity()
        )
        self.layer = nn.Sequential(
            ResNetConvLayer(in_channels, out_channels, stride=stride),
            ResNetConvLayer(out_channels, out_channels, activation=None),
        )
        self.activation = ACT2FN[activation]

    def forward(self, hidden_state):
        residual = hidden_state
        hidden_state = self.layer(hidden_state)
        residual = self.shortcut(residual)
        hidden_state += residual
        hidden_state = self.activation(hidden_state)
        return hidden_state


class ResNetBottleNeckLayer(nn.Module):
    """
    A classic ResNet's bottleneck layer composed of three convolutions (`1x1`, `3x3`, `1x1`).

    The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`. If
    `downsample_in_bottleneck` is true, downsampling is applied in the first `1x1` convolution instead of the
    second (`3x3`) convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        activation: str = "relu",
        reduction: int = 4,
        downsample_in_bottleneck: bool = False,
    ):
        super().__init__()
        should_apply_shortcut = in_channels != out_channels or stride != 1
        reduces_channels = out_channels // reduction
        self.shortcut = (
            ResNetShortCut(in_channels, out_channels, stride=stride) if should_apply_shortcut else nn.Identity()
        )
        self.layer = nn.Sequential(
            ResNetConvLayer(
                in_channels, reduces_channels, kernel_size=1, stride=stride if downsample_in_bottleneck else 1
            ),
            ResNetConvLayer(reduces_channels, reduces_channels, stride=stride if not downsample_in_bottleneck else 1),
            ResNetConvLayer(reduces_channels, out_channels, kernel_size=1, activation=None),
        )
        self.activation = ACT2FN[activation]

    def forward(self, hidden_state):
        residual = hidden_state
        hidden_state = self.layer(hidden_state)
        residual = self.shortcut(residual)
        hidden_state += residual
        hidden_state = self.activation(hidden_state)
        return hidden_state
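

# Shape sketch for one bottleneck block, with illustrative values in_channels=256,
# out_channels=512, stride=2, reduction=4, downsample_in_bottleneck=False
# (the channel numbers are assumptions for the example, not taken from this file):
#   1x1 conv: (B, 256, H, W)      -> (B, 128, H, W)        reduce channels
#   3x3 conv: (B, 128, H, W)      -> (B, 128, H/2, W/2)    stride-2 downsample
#   1x1 conv: (B, 128, H/2, W/2)  -> (B, 512, H/2, W/2)    expand, no activation
#   shortcut: (B, 256, H, W)      -> (B, 512, H/2, W/2)    1x1 projection, added to the output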


class ResNetStage(nn.Module):
    """
    A ResNet stage composed of stacked layers.
    """

    def __init__(
        self,
        config: ResNetConfig,
        in_channels: int,
        out_channels: int,
        stride: int = 2,
        depth: int = 2,
    ):
        super().__init__()

        layer = ResNetBottleNeckLayer if config.layer_type == "bottleneck" else ResNetBasicLayer

        if config.layer_type == "bottleneck":
            first_layer = layer(
                in_channels,
                out_channels,
                stride=stride,
                activation=config.hidden_act,
                downsample_in_bottleneck=config.downsample_in_bottleneck,
            )
        else:
            first_layer = layer(in_channels, out_channels, stride=stride, activation=config.hidden_act)
        self.layers = nn.Sequential(
            # downsampling is done in the first layer, which uses a stride of 2
            first_layer,
            *[layer(out_channels, out_channels, activation=config.hidden_act) for _ in range(depth - 1)],
        )

    def forward(self, input: Tensor) -> Tensor:
        hidden_state = input
        for layer in self.layers:
            hidden_state = layer(hidden_state)
        return hidden_state


class ResNetEncoder(nn.Module):
    def __init__(self, config: ResNetConfig):
        super().__init__()
        self.stages = nn.ModuleList([])
        # based on `downsample_in_first_stage`, the first layer of the first stage may or may not downsample the input
        self.stages.append(
            ResNetStage(
                config,
                config.embedding_size,
                config.hidden_sizes[0],
                stride=2 if config.downsample_in_first_stage else 1,
                depth=config.depths[0],
            )
        )
        in_out_channels = zip(config.hidden_sizes, config.hidden_sizes[1:])
        for (in_channels, out_channels), depth in zip(in_out_channels, config.depths[1:]):
            self.stages.append(ResNetStage(config, in_channels, out_channels, depth=depth))

    def forward(
        self, hidden_state: Tensor, output_hidden_states: bool = False, return_dict: bool = True
    ) -> BaseModelOutputWithNoAttention:
        hidden_states = () if output_hidden_states else None

        for stage_module in self.stages:
            if output_hidden_states:
                hidden_states = hidden_states + (hidden_state,)

            hidden_state = stage_module(hidden_state)

        if output_hidden_states:
            hidden_states = hidden_states + (hidden_state,)

        if not return_dict:
            return tuple(v for v in [hidden_state, hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(
            last_hidden_state=hidden_state,
            hidden_states=hidden_states,
        )


@auto_docstring
class ResNetPreTrainedModel(PreTrainedModel):
    config: ResNetConfig
    base_model_prefix = "resnet"
    main_input_name = "pixel_values"
    _no_split_modules = ["ResNetConvLayer", "ResNetShortCut"]

    def _init_weights(self, module):
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
        elif isinstance(module, nn.Linear):
            # mirrors the default `reset_parameters` scheme of `nn.Linear`
            nn.init.kaiming_uniform_(module.weight, a=math.sqrt(5))
            if module.bias is not None:
                fan_in, _ = nn.init._calculate_fan_in_and_fan_out(module.weight)
                bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
                nn.init.uniform_(module.bias, -bound, bound)
        elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
S )ResNetModelc                    s>   t  | || _t|| _t|| _td| _	| 
  d S )N)r   r   )r   r   r9   r7   r>   r^   encoderr   ZAdaptiveAvgPool2dr?   	post_initr@   r&   r(   r)   r     s    

zResNetModel.__init__NrA   rc   rd   r,   c                 C   s|   |d ur|n| j j}|d ur |n| j j}| |}| j|||d}|d }| |}|sl||f|dd   S t|||jdS )Nrc   rd   r   r   )rg   pooler_outputrh   )r9   rc   use_return_dictr>   rz   r?   r   rh   )r%   rA   rc   rd   embedding_outputZencoder_outputsrg   pooled_outputr(   r(   r)   r0     s     

zResNetModel.forward)NN)r1   r2   r3   r   r   r   r   rT   r   r0   r6   r(   r(   r&   r)   ry     s   	 ry   z


@auto_docstring(
    custom_intro="""
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """
)
class ResNetForImageClassification(ResNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.resnet = ResNetModel(config)
        # classification head
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity(),
        )
        # initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> ImageClassifierOutputWithNoAttention:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.resnet(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"
            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return (loss,) + output if loss is not None else output

        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
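

# Usage sketch for the classification head, reusing `processor` and `image` from the
# sketch above (the ImageNet-trained "microsoft/resnet-50" checkpoint is an assumption):
#
#     >>> import torch
#     >>> from transformers import ResNetForImageClassification
#
#     >>> model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
#     >>> with torch.no_grad():
#     ...     logits = model(**processor(image, return_tensors="pt")).logits
#     >>> model.config.id2label[logits.argmax(-1).item()]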


@auto_docstring(
    custom_intro="""
    ResNet backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class ResNetBackbone(ResNetPreTrainedModel, BackboneMixin):
    has_attentions = False

    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.num_features = [config.embedding_size] + config.hidden_sizes
        self.embedder = ResNetEmbeddings(config)
        self.encoder = ResNetEncoder(config)

        # initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self, pixel_values: Tensor, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None
    ) -> BackboneOutput:
        r"""
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        >>> model = AutoBackbone.from_pretrained(
        ...     "microsoft/resnet-50", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 2048, 7, 7]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embedder(pixel_values)

        outputs = self.encoder(embedding_output, output_hidden_states=True, return_dict=True)

        hidden_states = outputs.hidden_states

        feature_maps = ()
        for idx, stage in enumerate(self.stage_names):
            if stage in self.out_features:
                feature_maps += (hidden_states[idx],)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (outputs.hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=None,
        )


__all__ = ["ResNetForImageClassification", "ResNetModel", "ResNetPreTrainedModel", "ResNetBackbone"]