a
    h;                     @   s|  d Z ddlZddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ eeZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$eG dd deZ%eG dd  d e%Z&ed!d"G d#d$ d$e%Z'g d%Z(dS )&zPyTorch RegNet model.    N)Optional)Tensornn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )RegNetConfigc                       s:   e Zd Zd	eeeeeee d fddZdd Z  ZS )
RegNetConvLayerr   r   relu)in_channelsout_channelskernel_sizestridegroups
activationc              	      sR   t    tj|||||d |dd| _t|| _|d urDt| nt | _	d S )N   F)r   r   paddingr   bias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr	   Identityr   )selfr   r   r   r   r   r   	__class__ f/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/regnet/modeling_regnet.pyr   (   s    	
	zRegNetConvLayer.__init__c                 C   s"   |  |}| |}| |}|S N)r    r"   r   r$   hidden_stater'   r'   r(   forward>   s    


zRegNetConvLayer.forward)r   r   r   r   )	__name__
__module____qualname__intr   strr   r,   __classcell__r'   r'   r%   r(   r   '   s       r   c                       s.   e Zd ZdZed fddZdd Z  ZS )RegNetEmbeddingszO
    RegNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    s0   t    t|j|jdd|jd| _|j| _d S )Nr   r   )r   r   r   )r   r   r   num_channelsembedding_size
hidden_actembedderr$   r5   r%   r'   r(   r   J   s
    
zRegNetEmbeddings.__init__c                 C   s*   |j d }|| jkrtd| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaper6   
ValueErrorr9   )r$   pixel_valuesr6   r+   r'   r'   r(   r,   Q   s    


zRegNetEmbeddings.forward)r-   r.   r/   __doc__r   r   r,   r2   r'   r'   r%   r(   r3   E   s   r3   c                       s<   e Zd ZdZd	eeed fddZeedddZ  ZS )
RegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   )r   r   r   c                    s0   t    tj||d|dd| _t|| _d S )Nr   F)r   r   r   )r   r   r   r   r    r!   r"   )r$   r   r   r   r%   r'   r(   r   b   s    
zRegNetShortCut.__init__)inputreturnc                 C   s   |  |}| |}|S r)   )r    r"   )r$   r@   r+   r'   r'   r(   r,   g   s    

zRegNetShortCut.forward)r   )	r-   r.   r/   r>   r0   r   r   r,   r2   r'   r'   r%   r(   r?   \   s   r?   c                       s0   e Zd ZdZeed fddZdd Z  ZS )RegNetSELayerz
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://huggingface.co/papers/1709.01507).
    )r   reduced_channelsc              	      sL   t    td| _ttj||ddt tj||ddt | _	d S )Nr   r   r   )r   )
r   r   r   AdaptiveAvgPool2dpooler
Sequentialr   ZReLUZSigmoid	attention)r$   r   rC   r%   r'   r(   r   r   s    
zRegNetSELayer.__init__c                 C   s    |  |}| |}|| }|S r)   )rF   rH   )r$   r+   ZpooledrH   r'   r'   r(   r,   }   s    

zRegNetSELayer.forward)r-   r.   r/   r>   r0   r   r,   r2   r'   r'   r%   r(   rB   m   s   rB   c                       s6   e Zd ZdZdeeeed fddZdd Z  ZS )	RegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    r   r5   r   r   r   c              
      s   t    ||kp|dk}td||j }|r<t|||dnt | _tt	||d|j
dt	|||||j
dt	||dd d| _t|j
 | _d S )Nr   r   r   r   r   r   r   )r   r   maxgroups_widthr?   r   r#   shortcutrG   r   r8   layerr	   r   r$   r5   r   r   r   Zshould_apply_shortcutr   r%   r'   r(   r      s    
zRegNetXLayer.__init__c                 C   s.   |}|  |}| |}||7 }| |}|S r)   rQ   rP   r   r$   r+   Zresidualr'   r'   r(   r,      s    


zRegNetXLayer.forward)r   	r-   r.   r/   r>   r   r0   r   r,   r2   r'   r'   r%   r(   rI      s   rI   c                       s6   e Zd ZdZdeeeed fddZdd Z  ZS )	RegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    r   rJ   c                    s   t    ||kp|dk}td||j }|r<t|||dnt | _tt	||d|j
dt	|||||j
dt|tt|d dt	||dd d| _t|j
 | _d S )Nr   rK   rL   rM      )rC   )r   r   rN   rO   r?   r   r#   rP   rG   r   r8   rB   r0   roundrQ   r	   r   rR   r%   r'   r(   r      s    
zRegNetYLayer.__init__c                 C   s.   |}|  |}| |}||7 }| |}|S r)   rS   rT   r'   r'   r(   r,      s    


zRegNetYLayer.forward)r   rU   r'   r'   r%   r(   rV      s   rV   c                       s8   e Zd ZdZdeeeeed fddZdd Z  ZS )	RegNetStagez4
    A RegNet stage composed by stacked layers.
    r   )r5   r   r   r   depthc                    sZ   t     jdkrtnttj ||dg fddt|d D R  | _d S )NxrK   c                    s   g | ]} qS r'   r'   ).0_r5   rQ   r   r'   r(   
<listcomp>       z(RegNetStage.__init__.<locals>.<listcomp>r   )	r   r   Z
layer_typerI   rV   r   rG   rangelayers)r$   r5   r   r   r   rZ   r%   r^   r(   r      s    
zRegNetStage.__init__c                 C   s   |  |}|S r)   )rb   r*   r'   r'   r(   r,      s    
zRegNetStage.forward)r   r   rU   r'   r'   r%   r(   rY      s   	  rY   c                       s8   e Zd Zed fddZd	eeeedddZ  Z	S )
RegNetEncoderr4   c              	      s   t    tg | _| jt||j|jd |j	r6dnd|j
d d t|j|jdd  }t||j
dd  D ]$\\}}}| jt||||d qrd S )Nr   r   r   )r   rZ   )rZ   )r   r   r   Z
ModuleListstagesappendrY   r7   hidden_sizesZdownsample_in_first_stageZdepthszip)r$   r5   Zin_out_channelsr   r   rZ   r%   r'   r(   r      s    
	 zRegNetEncoder.__init__FT)r+   output_hidden_statesreturn_dictrA   c                 C   sb   |rdnd }| j D ]}|r$||f }||}q|r<||f }|sVtdd ||fD S t||dS )Nr'   c                 s   s   | ]}|d ur|V  qd S r)   r'   )r\   vr'   r'   r(   	<genexpr>   r`   z(RegNetEncoder.forward.<locals>.<genexpr>)last_hidden_statehidden_states)rd   tupler
   )r$   r+   rh   ri   rm   Zstage_moduler'   r'   r(   r,      s    



zRegNetEncoder.forward)FT)
r-   r.   r/   r   r   r   boolr
   r,   r2   r'   r'   r%   r(   rc      s    rc   c                   @   s,   e Zd ZU eed< dZdZdgZdd ZdS )RegNetPreTrainedModelr5   regnetr=   rV   c                 C   s   t |tjr"tjj|jddd nt |tjrtjj|jt	dd |j
d urtj|j\}}|dkrxdt	| nd}tj|j
| | n2t |tjtjfrtj|jd tj|j
d d S )NZfan_outr   )modeZnonlinearity   )ar   r   )
isinstancer   r   initZkaiming_normal_weightLinearZkaiming_uniform_mathsqrtr   Z_calculate_fan_in_and_fan_outZuniform_r!   Z	GroupNormZ	constant_)r$   moduleZfan_inr]   boundr'   r'   r(   _init_weights  s    
z#RegNetPreTrainedModel._init_weightsN)	r-   r.   r/   r   __annotations__Zbase_model_prefixZmain_input_nameZ_no_split_modulesr}   r'   r'   r'   r(   rp     s
   
rp   c                       s>   e Zd Z fddZedeee ee edddZ	  Z
S )RegNetModelc                    s>   t  | || _t|| _t|| _td| _	| 
  d S )NrD   )r   r   r5   r3   r9   rc   encoderr   rE   rF   	post_initr:   r%   r'   r(   r     s    

zRegNetModel.__init__N)r=   rh   ri   rA   c                 C   s|   |d ur|n| j j}|d ur |n| j j}| |}| j|||d}|d }| |}|sl||f|dd   S t|||jdS )Nrh   ri   r   r   )rl   pooler_outputrm   )r5   rh   use_return_dictr9   r   rF   r   rm   )r$   r=   rh   ri   Zembedding_outputZencoder_outputsrl   pooled_outputr'   r'   r(   r,   (  s     

zRegNetModel.forward)NN)r-   r.   r/   r   r   r   r   ro   r   r,   r2   r'   r'   r%   r(   r     s   	 r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )Zcustom_introc                       sL   e Zd Z fddZedeej eej ee	 ee	 e
dddZ  ZS )RegNetForImageClassificationc                    s^   t  | |j| _t|| _tt |jdkrFt|j	d |jnt
 | _|   d S )Nr   )r   r   
num_labelsr   rq   r   rG   ZFlattenrx   rf   r#   
classifierr   r:   r%   r'   r(   r   M  s    
$z%RegNetForImageClassification.__init__N)r=   labelsrh   ri   rA   c                 C   sl  |dur|n| j j}| j|||d}|r.|jn|d }| |}d}|dur,| j jdu r| jdkrnd| j _n4| jdkr|jtj	ks|jtj
krd| j _nd| j _| j jdkrt }	| jdkr|	| | }n
|	||}nN| j jdkrt }	|	|d| j|d}n| j jdkr,t }	|	||}|s\|f|dd  }
|durX|f|
 S |
S t|||jd	S )
a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr   r   )losslogitsrm   )r5   r   rq   r   r   Zproblem_typer   Zdtypetorchlongr0   r   Zsqueezer   viewr   r   rm   )r$   r=   r   rh   ri   outputsr   r   r   Zloss_fctoutputr'   r'   r(   r,   Y  s6    



"


z$RegNetForImageClassification.forward)NNNN)r-   r.   r/   r   r   r   r   ZFloatTensorZ
LongTensorro   r   r,   r2   r'   r'   r%   r(   r   E  s       r   )r   r   rp   ))r>   ry   typingr   r   Ztorch.utils.checkpointr   r   Ztorch.nnr   r   r   Zactivationsr	   Zmodeling_outputsr
   r   r   Zmodeling_utilsr   utilsr   r   Zconfiguration_regnetr   Z
get_loggerr-   loggerModuler   r3   r?   rB   rI   rV   rY   rc   rp   r   r   __all__r'   r'   r'   r(   <module>   s:   
!&'@