a
    hzS                     @   s  d Z ddlZddlmZmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ eeZeedddZd-eeef edddZ G dd dej!Z"G dd dej#Z$G dd dej!Z%G dd dej!Z&G dd dej!Z'G dd dej!Z(G d d! d!ej!Z)G d"d# d#ej!Z*eG d$d% d%eZ+eG d&d' d'e+Z,ed(d)G d*d+ d+e+Z-g d,Z.dS ).zPyTorch EfficientNet model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )EfficientNetConfig)confignum_channelsc                 C   sJ   | j }|| j9 }t|t||d  | | }|d| k rB||7 }t|S )z<
    Round number of filters based on depth multiplier.
       g?)Zdepth_divisorZwidth_coefficientmaxint)r   r   ZdivisorZnew_dim r   r/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/efficientnet/modeling_efficientnet.pyround_filters'   s    
r   T)kernel_sizeadjustc                 C   sr   t | tr| | f} | d d | d d f}|rR|d d |d |d d |d fS |d |d |d |d fS dS )aJ  
    Utility function to get the tuple padding value for the depthwise convolution.

    Args:
        kernel_size (`int` or `tuple`):
            Kernel size of the convolution layers.
        adjust (`bool`, *optional*, defaults to `True`):
            Adjusts padding value to apply to right and bottom sides of the input.
    r   r   r   N)
isinstancer   )r   r   Zcorrectr   r   r   correct_pad6   s    

$r   c                       s:   e Zd ZdZed fddZejejdddZ  Z	S )EfficientNetEmbeddingszL
    A module that corresponds to the stem module of the original work.
    r   c                    sh   t    t|d| _tjdd| _tj|j| jddddd| _	tj
| j|j|jd	| _t|j | _d S )
N    )r   r   r   r   paddingr   r   validFr   strider"   bias)epsmomentum)super__init__r   out_dimr   	ZeroPad2dr"   Conv2dr   convolutionBatchNorm2dbatch_norm_epsbatch_norm_momentum	batchnormr	   
hidden_act
activationselfr   	__class__r   r   r*   O   s    
zEfficientNetEmbeddings.__init__)pixel_valuesreturnc                 C   s,   |  |}| |}| |}| |}|S N)r"   r.   r2   r4   )r6   r9   featuresr   r   r   forwardZ   s
    



zEfficientNetEmbeddings.forward)
__name__
__module____qualname____doc__r   r*   torchTensorr=   __classcell__r   r   r7   r   r   J   s   r   c                       s   e Zd Zd fdd	Z  ZS )	EfficientNetDepthwiseConv2dr   r   r   Tzerosc	           
         s*   || }	t  j||	|||||||d	 d S )N)	in_channelsout_channelsr   r%   r"   dilationgroupsr&   padding_mode)r)   r*   )
r6   rG   Zdepth_multiplierr   r%   r"   rI   r&   rK   rH   r7   r   r   r*   d   s    z$EfficientNetDepthwiseConv2d.__init__)r   r   r   r   r   TrF   )r>   r?   r@   r*   rD   r   r   r7   r   rE   c   s          rE   c                       s@   e Zd ZdZeeeed fddZejej	dddZ
  ZS )EfficientNetExpansionLayerz_
    This corresponds to the expansion phase of each block in the original implementation.
    r   in_dimr+   r%   c                    sB   t    tj||dddd| _tj||jd| _t|j	 | _
d S )Nr   sameFrG   rH   r   r"   r&   )num_featuresr'   )r)   r*   r   r-   expand_convr/   r0   	expand_bnr	   r3   
expand_act)r6   r   rN   r+   r%   r7   r   r   r*      s    
z#EfficientNetExpansionLayer.__init__hidden_statesr:   c                 C   s"   |  |}| |}| |}|S r;   )rR   rS   rT   r6   rV   r   r   r   r=      s    


z"EfficientNetExpansionLayer.forward)r>   r?   r@   rA   r   r   r*   rB   FloatTensorrC   r=   rD   r   r   r7   r   rL   }   s   rL   c                       sB   e Zd ZdZeeeeed fddZej	ej
dddZ  ZS )EfficientNetDepthwiseLayerzk
    This corresponds to the depthwise convolution phase of each block in the original implementation.
    r   rN   r%   r   adjust_paddingc                    sv   t    || _| jdkrdnd}t||d}tj|d| _t||||dd| _tj	||j
|jd| _t|j | _d S )	Nr   r#   rO   )r   r!   Fr$   rQ   r'   r(   )r)   r*   r%   r   r   r,   depthwise_conv_padrE   depthwise_convr/   r0   r1   depthwise_normr	   r3   depthwise_act)r6   r   rN   r%   r   r[   Zconv_padr"   r7   r   r   r*      s    


z#EfficientNetDepthwiseLayer.__init__rU   c                 C   s6   | j dkr| |}| |}| |}| |}|S )Nr   )r%   r]   r^   r_   r`   rW   r   r   r   r=      s    




z"EfficientNetDepthwiseLayer.forwardr>   r?   r@   rA   r   r   boolr*   rB   rX   rC   r=   rD   r   r   r7   r   rY      s   rY   c                       sB   e Zd ZdZd	eeeed fddZej	ej
dddZ  ZS )
EfficientNetSqueezeExciteLayerzl
    This corresponds to the Squeeze and Excitement phase of each block in the original implementation.
    Fr   rN   
expand_dimexpandc                    s   t    |r|n|| _tdt||j | _tjdd| _	tj
| j| jddd| _tj
| j| jddd| _t|j | _t | _d S )Nr   )Zoutput_sizerO   )rG   rH   r   r"   )r)   r*   dimr   r   Zsqueeze_expansion_ratioZdim_ser   ZAdaptiveAvgPool2dsqueezer-   reducerf   r	   r3   
act_reduceZSigmoid
act_expand)r6   r   rN   re   rf   r7   r   r   r*      s$    
z'EfficientNetSqueezeExciteLayer.__init__rU   c                 C   sF   |}|  |}| |}| |}| |}| |}t||}|S r;   )rh   ri   rj   rf   rk   rB   mul)r6   rV   inputsr   r   r   r=      s    




z&EfficientNetSqueezeExciteLayer.forward)Fra   r   r   r7   r   rc      s   rc   c                       sH   e Zd ZdZeeeeeed fddZe	j
e	j
e	jdddZ  ZS )EfficientNetFinalBlockLayerz[
    This corresponds to the final phase of each block in the original implementation.
    r   rN   r+   r%   	drop_rateid_skipc                    sX   t    |dko| | _tj||dddd| _tj||j|jd| _	tj
|d| _d S )Nr   rO   FrP   r\   p)r)   r*   apply_dropoutr   r-   project_convr/   r0   r1   
project_bnDropoutdropout)r6   r   rN   r+   r%   rp   rq   r7   r   r   r*      s    

z$EfficientNetFinalBlockLayer.__init__)
embeddingsrV   r:   c                 C   s0   |  |}| |}| jr,| |}|| }|S r;   )ru   rv   rt   rx   )r6   ry   rV   r   r   r   r=      s    


z#EfficientNetFinalBlockLayer.forwardr>   r?   r@   rA   r   r   floatrb   r*   rB   rX   rC   r=   rD   r   r   r7   r   rn      s   rn   c                
       sJ   e Zd ZdZeeeeeeeeed	 fddZe	j
e	jdddZ  ZS )EfficientNetBlocka  
    This corresponds to the expansion and depthwise convolution phase of each block in the original implementation.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
        in_dim (`int`):
            Number of input channels.
        out_dim (`int`):
            Number of output channels.
        stride (`int`):
            Stride size to be used in convolution layers.
        expand_ratio (`int`):
            Expand ratio to set the output dimensions for the expansion and squeeze-excite layers.
        kernel_size (`int`):
            Kernel size for the depthwise convolution layer.
        drop_rate (`float`):
            Dropout rate to be used in the final phase of each block.
        id_skip (`bool`):
            Whether to apply dropout and sum the final hidden states with the input embeddings during the final phase
            of each block. Set to `True` for the first block of each stage.
        adjust_padding (`bool`):
            Whether to apply padding to only right and bottom side of the input kernel before the depthwise convolution
            operation, set to `True` for inputs with odd input sizes.
    )	r   rN   r+   r%   expand_ratior   rp   rq   r[   c
                    s   t    || _| jdk| _|| }
| jr<t|||
|d| _t|| jrJ|
n||||	d| _t|||
| jd| _	t
|| jr||
n|||||d| _d S )Nr   rM   rZ   rd   ro   )r)   r*   r}   rf   rL   	expansionrY   r^   rc   squeeze_excitern   
projection)r6   r   rN   r+   r%   r}   r   rp   rq   r[   Zexpand_in_dimr7   r   r   r*   !  s4    

zEfficientNetBlock.__init__rU   c                 C   s<   |}| j dkr| |}| |}| |}| ||}|S )Nr   )r}   r~   r^   r   r   )r6   rV   ry   r   r   r   r=   J  s    



zEfficientNetBlock.forwardrz   r   r   r7   r   r|     s   )r|   c                       sF   e Zd ZdZed fddZd
ejee	 ee	 e
ddd	Z  ZS )EfficientNetEncoderz
    Forward propagates the embeddings through each EfficientNet block.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
    r   c                    sn  t    |_|j_fdd t|j}t fdd|jD }d}g }t|D ]}t	||j| }t	||j
| }|j| }	|j| }
|j| }t |j| D ]p}|dk}|dkrdn|	}	|dkr|n|}||jv}|j| | }t||||	|
||||d	}|| |d7 }qqVt|_tj|t	|ddd	d
d_tj|j|j|jd_t|j _d S )Nc                    s   t t j|  S r;   )r   mathceildepth_coefficient)Zrepeats)r6   r   r   round_repeatse  s    z3EfficientNetEncoder.__init__.<locals>.round_repeatsc                 3   s   | ]} |V  qd S r;   r   ).0n)r   r   r   	<genexpr>j      z/EfficientNetEncoder.__init__.<locals>.<genexpr>r   r   )	r   rN   r+   r%   r   r}   rp   rq   r[   i   rO   FrP   r\   )r)   r*   r   r   lenrG   sumZnum_block_repeatsranger   rH   stridesZkernel_sizesZexpand_ratiosZdepthwise_paddingZdrop_connect_rater|   appendr   Z
ModuleListblocksr-   top_convr/   
hidden_dimr0   r1   top_bnr	   r3   top_activation)r6   r   Znum_base_blocksZ
num_blocksZcurr_block_numr   irN   r+   r%   r   r}   jrq   r[   rp   blockr7   )r   r6   r   r*   `  sZ    






zEfficientNetEncoder.__init__FT)rV   output_hidden_statesreturn_dictr:   c                 C   st   |r
|fnd }| j D ]}||}|r||f7 }q| |}| |}| |}|shtdd ||fD S t||dS )Nc                 s   s   | ]}|d ur|V  qd S r;   r   )r   vr   r   r   r     r   z.EfficientNetEncoder.forward.<locals>.<genexpr>)last_hidden_staterV   )r   r   r   r   tupler
   )r6   rV   r   r   Zall_hidden_statesr   r   r   r   r=     s    



zEfficientNetEncoder.forward)FT)r>   r?   r@   rA   r   r*   rB   rX   r   rb   r
   r=   rD   r   r   r7   r   r   W  s   :  r   c                   @   s2   e Zd ZU eed< dZdZg Zej	dddZ
dS )EfficientNetPreTrainedModelr   efficientnetr9   )modulec                 C   sF   t |tjtjtjfrB|jjjd| jj	d |j
durB|j
j  dS )zInitialize the weightsg        )meanZstdN)r   r   Linearr-   r/   weightdataZnormal_r   Zinitializer_ranger&   Zzero_)r6   r   r   r   r   _init_weights  s    
z)EfficientNetPreTrainedModel._init_weightsN)r>   r?   r@   r   __annotations__Zbase_model_prefixZmain_input_nameZ_no_split_modulesr   Moduler   r   r   r   r   r     s
   
r   c                       sR   e Zd Zed fddZedeej ee	 ee	 e
eef dddZ  ZS )	EfficientNetModelr   c                    s~   t  | || _t|| _t|| _|jdkrDtj	|j
dd| _n.|jdkrbtj|j
dd| _ntd|j |   d S )Nr   T)Z	ceil_moder   z2config.pooling must be one of ['mean', 'max'] got )r)   r*   r   r   ry   r   encoderZpooling_typer   Z	AvgPool2dr   poolerZ	MaxPool2d
ValueErrorZpooling	post_initr5   r7   r   r   r*     s    



zEfficientNetModel.__init__N)r9   r   r   r:   c                 C   s   |d ur|n| j j}|d ur |n| j j}|d u r8td| |}| j|||d}|d }| |}||jd d }|s||f|dd   S t	|||j
dS )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputrV   )r   r   use_return_dictr   ry   r   r   Zreshapeshaper   rV   )r6   r9   r   r   Zembedding_outputZencoder_outputsr   pooled_outputr   r   r   r=     s*    

zEfficientNetModel.forward)NNN)r>   r?   r@   r   r*   r   r   rB   rX   rb   r   r   r   r=   rD   r   r   r7   r   r     s      
r   z
    EfficientNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g.
    for ImageNet.
    )Zcustom_introc                	       sT   e Zd Z fddZedeej eej ee	 ee	 e
eef dddZ  ZS )"EfficientNetForImageClassificationc                    sd   t  | |j| _|| _t|| _tj|jd| _	| jdkrNt
|j| jnt | _|   d S )Nrr   r   )r)   r*   
num_labelsr   r   r   r   rw   Zdropout_raterx   r   r   ZIdentity
classifierr   r5   r7   r   r   r*     s    
$z+EfficientNetForImageClassification.__init__N)r9   labelsr   r   r:   c                 C   sv  |dur|n| j j}| j|||d}|r.|jn|d }| |}| |}d}|dur6| j jdu r| jdkrxd| j _n4| jdkr|jt	j
ks|jt	jkrd| j _nd| j _| j jdkrt }	| jdkr|	| | }n
|	||}nN| j jdkrt }	|	|d| j|d}n| j jdkr6t }	|	||}|sf|f|dd  }
|durb|f|
 S |
S t|||jd	S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr   )losslogitsrV   )r   r   r   r   rx   r   Zproblem_typer   ZdtyperB   longr   r   rh   r   viewr   r   rV   )r6   r9   r   r   r   outputsr   r   r   Zloss_fctoutputr   r   r   r=     s@    




"


z*EfficientNetForImageClassification.forward)NNNN)r>   r?   r@   r*   r   r   rB   rX   Z
LongTensorrb   r   r   r   r=   rD   r   r   r7   r   r     s       
r   )r   r   r   )T)/rA   r   typingr   r   rB   Ztorch.utils.checkpointr   Ztorch.nnr   r   r   Zactivationsr	   Zmodeling_outputsr
   r   r   Zmodeling_utilsr   utilsr   r   Zconfiguration_efficientnetr   Z
get_loggerr>   loggerr   r   r   rb   r   r   r   r-   rE   rL   rY   rc   rn   r|   r   r   r   r   __all__r   r   r   r   <module>   s>   
''!QZ8E