a
    hI                     @   s  d Z ddlZddlZddlmZmZ ddlZddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZm Z  ddl!m"Z" dZ#G dd deZ$G dd deZ%G dd deZ&G dd dej'Z(G dd deZ)G dd dej'Z*G dd deZ+G dd de+Z,G d d! d!eZ-G d"d# d#eZ.G d$d% d%eZ/G d&d' d'ej'Z0eG d(d) d)eZ1eG d*d+ d+e1Z2G d,d- d-eZ3G d.d/ d/eZ4g d0Z5dS )1zPyTorch SEW model.    N)OptionalUnion)nn   )ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2AttentionWav2Vec2EncoderLayerWav2Vec2FeatureEncoderWav2Vec2FeedForwardWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2GroupNormConvLayerWav2Vec2LayerNormConvLayerWav2Vec2NoLayerNormConvLayerWav2Vec2SamePadLayer_compute_mask_indices   )	SEWConfigc                   @   s   e Zd ZdS )SEWNoLayerNormConvLayerN__name__
__module____qualname__ r   r   _/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/sew/modular_sew.pyr   2   s   r   c                   @   s   e Zd ZdS )SEWLayerNormConvLayerNr   r   r   r   r    r!   6   s   r!   c                   @   s   e Zd ZdS )SEWGroupNormConvLayerNr   r   r   r   r    r"   :   s   r"   c                       s$   e Zd Z fddZdd Z  ZS )SEWPositionalConvEmbeddingc                    s(  t    tj|j|j|j|jd |j|jd| _tj	j
}ttj	jdrRtj	jj
}t rdd l}|jj| jjdd" || jddd| _W d    n1 s0    Y  t| jdr| jjjj}| jjjj}n| jj}| jj}|j| | |j| | n|| jddd| _t|j| _t|j | _d S )	Nr   )kernel_sizepaddinggroupsstrideweight_normr   Zmodifier_rankweight)namedimparametrizations)super__init__r   Conv1dhidden_sizeZnum_conv_pos_embeddingsZnum_conv_pos_embedding_groupssqueeze_factorconvutilsr(   hasattrr-   r   	deepspeedzeroGatheredParametersr*   Z	original0Z	original1weight_gweight_vZregister_external_parameterSEWSamePadLayerr%   r   feat_extract_activation
activation)selfconfigr(   r6   r9   r:   	__class__r   r    r/   ?   s4    
	
0z#SEWPositionalConvEmbedding.__init__c                 C   s"   |  |}| |}| |}|S N)r3   r%   r=   )r>   hidden_statesr   r   r    forwarda   s    


z"SEWPositionalConvEmbedding.forwardr   r   r   r/   rD   __classcell__r   r   r@   r    r#   >   s   "r#   c                   @   s   e Zd ZdS )r;   Nr   r   r   r   r    r;   i   s   r;   c                       s$   e Zd Z fddZdd Z  ZS )SEWUpsamplingc                    s:   t    t|j|j|j | _t|j | _	|j| _d S rB   )
r.   r/   r   Linearr1   r2   
projectionr   r<   r=   r>   r?   r@   r   r    r/   n   s    
zSEWUpsampling.__init__c                 C   sd   |  |}| |}| jdkr`| \}}}|| j }|| j }|||| j|}||||}|S )Nr   )rI   r=   r2   sizeZreshape)r>   rC   ZbszZsrc_lenZsrc_embed_dimZtgt_lenZtgt_embed_dimr   r   r    rD   t   s    




zSEWUpsampling.forwardrE   r   r   r@   r    rG   m   s   rG   c                   @   s   e Zd ZdS )SEWFeatureEncoderNr   r   r   r   r    rL      s   rL   c                       s   e Zd Z fddZ  ZS )SEWFeatureExtractorc                    s8   t  | td| jj d| jjd j dt d S )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)r.   r/   warningswarnrA   r   	__bases__FutureWarningrJ   r@   r   r    r/      s    zSEWFeatureExtractor.__init__)r   r   r   r/   rF   r   r   r@   r    rM      s   rM   c                   @   s   e Zd ZdS )SEWAttentionNr   r   r   r   r    rR      s   rR   c                   @   s   e Zd ZdS )SEWFeedForwardNr   r   r   r   r    rS      s   rS   c                   @   s   e Zd ZdS )SEWEncoderLayerNr   r   r   r   r    rT      s   rT   c                       s&   e Zd Z fddZdddZ  ZS )	
SEWEncoderc                    s   t     | _t | _t j j| _tj	 j
 jd| _t j| _t fddt jD | _t | _d| _d S )Nepsc                    s   g | ]}t  qS r   )rT   ).0_r?   r   r    
<listcomp>       z'SEWEncoder.__init__.<locals>.<listcomp>F)r.   r/   r?   r#   pos_conv_embedr   Z	AvgPool1dr2   pool	LayerNormr1   layer_norm_eps
layer_normDropoutZhidden_dropoutdropoutZ
ModuleListrangeZnum_hidden_layerslayersrG   upsampleZgradient_checkpointingrJ   r@   rZ   r    r/      s    

 
zSEWEncoder.__init__NFTc              	   C   s  |rdnd }|rdnd }|d ur8| ddd|jd }| jjdkrld|| < |d urfd|v rf|nd }nd|| < | d}	|	| jj }
|jd | jj }tj	d||
j
ddd|
jd d}||
ddk  }d	|d d d d d d f j|jd
 }|t|jj }||jd d|jd |jd }|jd }|dd}| |}| |}t|d|d}|dd |f |dd |f  }|dd}| |}| |}t pt| }| jD ]t}|r||f }tg }| jo|| jjk }|r|r"||||d}|d }|r,d}|r||d f }q|rT||f }| |}|jd |k rtj |ddd||jd  f}|st!dd |||fD S t"|||dS )Nr   r   r   Zflash_attention_2        r   device      ?)dtype.)attention_maskoutput_attentions)NNc                 s   s   | ]}|d ur|V  qd S rB   r   )rX   vr   r   r    	<genexpr>   r\   z%SEWEncoder.forward.<locals>.<genexpr>Zlast_hidden_staterC   
attentions)#Z	unsqueezerepeatshaper?   Z_attn_implementationlongsumr2   torcharangerj   viewexpandtorl   Zfinfomin	transposer]   r^   rK   ra   rc   r   r   re   ZrandtrainingZ	layerdroprf   r   Z
functionalpadtupler	   )r>   rC   rm   rn   output_hidden_statesreturn_dictZall_hidden_statesZall_self_attentionsZexpand_attention_maskinput_lengthsoutput_lengthsZmax_encoder_lengthZattention_idsZn_input_timestepsZposition_embeddingsZpooled_hidden_statesZ
min_lengthZsynced_gpuslayerZdropout_probabilityZskip_the_layerZlayer_outputsr   r   r    rD      sv    



&


 






 zSEWEncoder.forward)NFFTrE   r   r   r@   r    rU      s       rU   c                   @   s`   e Zd ZU eed< dZdZdZdZdZ	dZ
dd Zeejef dd	d
ZeejdddZdS )SEWPreTrainedModelr?   Zsewinput_valuesTFc              	   C   s  t |trTtjj|jjddtd|jj	d |jj
   d tj|jjd n,t |tjrz|jjjd| jjd nt |tjtjfr|jj  |jjd nt |tjrt rpddl}t|dr*t|d	r*|jj|j|jgdd
  tj|jj W d   n1 s0    Y  nD|jj|jdd
  tj|jj W d   n1 sd0    Y  ntj|jj t |tjtjfr|jdur|jj  dS )zInitialize the weightsr   r   r   )meanZstdrh   rk   Nr:   r9   r)   )
isinstancer#   r   initZnormal_r3   r*   mathsqrtr$   Zin_channelsZ	constant_ZbiasrH   datar?   Zinitializer_ranger_   Z	GroupNormZzero_Zfill_r0   r   r6   r5   r7   r8   r:   r9   Zkaiming_normal_)r>   moduler6   r   r   r    _init_weights  s.    
 22 z SEWPreTrainedModel._init_weights)r   c                 C   s4   dd }t | jj| jjD ]\}}||||}q|S )zH
        Computes the output length of the convolutional layers
        c                 S   s   t j| | |ddd S )Nfloor)Zrounding_moder   )rw   div)Zinput_lengthr$   r'   r   r   r    _conv_out_length3  s    zMSEWPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length)zipr?   Zconv_kernelZconv_stride)r>   r   r   r$   r'   r   r   r     _get_feat_extract_output_lengths.  s    z3SEWPreTrainedModel._get_feat_extract_output_lengths)feature_vector_lengthrm   c                 C   s~   |  |dtj}|jd }tj||f|j|jd}d|tj	|jd |jd|d f< |
dgd
dg }|S )Nrg   r   )rl   rj   r   ri   )r   rv   r{   rw   ru   rt   Zzerosrl   rj   rx   flipZcumsumbool)r>   r   rm   r   
batch_sizer   r   r    "_get_feature_vector_attention_mask=  s    
"z5SEWPreTrainedModel._get_feature_vector_attention_maskN)r   r   r   r   __annotations__Zbase_model_prefixZmain_input_nameZsupports_gradient_checkpointingZ_supports_flash_attnZ_supports_sdpaZ_supports_flex_attnr   r   rw   
LongTensorintr   r   r   r   r   r    r     s   
 r   c                       s   e Zd Zed fddZdejeej eej dddZ	e
deej eej eej ee ee ee eeef dd	d
Z  ZS )SEWModelrZ   c                    s   t  | || _t|| _tj|jd |jd| _	|jd |j
k| _| jrbt|jd |j
| _t|j| _|jdks|jdkrtt|j
 | _t|| _|   d S )Nrg   rV   rh   )r.   r/   r?   rL   feature_extractorr   r_   Zconv_dimr`   ra   r1   project_featuresrH   feature_projectionrb   Zfeat_proj_dropoutfeature_dropoutmask_time_probmask_feature_prob	Parameterrw   TensorZuniform_masked_spec_embedrU   encoderZ	post_initrJ   r@   r   r    r/   L  s    

zSEWModel.__init__N)rC   mask_time_indicesrm   c                 C   s  t | jdds|S | \}}}|dur<| j|j||< nZ| jjdkr| jrt||f| jj| jj	|| jj
d}tj||jtjd}| j|j||< | jjdkr| jrt||f| jj| jj| jjd}tj||jtjd}|dddf d|d}d||< |S )	z
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://huggingface.co/papers/1904.08779).
        Zapply_spec_augmentTNr   )	mask_probmask_lengthrm   	min_masks)rj   rl   )r   r   r   rg   )getattrr?   rK   r   r{   rl   r   r~   r   Zmask_time_lengthZmask_time_min_masksrw   Ztensorrj   r   r   Zmask_feature_lengthZmask_feature_min_masksrz   )r>   rC   r   rm   r   Zsequence_lengthr1   Zmask_feature_indicesr   r   r    _mask_hidden_states`  s4    zSEWModel._mask_hidden_states)r   rm   r   rn   r   r   returnc           
      C   s   |dur|n| j j}|dur |n| j j}|dur4|n| j j}| |}|dd}| |}| jrl| |}| 	|}|dur| 
|jd |}| j||d}| j|||||d}	|	d }|s|f|	dd  S t||	j|	jdS )a/  
        mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
            masked extracted features in *config.proj_codevector_dim* space.
        Nr   r   )r   )rm   rn   r   r   r   rq   )r?   rn   r   Zuse_return_dictr   r}   ra   r   r   r   r   rt   r   r   r	   rC   rr   )
r>   r   rm   r   rn   r   r   Zextract_featuresrC   Zencoder_outputsr   r   r    rD     s8    



zSEWModel.forward)NN)NNNNN)r   r   r   r   r/   rw   ZFloatTensorr   r   r   r   r   r   r   r   r	   rD   rF   r   r   r@   r    r   J  s.     .     
r   c                   @   s   e Zd ZdS )	SEWForCTCNr   r   r   r   r    r     s   r   c                   @   s   e Zd ZdS )SEWForSequenceClassificationNr   r   r   r   r    r     s   r   )r   r   r   r   )6__doc__r   rN   typingr   r   rw   Ztorch.utils.checkpointr   Zactivationsr   Zintegrations.deepspeedr   Zintegrations.fsdpr   Zmodeling_outputsr	   Zmodeling_utilsr
   r4   r   Zwav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   r   r   Zconfiguration_sewr   Z_HIDDEN_STATES_START_POSITIONr   r!   r"   Moduler#   r;   rG   rL   rM   rR   rS   rT   rU   r   r   r   r   __all__r   r   r   r    <module>   sD   4+fEz