a
    hZ                     @   s  d dl Z d dlmZmZ d dlZd dlmZ d dlm  mZ	 ddl
mZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZmZmZmZm Z  ddl!m"Z" e#e$Z%G dd deZ&G dd deZ'G dd dej(Z)G dd deZ*G dd deZ+G dd deZ,G dd dej(Z-G dd dej(Z.G dd dej(Z/G d d! d!ee Z0eZ1G d"d# d#eZ2G d$d% d%eZ3G d&d' d'eZ4G d(d) d)eZ5G d*d+ d+eZ6g d,Z7dS )-    N)OptionalUnion   )is_deepspeed_zero3_enabled)is_fsdp_managed_module)GradientCheckpointingLayer)BaseModelOutputWav2Vec2BaseModelOutput)PreTrainedModel)logging   )	Wav2Vec2FeatureProjectionWav2Vec2FeedForward#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PositionalConvEmbeddingWav2Vec2PreTrainedModel   )WavLMConfigc                   @   s   e Zd ZdS )WavLMPositionalConvEmbeddingN__name__
__module____qualname__ r   r   c/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/wavlm/modular_wavlm.pyr      s   r   c                   @   s   e Zd ZdS )WavLMFeatureProjectionNr   r   r   r   r   r   #   s   r   c                       s   e Zd ZdZdeeeeeed fddZdej	e
ej	 e
ej	 eeej	e
ej	 e
eej	  f dddZejeejejf ejeejejfdddZeeejdddZejejdddZ  ZS )WavLMAttentionz=Multi-headed attention from 'Attention Is All You Need' paper        @     T	embed_dim	num_headsdropoutnum_bucketsmax_distancehas_relative_position_biasc                    s   t    || _|| _|| _|| | _| j| | jkrNtd| j d| d| jd | _t	||| _
t	||| _t	||| _t	||| _|| _|| _ttd| jdd| _t	| jd| _|rt| j| j| _d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      r      )super__init__r%   r&   r'   Zhead_dim
ValueErrorZscalingnnLineark_projv_projq_projout_projr(   r)   	ParametertorchZonesgru_rel_pos_constgru_rel_pos_linearZ	Embeddingrel_attn_embed)selfr%   r&   r'   r(   r)   r*   	__class__r   r   r-   *   s,    	


zWavLMAttention.__init__NFr   )hidden_statesattention_maskposition_biasoutput_attentionsreturnc                 C   s  |  \}}}|du rH| ||}|d|ddd|| j ||}||jdd | jdf }	|	dddd}	| |	}
|
|	jdd d 	d}
t
|
jddd\}}||| j d	  d
 }||| j dd| }|d||f}| ||||\}}|||fS )z'Attention layer with relative attentionNr   r   r   r   )r      dim      ?g       @)sizecompute_bias	unsqueezerepeatviewr&   shapepermuter8   sumr6   Zsigmoidchunkr7   torch_multi_head_self_attention)r:   r=   r>   r?   r@   indexZbszZtgt_len_Zgated_hidden_statesZrelative_position_projZgate_aZgate_bZgate_outputgated_position_biasattn_outputattn_weightsr   r   r   forwardN   s"    	$
zWavLMAttention.forward)r=   r>   rS   r@   rA   c                 C   s   | dd } }}|dur&|dnd}d }	}
d}tj|||| j| jtdgt| j	j
| jj
| jj
f|	|
|| j| jj| jj
| j|||d| j	j| jj| jjd\}}| dd}|dur|dddf |jdd | jf |jdd  }||fS )zCsimple wrapper around torch's multi_head_attention_forward functionr   r   NFT)Zuse_separate_proj_weightZq_proj_weightZk_proj_weightZv_proj_weight)Z	transposeneFZmulti_head_attention_forwardr%   r&   r6   emptycatr3   biasr1   r2   r'   r4   weighttrainingZbroadcast_torL   )r:   r=   r>   rS   r@   querykeyvalueZkey_padding_maskZbias_kZbias_vZadd_zero_attnrT   rU   r   r   r   rP   w   sB    	

"z.WavLMAttention.torch_multi_head_self_attention)query_length
key_lengthrA   c                 C   sv   t j|t jdd d d f }t j|t jdd d d f }|| }| |}|| jjj}| |}|g d}|S )N)Zdtype)r   r   r   )	r6   Zarangelong_relative_positions_buckettor9   r\   ZdevicerM   )r:   ra   rb   Zcontext_positionZmemory_positionZrelative_positionZrelative_position_bucketvaluesr   r   r   rH      s    

zWavLMAttention.compute_bias)relative_positionsrA   c                 C   s   | j d }|dktj| }t|}|d }||k }t| | }|t| j|  }|||  }|| tj}t	|t
||d }|t|||7 }|S )Nr   r   r   )r(   re   r6   rc   abslogfloatmathr)   minZ	full_likewhere)r:   rg   r(   Zrelative_bucketsZ	max_exactZis_smallZrelative_positions_if_largeZrelative_position_if_larger   r   r   rd      s    

z)WavLMAttention._relative_positions_bucket)r!   r"   r#   T)NNFr   )r   r   r   __doc__intrj   boolr-   r6   ZTensorr   tuplerV   FloatTensorr   Z
LongTensorZ
BoolTensorrP   rH   rd   __classcell__r   r   r;   r   r    '   s@       '    +
7
r    c                   @   s   e Zd ZdS )WavLMFeedForwardNr   r   r   r   r   rt      s   rt   c                       s0   e Zd Zd
eed fddZddd	Z  ZS )WavLMEncoderLayerTconfigr*   c                    sn   t    t|j|j|j|j|j|d| _t	
|j| _t	j|j|jd| _t|| _t	j|j|jd| _d S Nr$   epsr,   r-   r    hidden_sizeZnum_attention_headsZattention_dropoutr(   Zmax_bucket_distance	attentionr/   Dropouthidden_dropoutr'   	LayerNormlayer_norm_eps
layer_normrt   feed_forwardfinal_layer_normr:   rw   r*   r;   r   r   r-      s    

zWavLMEncoderLayer.__init__NFr   c           	      C   sl   |}| j |||||d\}}}| |}|| }| |}|| | }| |}||f}|rh||f7 }|S )Nr>   r?   r@   rQ   )r}   r'   r   r   r   )	r:   r=   r>   r?   r@   rQ   attn_residualrU   outputsr   r   r   rV      s"    



zWavLMEncoderLayer.forward)T)NNFr   r   r   r   r   rp   r-   rV   rs   r   r   r;   r   ru      s   ru   c                       s0   e Zd Zd	eed fddZd
ddZ  ZS ) WavLMEncoderLayerStableLayerNormTrv   c                    sn   t    t|j|j|j|j|j|d| _t	
|j| _t	j|j|jd| _t|| _t	j|j|jd| _d S rx   r{   r   r;   r   r   r-      s    

z)WavLMEncoderLayerStableLayerNorm.__init__NFc                 C   sf   |}|  |}| j||||d\}}}| |}|| }|| | | }||f}|rb||f7 }|S )N)r>   r?   r@   )r   r}   r'   r   r   )r:   r=   r>   r?   r@   r   rU   r   r   r   r   rV   
  s    


z(WavLMEncoderLayerStableLayerNorm.forward)T)NNFr   r   r   r;   r   r      s   r   c                       s&   e Zd Z fddZdddZ  ZS )	WavLMEncoderc                    sf   t     | _t | _tj j jd| _	t
 j| _t fddt jD | _d| _d S )Nry   c                    s   g | ]}t  |d kdqS r   )r*   )ru   .0irw   r   r   
<listcomp>'      z)WavLMEncoder.__init__.<locals>.<listcomp>Fr,   r-   rw   r   pos_conv_embedr/   r   r|   r   r   r~   r   r'   Z
ModuleListrangeZnum_hidden_layerslayersZgradient_checkpointingr:   rw   r;   r   r   r-      s    

zWavLMEncoder.__init__NFTc                 C   sB  |rdnd }|rdnd }|d urD| ddd|jd }d|| < | |}	||	 }| |}| |}t pvt| }
d }t| j	D ]~\}}|r||f }t
g }| jo|dko|| jjk }|r|
r||||||d}|d d \}}|rd}|r||d f }q|r||f }|s4tdd	 |||fD S t|||d
S )Nr   rB   r   r   r   r   NNNc                 s   s   | ]}|d ur|V  qd S Nr   r   vr   r   r   	<genexpr>a  r   z'WavLMEncoder.forward.<locals>.<genexpr>Zlast_hidden_stater=   Z
attentions)rI   rJ   rL   r   r   r'   r   r   	enumerater   r6   randr]   rw   	layerdroprq   r   r:   r=   r>   r@   Zoutput_hidden_statesZreturn_dictZall_hidden_statesZall_self_attentionsZexpand_attention_maskZposition_embeddingsZsynced_gpusr?   r   layerZdropout_probabilityZskip_the_layerZlayer_outputsr   r   r   rV   +  sL    






zWavLMEncoder.forward)NFFTr   r   r   r-   rV   rs   r   r   r;   r   r     s       r   c                       s&   e Zd Z fddZdddZ  ZS )	WavLMEncoderStableLayerNormc                    sf   t     | _t | _tj j jd| _	t
 j| _t fddt jD | _d| _d S )Nry   c                    s   g | ]}t  |d kdqS r   )r   r   r   r   r   r   q  s   z8WavLMEncoderStableLayerNorm.__init__.<locals>.<listcomp>Fr   r   r;   r   r   r-   j  s    


z$WavLMEncoderStableLayerNorm.__init__NFTc                 C   s@  |rdnd }|rdnd }|d urD| ddd|jd }d|| < | |}	||	 }| |}t plt| }
d }t| jD ]|\}}|r||f }t	
g }| jo|dko|| jjk }|r|
r|||||d}|d d \}}|rd}|r|||d f }q|| |}|r||f }|s2tdd	 |||fD S t|||d
S )Nr   rB   r   r   r   )r>   r@   r?   r   c                 s   s   | ]}|d ur|V  qd S r   r   r   r   r   r   r     r   z6WavLMEncoderStableLayerNorm.forward.<locals>.<genexpr>r   )rI   rJ   rL   r   r'   r   r   r   r   r6   r   r]   rw   r   r   rq   r   r   r   r   r   rV   x  sF    






z#WavLMEncoderStableLayerNorm.forward)NFFTr   r   r   r;   r   r   i  s       r   c                       s4   e Zd ZdZ fddZedd Zdd Z  ZS )WavLMGumbelVectorQuantizerz
    Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
    GUMBEL-SOFTMAX](https://huggingface.co/papers/1611.01144) for more information.
    c                    s   t    |j| _|j| _|j| j dkrDtd|j d| j dt	t
d| j| j |j| j | _t|jd | j| j | _d| _d S )Nr   z`config.codevector_dim z5 must be divisible by `config.num_codevector_groups` z for concatenation.r   rB   r   )r,   r-   Znum_codevector_groups
num_groupsZnum_codevectors_per_groupnum_varsZcodevector_dimr.   r/   r5   r6   rr   codevectorsr0   Zconv_dimweight_projtemperaturer   r;   r   r   r-     s    

z#WavLMGumbelVectorQuantizer.__init__c                 C   s8   | j dd}ttj|t|d  dd  }|S )Nr   rD   gHz>rB   )meanr6   exprN   ri   )ZprobsZmarginal_probs
perplexityr   r   r   _compute_perplexity  s    (z.WavLMGumbelVectorQuantizer._compute_perplexityc                 C   s  |j \}}}| |}||| | j d}| jrtjj| | j	dd}|
|}tj||| | jd dd}| |}nH|jdd}|j|j  d|ddd}||| | jd}| |}||| d}|d| j }	|	|| | j| jd}
|
d||d}
|
|fS )NrB   T)tauhardrD   r   rF   )rL   r   rK   r   r]   r/   
functionalZgumbel_softmaxrj   r   Ztype_asr6   Zsoftmaxr   ZargmaxZ	new_zerosZscatter_rI   r   r   rN   )r:   r=   Z
batch_sizeZsequence_lengthr|   Zcodevector_probsZcodevector_soft_distr   Zcodevector_idxZcodevectors_per_groupr   r   r   r   rV     s*    


z"WavLMGumbelVectorQuantizer.forward)	r   r   r   rn   r-   staticmethodr   rV   rs   r   r   r;   r   r     s
   
r   c                   @   sN   e Zd ZU eed< dZdZdZdZdZ	dZ
dd Zdd	 Zd
d Zdd ZdS )WavLMPreTrainedModelrw   ZwavlmZinput_valuesTFc              	   C   s  t |tr>|jjjjddd |jjj  tj	
|j njt |trtj	j|jjddtd|jjd |jj   d tj	|jjd nt |trtd|jj }tj	j
|jj| |d tj	j
|jj| |d nt |tjr|jjjd| jjd |jdur|jj  nt |tjtjfrN|jj  |jjd nZt |tjrtj	|j |jdurt|j|j|jd   }tj	j
|j| |d dS )	zInitialize the weightsr!   r   )r   Zstdr   r   )abNrF   )
isinstancer   r   r\   dataZnormal_r[   Zzero_r/   initZuniform_r   r   convrk   sqrtZkernel_sizeZin_channelsZ	constant_r   Z
projectionZin_featuresr0   rw   Zinitializer_ranger   Z	GroupNormZfill_ZConv1dZkaiming_normal_groups)r:   modulekr   r   r   _init_weights  s6    

 
z"WavLMPreTrainedModel._init_weightsc                 C   s   t dd S NzNot needed for WavLMAttributeErrorr:   r   r   r   _get_adapters$  s    z"WavLMPreTrainedModel._get_adaptersc                 C   s   t dd S r   r   r   r   r   r   init_adapter_layers'  s    z(WavLMPreTrainedModel.init_adapter_layersc                 C   s   t dd S r   r   r   r   r   r   load_adapter*  s    z!WavLMPreTrainedModel.load_adapterN)r   r   r   r   __annotations__Zbase_model_prefixZmain_input_nameZsupports_gradient_checkpointingZ_supports_flash_attnZ_supports_sdpaZ_supports_flex_attnr   r   r   r   r   r   r   r   r     s   
!r   c                   @   s   e Zd ZdS )
WavLMModelNr   r   r   r   r   r   1  s   r   c                   @   s   e Zd ZdS )WavLMForCTCNr   r   r   r   r   r   5  s   r   c                   @   s   e Zd ZdS )WavLMForSequenceClassificationNr   r   r   r   r   r   9  s   r   c                   @   s   e Zd ZdS ) WavLMForAudioFrameClassificationNr   r   r   r   r   r   =  s   r   c                   @   s   e Zd ZdS )WavLMForXVectorNr   r   r   r   r   r   A  s   r   )r   r   r   r   r   r   )8rk   typingr   r   r6   Ztorch.nnr/   Ztorch.nn.functionalr   rX   Zintegrations.deepspeedr   Zintegrations.fsdpr   Zmodeling_layersr   Zmodeling_outputsr   r	   Zmodeling_utilsr
   utilsr   Zwav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   Zconfiguration_wavlmr   Z
get_loggerr   loggerr   r   Moduler    rt   ru   r   r   r   r   r   ZWavLMBaseModelOutputr   r   r   r   r   __all__r   r   r   r   <module>   s>   ,
 ')%JKF4