from typing import Callable, Optional

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn

from ...cache_utils import Cache, DynamicCache
from ...integrations import use_kernel_forward_from_hub
from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, logging
from ...utils.deprecation import deprecate_kwarg
from ...utils.generic import check_model_inputs
from ...utils.import_utils import get_torch_version
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaForQuestionAnswering,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaPreTrainedModel,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from ..mistral.modeling_mistral import MistralModel
from .configuration_qwen2 import Qwen2Config


logger = logging.get_logger(__name__)


class Qwen2MLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Same gated MLP layout as Llama, but all three projections are bias-free.
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
__module____qualname__r'   __classcell__r/   r/   r-   r0   r$   (   s   r$   c                       s   e Zd Zeed fddZedddddeje	ejejf e
ej e
e e
ej ee e	eje
ej f d	d
dZ  ZS )Qwen2Attentionr,   	layer_idxc                    s   t  || tj|j|j| j dd| _tj|j|j| j dd| _	tj|j|j| j dd| _
tj|j| j |jdd| _|j| dkr|jnd | _d S )NTr%   Fsliding_attention)r&   r'   r   r(   r)   Znum_attention_headshead_dimq_projZnum_key_value_headsk_projv_projo_projlayer_typessliding_windowr+   r,   r7   r-   r/   r0   r'   1   s    zQwen2Attention.__init__Zpast_key_valuepast_key_valuesz4.58)new_namer   N)hidden_statesposition_embeddingsattention_maskrA   cache_positionkwargsreturnc                 K   s(  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d ur|||d}||
|| j	|\}
}t
}| jjdkrt| jj }|| |	|
||f| jsdn| j| j| jd|\}}|jg |dR   }| |}||fS )Nr"   r   )sincosrF   eagerg        )Zdropoutscalingr?   )shaper9   r:   viewZ	transposer;   r<   r   updater7   r    r,   Z_attn_implementationr   ZtrainingZattention_dropoutrM   r?   Zreshape
contiguousr=   )r+   rC   rD   rE   rA   rF   rG   Zinput_shapeZhidden_shapeZquery_statesZ
key_statesZvalue_statesrK   rJ   Zcache_kwargsZattention_interfaceZattn_outputZattn_weightsr/   r/   r0   forward9   s:    
	

zQwen2Attention.forward)NN)r1   r2   r3   r#   intr'   r   torchTensortupler   r   
LongTensorr   r   rR   r4   r/   r/   r-   r0   r5   0   s     r5   z2.3.0c                       s&   e Zd Zdedd fddZ  ZS )Qwen2RMSNormư>NepsrH   c                    s   t  j||dd d S )NT)Znormalized_shaper[   Zelementwise_affine)r&   r'   r+   r)   r[   r-   r/   r0   r'   j   s    Qwen2RMSNorm.__init__)rY   )r1   r2   r3   floatr'   r4   r/   r/   r-   r0   rX   i   s   rX   RMSNormc                       sB   e Zd Zdedd fddZejejdddZd	d

# Prefer the built-in `nn.RMSNorm` when the installed torch provides it; otherwise fall
# back to a manual implementation (equivalent to T5LayerNorm).
if version.parse(get_torch_version()) >= version.parse("2.3.0"):

    class Qwen2RMSNorm(nn.RMSNorm):
        def __init__(self, hidden_size, eps: float = 1e-6) -> None:
            super().__init__(normalized_shape=hidden_size, eps=eps, elementwise_affine=True)

else:

    @use_kernel_forward_from_hub("RMSNorm")
    class Qwen2RMSNorm(nn.Module):
        def __init__(self, hidden_size, eps: float = 1e-6) -> None:
            """
            Qwen2RMSNorm is equivalent to T5LayerNorm
            """
            super().__init__()
            self.weight = nn.Parameter(torch.ones(hidden_size))
            self.variance_epsilon = eps

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            input_dtype = hidden_states.dtype
            hidden_states = hidden_states.to(torch.float32)
            variance = hidden_states.pow(2).mean(-1, keepdim=True)
            hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
            return self.weight * hidden_states.to(input_dtype)

        def extra_repr(self):
            return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"


class Qwen2DecoderLayer(LlamaDecoderLayer):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Remember whether this layer uses full or sliding-window attention so the model
        # can pick the matching causal mask.
        self.attention_type = config.layer_types[layer_idx]


class Qwen2PreTrainedModel(LlamaPreTrainedModel):
    pass
 eej	 ee eej ee eej	 ee ed	ddZ  ZS )	
Qwen2Modelr,   c                    s   t  | d| jjv | _d S )Nr8   )r&   r'   r,   r>   has_sliding_layersr*   r-   r/   r0   r'      s    zQwen2Model.__init__N)		input_idsrE   position_idsrA   inputs_embeds	use_cacherF   rG   rH   c              
   K   sD  |d u |d uA rt d|d u r*| |}|rB|d u rBt| jd}|d u rz|d urZ| nd}	tj|	|	|jd  |jd}|d u r|	d}t
| }
ts| j|||||d}dtf i |i}
| jrtf i ||
d< |}| ||}| jd | jj D ](}||f|
|j |||||d	|}q| |}t||r<|nd d
S )Nz:You must specify exactly one of input_ids or inputs_embedsrl   r   r"   )device)r,   Zinput_embedsrE   rF   rA   ro   Zfull_attentionr8   )rE   ro   rA   rq   rF   rD   )Zlast_hidden_staterA   )
ValueErrorZembed_tokensr   r,   Zget_seq_lengthrT   ZarangerN   rr   Z	unsqueeze
isinstancedictr
   rm   r   Z
rotary_embZlayersZnum_hidden_layersrh   Znormr   )r+   rn   rE   ro   rA   rp   rq   rF   rG   Zpast_seen_tokensZcausal_mask_mappingZmask_kwargsrC   rD   Zdecoder_layerr/   r/   r0   rR      sZ    



zQwen2Model.forward)NNNNNNN)r1   r2   r3   r#   r'   r   r   r   rT   rW   rU   r   ZFloatTensorboolr   r   r   rR   r4   r/   r/   r-   r0   rk      s*          rk   c                   @   s   e Zd ZdS )Qwen2ForCausalLMNrj   r/   r/   r/   r0   rw      s   rw   c                   @   s   e Zd ZdS )Qwen2ForSequenceClassificationNrj   r/   r/   r/   r0   rx      s   rx   c                   @   s   e Zd ZdS )Qwen2ForTokenClassificationNrj   r/   r/   r/   r0   ry      s   ry   c                   @   s   e Zd ZdS )Qwen2ForQuestionAnsweringNrj   r/   r/   r/   r0   rz      s   rz   )ri   rk   rw   rX   rx   ry   rz   )Btypingr   r   rT   Ztorch.utils.checkpoint	packagingr   r   Zcache_utilsr   r   Zintegrationsr	   Zmasking_utilsr
   r   Zmodeling_flash_attention_utilsr   Zmodeling_outputsr   Zmodeling_utilsr   Zprocessing_utilsr   utilsr   r   r   Zutils.deprecationr   Zutils.genericr   Zutils.import_utilsr   Zllama.modeling_llamar   r   r   r   r   r   r   r   r   r    Zmistral.modeling_mistralr!   Zconfiguration_qwen2r#   Z
get_loggerr1   loggerr$   r5   parser_   rX   Modulerg   ri   rk   rw   rx   ry   rz   __all__r/   r/   r/   r0   <module>   sB   0
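
# Usage sketch (illustrative only): the classes above are not normally instantiated from
# this modular file; after conversion to `modeling_qwen2.py` they are reached through the
# public transformers API. The checkpoint id below is just an example Qwen2 model.
#
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#
#   tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
#   model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
#   inputs = tokenizer("Give me a short introduction to large language models.", return_tensors="pt")
#   generated = model.generate(**inputs, max_new_tokens=32)
#   print(tokenizer.decode(generated[0], skip_special_tokens=True))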