"""PyTorch Qwen3 model."""

from typing import Callable, Optional

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ...utils.deprecation import deprecate_kwarg
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import LlamaAttention
from ..qwen2.modeling_qwen2 import (
    Qwen2DecoderLayer,
    Qwen2ForCausalLM,
    Qwen2ForQuestionAnswering,
    Qwen2ForSequenceClassification,
    Qwen2ForTokenClassification,
    Qwen2Model,
    Qwen2PreTrainedModel,
    Qwen2RMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_qwen3 import Qwen3Config


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "Qwen/Qwen3-8B"


class Qwen3RMSNorm(Qwen2RMSNorm):
    pass


class Qwen3MLP(GemmaMLP):
    pass


class Qwen3Attention(LlamaAttention):
    def __init__(self, config: Qwen3Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Qwen3 normalizes the query/key projections per attention head (over head_dim only)
        self.q_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
        self.k_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)
        self.sliding_window = config.sliding_window if config.layer_types[layer_idx] == "sliding_attention" else None

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_values: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        key_states = self.k_norm(self.k_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_values is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=self.sliding_window,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class Qwen3DecoderLayer(Qwen2DecoderLayer):
    pass


class Qwen3PreTrainedModel(Qwen2PreTrainedModel):
    pass


class Qwen3Model(Qwen2Model):
    pass


class Qwen3ForCausalLM(Qwen2ForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[TransformersKwargs],
    ) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Qwen3ForCausalLM

        >>> model = Qwen3ForCausalLM.from_pretrained("Qwen/Qwen3-8B")
        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Qwen3ForSequenceClassification(Qwen2ForSequenceClassification):
    pass


class Qwen3ForTokenClassification(Qwen2ForTokenClassification):
    pass


class Qwen3ForQuestionAnswering(Qwen2ForQuestionAnswering):
    pass


__all__ = [
    "Qwen3ForCausalLM",
    "Qwen3ForQuestionAnswering",
    "Qwen3PreTrainedModel",
    "Qwen3Model",
    "Qwen3ForSequenceClassification",
    "Qwen3ForTokenClassification",
]