a
    hV                  
   @   s"  d dl mZmZmZ d dlZd dlmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, G dd dej-Z.G dd deZ/ej0e1ej0dddZ2d5ej-ej0ej0ej0eej0 e3e3e"e$ dddZ4dd  Z5d6d!d"Z6G d#d$ d$ej-Z7ed%G d&d' d'ej-Z8G d(d) d)ej-Z9e%G d*d+ d+e Z:e%G d,d- d-e:Z;e%G d.d/ d/e:eZ<G d0d1 d1ee:Z=G d2d3 d3ee:Z>g d4Z?dS )7    )CallableOptionalUnionN   )ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hub)create_causal_mask)FlashAttentionKwargs) GenericForSequenceClassificationGenericForTokenClassificationGradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)deprecate_kwarg)check_model_inputs   )
Glm4Configc                       s0   e Zd Z fddZejejdddZ  ZS )Glm4MLPc                    sP   t    || _tj|jd|j dd| _tj|j|jdd| _t	|j
 | _d S )N   FZbias)super__init__confignnLinearhidden_sizeZintermediate_sizegate_up_proj	down_projr   Z
hidden_actactivation_fnselfr#   	__class__ b/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/glm4/modeling_glm4.pyr"   1   s
    
zGlm4MLP.__init__)hidden_statesreturnc                 C   s4   |  |}|jddd\}}|| | }| |S )Nr   dim)r'   chunkr)   r(   )r+   r0   Z	up_statesZgater.   r.   r/   forward9   s    
zGlm4MLP.forward)__name__
__module____qualname__r"   torchFloatTensorr6   __classcell__r.   r.   r,   r/   r   0   s   r   c                       s   e Zd Zeed fddZedddddeje	ej e	ej
 e	e e	e e	ej
 e	eejejf  ee eeje	eejejf  f d
	ddZ  ZS )Glm4DecoderLayerr#   	layer_idxc                    sv   t    |j| _t||d| _t|| _t|j|jd| _	t|j|jd| _
t|j|jd| _t|j|jd| _d S )Nr>   eps)r!   r"   r&   Glm4Attention	self_attnr   mlpGlm4RMSNormrms_norm_epsinput_layernormpost_attention_layernormpost_self_attn_layernormpost_mlp_layernormr+   r#   r?   r,   r.   r/   r"   C   s    

zGlm4DecoderLayer.__init__past_key_valuepast_key_values4.58new_nameversionNF)	r0   attention_maskposition_idsrM   	use_cachecache_positionposition_embeddingskwargsr1   c              
   K   sr   |}	|  |}| jf |||||||d|\}}
| |}|	| }|}	| |}| |}| |}|	| }|S )N)r0   rR   rS   rM   rT   rU   rV   )rG   rC   rI   rH   rD   rJ   )r+   r0   rR   rS   rM   rT   rU   rV   rW   Zresidual_r.   r.   r/   r6   N   s*    





zGlm4DecoderLayer.forward)NNNFNN)r7   r8   r9   r   intr"   r   r:   Tensorr   
LongTensorr   booltupler   r   r;   r6   r<   r.   r.   r,   r/   r=   B   s&         r=   )r0   n_repr1   c                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shapeexpandreshape)r0   r^   batchnum_key_value_headsslenhead_dimr.   r.   r/   	repeat_kvs   s
    0rf           )modulequerykeyvaluerR   scalingdropoutrW   c                 K   s   t || j}t || j}	t||dd| }
|d urf|d d d d d d d |jd f }|
| }
tjj|
dtj	d
|j}
tjj|
|| jd}
t|
|	}|dd }||
fS )Nr   r   r2   )r4   dtype)ptrainingr   )rf   num_key_value_groupsr:   matmul	transposer_   r$   Z
functionalZsoftmaxfloat32toro   rm   rq   
contiguous)rh   ri   rj   rk   rR   rl   rm   rW   
key_statesvalue_statesattn_weightscausal_maskattn_outputr.   r.   r/   eager_attention_forward   s    
&r}   c                 C   s>   | ddddf }| ddddf }t j| |fdddS )	z*Rotates half the hidden dims of the input..r   Nr   r   r2   r3   rn   )r:   stackflatten)xx1Zx2r.   r.   r/   rotate_half   s    r   c                 C   s   | |}| |}|dd|jd d f jddd}|dd|jd d f jddd}|jd }| dd|f | d|df  }}|dd|f |d|df  }	}
|| t||  }|	| t|	|  }tj||gdd}tj||
gdd}||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    .Nr2   r   r3   )	unsqueezer_   Zrepeat_interleaver   r:   cat)qkcossinrS   Zunsqueeze_dimZ
rotary_dimZq_rotZq_passZk_rotZk_passZq_embedZk_embedr.   r.   r/   apply_rotary_pos_emb   s    

$$
""r   c                       s   e Zd ZdZdeee d fddZedddd	de	j
ee	j
e	j
f ee	j
 ee ee	j ee ee	j
e	j
f d
ddZ  ZS )rB   z=Multi-headed attention from 'Attention Is All You Need' paperNr>   c                    s   t    || _|| _t|d|j|j | _|j|j | _	| jd | _
|j| _d| _tj|j|j| j |jd| _tj|j|j| j |jd| _tj|j|j| j |jd| _tj|j| j |jdd| _d S )Nre   g      Tr    F)r!   r"   r#   r?   getattrr&   Znum_attention_headsre   rc   rr   rl   attention_dropoutZ	is_causalr$   r%   Zattention_biasq_projk_projv_projo_projrK   r,   r.   r/   r"      s$    
zGlm4Attention.__init__rL   rM   rN   rO   )r0   rV   rR   rM   rU   rW   r1   c                 K   s$  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d ur|||d}||
|| j	|\}
}t
}| jjdkrt| jj }|| |	|
||f| jsdn| j| jd|\}}|jg |dR   }| |}||fS )Nr2   r   r   )r   r   rU   eagerrg   )rm   rl   )r_   re   r   viewrt   r   r   r   updater?   r}   r#   Z_attn_implementationr   rq   r   rl   ra   rw   r   )r+   r0   rV   rR   rM   rU   rW   Zinput_shapeZhidden_shapeZquery_statesrx   ry   r   r   Zcache_kwargsZattention_interfacer|   rz   r.   r.   r/   r6      s8    


zGlm4Attention.forward)N)NN)r7   r8   r9   __doc__r   r   rY   r"   r   r:   rZ   r]   r   r[   r   r   r6   r<   r.   r.   r,   r/   rB      s     rB   ZRMSNormc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	rE   ư>c                    s&   t    tt|| _|| _dS )z:
        Glm4RMSNorm is equivalent to T5LayerNorm
        N)r!   r"   r$   	Parameterr:   Zonesweightvariance_epsilon)r+   r&   rA   r,   r.   r/   r"     s    
zGlm4RMSNorm.__init__c                 C   sJ   |j }|tj}|djddd}|t|| j  }| j|| S )Nr   r2   T)Zkeepdim)	ro   rv   r:   ru   powmeanZrsqrtr   r   )r+   r0   Zinput_dtypeZvariancer.   r.   r/   r6     s
    zGlm4RMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)r]   r   r_   r   )r+   r.   r.   r/   
extra_repr   s    zGlm4RMSNorm.extra_repr)r   )r7   r8   r9   r"   r6   r   r<   r.   r.   r,   r/   rE     s   rE   c                       sD   e Zd ZU ejed< ded fddZe e	dd Z
  ZS )	Glm4RotaryEmbeddinginv_freqNr#   c                    s   t    t|dr:t|jtr:|jd|jd| _nd| _|j| _	|j| _
|| _t| j | _| | j|\}| _| jd|dd | j| _d S )Nrope_scaling	rope_typetypedefaultr   F)
persistent)r!   r"   hasattr
isinstancer   dictgetr   Zmax_position_embeddingsZmax_seq_len_cachedZoriginal_max_seq_lenr#   r   Zrope_init_fnattention_scalingZregister_bufferr   Zoriginal_inv_freq)r+   r#   devicer   r,   r.   r/   r"   '  s    
zGlm4RotaryEmbedding.__init__c           
      C   s   | j d d d d f  |jd dd|j}|d d d d d f  }t|jjtrl|jjdkrl|jjnd}t	j
|ddV | |  dd}t	j||fdd	}| | j }| | j }	W d    n1 s0    Y  |j|jd
|	j|jd
fS )Nr   r2   r   ZmpscpuF)device_typeZenabledr   r3   )ro   )r   floatr`   r_   rv   r   r   r   strr:   Zautocastrt   r   r   r   r   ro   )
r+   r   rS   Zinv_freq_expandedZposition_ids_expandedr   ZfreqsZembr   r   r.   r.   r/   r6   8  s    0&,zGlm4RotaryEmbedding.forward)N)r7   r8   r9   r:   rZ   __annotations__r   r"   Zno_gradr   r6   r<   r.   r.   r,   r/   r   $  s
   

r   c                   @   sH   e Zd ZU eed< dZdZdgZdgZdZ	dZ
dZdZdZeedZdS )Glm4PreTrainedModelr#   modelTr=   rM   )r0   
attentionsN)r7   r8   r9   r   r   Zbase_model_prefixZsupports_gradient_checkpointingZ_no_split_modulesZ_skip_keys_device_placementZ_supports_flash_attnZ_supports_sdpaZ_supports_flex_attnZ_can_compile_fullgraphZ_supports_attention_backendr=   rB   Z_can_record_outputsr.   r.   r.   r/   r   H  s   
r   c                       st   e Zd Zed fddZeedeej	 eej
 eej	 ee eej eej	 ee ee ed	ddZ  ZS )		Glm4Modelr   c                    s   t     j| _ j| _t j j| j| _t	 fddt
 jD | _t j jd| _t d| _d| _|   d S )Nc                    s   g | ]}t  |qS r.   )r=   ).0r?   r   r.   r/   
<listcomp>d      z&Glm4Model.__init__.<locals>.<listcomp>r@   r   F)r!   r"   Zpad_token_idZpadding_idx
vocab_sizer$   Z	Embeddingr&   embed_tokensZ
ModuleListrangenum_hidden_layerslayersrE   rF   normr   
rotary_embZgradient_checkpointing	post_initr*   r,   r   r/   r"   ]  s    zGlm4Model.__init__N)		input_idsrR   rS   rM   inputs_embedsrU   rT   rW   r1   c              	   K   s   |d u |d uA rt d|d u r*| |}|rB|d u rBt| jd}|d u rz|d urZ| nd}	tj|	|	|jd  |jd}|d u r|	d}t
| j|||||d}
|}| ||}| jd | jj D ] }||f|
||||d|}q| |}t||dS )	Nz:You must specify exactly one of input_ids or inputs_embedsr   r   r   )r   )r#   Zinput_embedsrR   rU   rM   rS   )rR   rS   rM   rU   rV   )last_hidden_staterM   )
ValueErrorr   r   r#   Zget_seq_lengthr:   Zaranger_   r   r   r   r   r   r   r   r   )r+   r   rR   rS   rM   r   rU   rT   rW   Zpast_seen_tokensr{   r0   rV   Zdecoder_layerr.   r.   r/   r6   m  sP    

	

zGlm4Model.forward)NNNNNNN)r7   r8   r9   r   r"   r   r   r   r:   r[   rZ   r   r;   r\   r   r   r   r6   r<   r.   r.   r,   r/   r   [  s*          r   c                       s   e Zd ZdgZddiZddgdgfiZ fddZeede	e
j e	e
j e	e
j e	e e	e
j e	e
j e	e e	e
j eee
jf ee eeef d
ddZ  ZS )Glm4ForCausalLMzlm_head.weightlm_headZcolwise_repr0   logitsc                    s@   t  | t|| _|j| _tj|j|jdd| _| 	  d S )NFr    )
r!   r"   r   r   r   r$   r%   r&   r   r   r*   r,   r.   r/   r"     s
    
zGlm4ForCausalLM.__init__Nr   )r   rR   rS   rM   r   labelsrT   rU   logits_to_keeprW   r1   c
              
   K   s   | j f |||||||d|
}|j}t|	tr<t|	 dn|	}| |dd|ddf }d}|dur| jf ||| jjd|
}t	|||j
|j|jdS )ah  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```)r   rR   rS   rM   r   rT   rU   N)r   r   r   )lossr   rM   r0   r   )r   r   r   rY   slicer   Zloss_functionr#   r   r   rM   r0   r   )r+   r   rR   rS   rM   r   r   rT   rU   r   rW   outputsr0   Zslice_indicesr   r   r.   r.   r/   r6     s0    %zGlm4ForCausalLM.forward)	NNNNNNNNr   )r7   r8   r9   Z_tied_weights_keysZ_tp_planZ_pp_planr"   r   r   r   r:   r[   rZ   r   r;   r\   r   rY   r   r   r]   r   r6   r<   r.   r.   r,   r/   r     s8   	         
r   c                   @   s   e Zd ZdS )Glm4ForSequenceClassificationNr7   r8   r9   r.   r.   r.   r/   r     s   r   c                   @   s   e Zd ZdS )Glm4ForTokenClassificationNr   r.   r.   r.   r/   r     s   r   )r   r   r   r   r   )rg   )Nr   )@typingr   r   r   r:   Ztorch.nnr$   Zactivationsr   Zcache_utilsr   r   Z
generationr	   Zintegrationsr
   Zmasking_utilsr   Zmodeling_flash_attention_utilsr   Zmodeling_layersr   r   r   Zmodeling_outputsr   r   Zmodeling_rope_utilsr   r   Zmodeling_utilsr   r   Zprocessing_utilsr   utilsr   r   r   Zutils.deprecationr   Zutils.genericr   Zconfiguration_glm4r   Moduler   r=   rZ   rY   rf   r   r}   r   r   rB   rE   r   r   r   r   r   r   __all__r.   r.   r.   r/   <module>   s\   1 
*E$NP