from typing import Optional

import torch
import torch.nn as nn
import torch.utils.checkpoint

from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm import GlmConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/glm-4-9b"


class GlmMLP(Phi3MLP):
    pass


def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., 0::2]
    x2 = x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)


def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)

    # Interleave the first half of cos/sin so they match the interleaved layout used by rotate_half.
    cos = cos[..., : cos.shape[-1] // 2].repeat_interleave(2, dim=-1)
    sin = sin[..., : sin.shape[-1] // 2].repeat_interleave(2, dim=-1)

    # Split off the rotary part; the remaining dims are passed through unchanged (partial rotary).
    rotary_dim = cos.shape[-1]
    q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:]
    k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:]

    # Apply the rotary embedding to the rotary part.
    q_embed = (q_rot * cos) + (rotate_half(q_rot) * sin)
    k_embed = (k_rot * cos) + (rotate_half(k_rot) * sin)

    # Concatenate back to the full head dimension.
    q_embed = torch.cat([q_embed, q_pass], dim=-1)
    k_embed = torch.cat([k_embed, k_pass], dim=-1)
    return q_embed, k_embed


class GlmAttention(LlamaAttention):
    def __init__(self, config: GlmConfig, layer_idx: Optional[int] = None):
        super().__init__(config, layer_idx)
        # GLM's attention output projection has no bias, unlike the Llama default.
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)


class GlmForCausalLM(LlamaForCausalLM):
    pass


class GlmForSequenceClassification(LlamaForSequenceClassification):
    pass


class GlmForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GlmPreTrainedModel",  # noqa: F822 -- defined in the generated modeling_glm.py
    "GlmModel",  # noqa: F822 -- defined in the generated modeling_glm.py
    "GlmForCausalLM",
    "GlmForSequenceClassification",
    "GlmForTokenClassification",
]
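

# --- Illustrative usage sketch (not part of the upstream module) ------------------------------
# The docstring of `apply_rotary_pos_emb` above explains how `unsqueeze_dim` lets cos/sin of shape
# [batch_size, seq_len, head_dim] broadcast against q/k of shape [batch_size, heads, seq_len, head_dim].
# The guarded block below is a minimal sketch of that call; the tensor shapes and the `_demo_rotary`
# helper name are assumptions made for this example only.
if __name__ == "__main__":

    def _demo_rotary():
        batch_size, num_heads, seq_len, head_dim = 1, 2, 4, 8
        q = torch.randn(batch_size, num_heads, seq_len, head_dim)
        k = torch.randn(batch_size, num_heads, seq_len, head_dim)
        # cos/sin would normally come from the model's rotary embedding module.
        angles = torch.randn(batch_size, seq_len, head_dim)
        cos, sin = angles.cos(), angles.sin()
        # unsqueeze_dim=1 inserts the broadcast axis over the heads dimension of q and k.
        q_embed, k_embed = apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1)
        assert q_embed.shape == q.shape and k_embed.shape == k.shape

    _demo_rotary()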