a
    h}                  
   @   sV  d dl mZmZmZmZ d dlZd dlm  mZ	 d dlmZ ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* e( rd dl+m,Z,m-Z- nd\Z,Z-edG dd dej.Z/G dd dej.Z0G dd dej.Z1G dd dZ2dd  Z3d9d!d"Z4ej5e6ej5d#d$d%Z7d:ej.ej5ej5ej5eej5 e8e8ee  d'd(d)Z9G d*d+ d+ej.Z:d,d- Z;e,e-fZ<e=e<Z>G d.d/ d/ej.Z?G d0d1 d1eZ@e!G d2d3 d3eZAe!G d4d5 d5eAZBe!G d6d7 d7eAeZCg d8ZDdS );    )AnyCallableOptionalUnionN)nn   )Cache)GenerationMixin)use_kernel_forward_from_hub)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)deprecate_kwarg)check_model_inputs)is_causal_conv1d_available   )
Lfm2Config)causal_conv1d_fncausal_conv1d_update)NNZRMSNormc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	Lfm2RMSNormư>c                    s&   t    tt|| _|| _dS )z:
        Lfm2RMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	ParametertorchZonesweightvariance_epsilon)selfhidden_sizeeps	__class__ b/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/lfm2/modeling_lfm2.pyr!   2   s    
zLfm2RMSNorm.__init__c                 C   sJ   |j }|tj}|djddd}|t|| j  }| j|| S )N   T)Zkeepdim)	dtypetor#   float32powmeanZrsqrtr%   r$   )r&   hidden_statesZinput_dtypeZvariancer+   r+   r,   forward:   s
    zLfm2RMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)tupler$   shaper%   r&   r+   r+   r,   
extra_reprA   s    zLfm2RMSNorm.extra_repr)r   )__name__
__module____qualname__r!   r5   r9   __classcell__r+   r+   r)   r,   r   0   s   r   c                       sD   e Zd ZU ejed< ded fddZe e	dd Z
  ZS )	Lfm2RotaryEmbeddinginv_freqNconfigc                    s   t    t|dr:t|jtr:|jd|jd| _nd| _|j| _	|j| _
|| _t| j | _| | j|\}| _| jd|dd | j| _d S )Nrope_scaling	rope_typetypedefaultr?   F)
persistent)r    r!   hasattr
isinstancerB   dictgetrC   Zmax_position_embeddingsZmax_seq_len_cachedZoriginal_max_seq_lenrA   r   Zrope_init_fnattention_scalingZregister_bufferr?   Zoriginal_inv_freq)r&   rA   devicer?   r)   r+   r,   r!   H   s    
zLfm2RotaryEmbedding.__init__c           
      C   s   | j d d d d f  |jd dd|j}|d d d d d f  }t|jjtrl|jjdkrl|jjnd}t	j
|ddV | |  dd}t	j||fdd	}| | j }| | j }	W d    n1 s0    Y  |j|jd
|	j|jd
fS )Nr   r.   r   ZmpscpuF)device_typeZenabledr-   dim)r/   )r?   floatexpandr7   r0   rL   rH   rD   strr#   Zautocast	transposecatcosrK   sinr/   )
r&   xposition_idsZinv_freq_expandedZposition_ids_expandedrN   ZfreqsZembrV   rW   r+   r+   r,   r5   Y   s    0&,zLfm2RotaryEmbedding.forward)N)r:   r;   r<   r#   Tensor__annotations__r   r!   Zno_gradr   r5   r=   r+   r+   r)   r,   r>   E   s
   

r>   c                       s*   e Zd Zed fddZdd Z  ZS )Lfm2MLPr@   c                    s   t    |j}|jrXtd| d }|jd urXt|j| }|j||j d |j  }tj|j	|dd| _
tj|j	|dd| _tj||j	dd| _d S )Nr-   r   r   Fbias)r    r!   intermediate_sizeZblock_auto_adjust_ff_dimintZblock_ffn_dim_multiplierZblock_multiple_ofr   Linearr'   w1w3w2)r&   rA   r_   r)   r+   r,   r!   j   s    

zLfm2MLP.__init__c                 C   s    |  t| || | S N)rd   FZsilurb   rc   )r&   rX   r+   r+   r,   r5   y   s    zLfm2MLP.forward)r:   r;   r<   r   r!   r5   r=   r+   r+   r)   r,   r\   i   s   r\   c                   @   s   e Zd ZdZdZdZdZdZej	dfe
eejeejedf dddZdejejeeeeef  eejejf ddd	Zejd
ddZdee edddZejeeeef dddZedddZedddZeeejejf dddZdd ZdS ) Lfm2HybridConvCachea  
    Attention and conv cache for Lfm2.

    It stores the Key and Value states as a list of tensors, one for each layer.
    Attention layer cache shape: `[batch_size, num_heads, seq_len, head_dim]`.
    Conv layer cache shape: `[batch_size, hidden_size, L_cache-1]`.
    NFrA   max_batch_sizer/   rL   c                 C   s   g | _ g | _|| _|j| _| jd| _|j| _|| _g | _|d urNt	
|nd }t|jD ]8}t	j| j|j| j| j|d}t	j| | j| q\d S )Nfull_attention)r/   rL   )	key_cachevalue_cacheri   layer_typesindexfirst_attention_layerconv_L_cacheZ_dtype
conv_cacher#   rL   rangenum_hidden_layersZzerosr'   _dynamoZmark_static_addressappend)r&   rA   ri   r/   rL   _
conv_stater+   r+   r,   r!      s&    zLfm2HybridConvCache.__init__)
key_statesvalue_states	layer_idxcache_kwargsreturnc                 C   s   |durt | j|krjtt | j|D ](}| jtg  | jtg  q&| j| | j| n`| j|  s|| j|< || j|< n<tj| j| |gdd| j|< tj| j| |gdd| j|< | j| | j| fS )a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        NrO   )	lenrk   rr   ru   r#   Ztensorrl   numelrU   )r&   rx   ry   rz   r{   rv   r+   r+   r,   update   s    
zLfm2HybridConvCache.update)beam_idxc                 C   s   t t| jD ]}| j| j}| j| d||| j|< | j| j}| j| d||| j|< | j| j}| j| d||| j|< qdS )zDReorders the cache for beam search, given the selected beam indices.r   N)rr   r~   rk   rL   Zindex_selectr0   rl   rq   )r&   r   rz   rL   r+   r+   r,   reorder_cache   s    z!Lfm2HybridConvCache.reorder_cacher   )rz   r|   c                 C   sL   | j | dkr| jn|}t| j|ks8| j|  dkr<dS | j| jd S )zYReturns the sequence length of the cached states. A layer index can be optionally passed.rj   r   r}   )rm   ro   r~   rk   r   r7   r&   rz   r+   r+   r,   get_seq_length   s     z"Lfm2HybridConvCache.get_seq_length)cache_positionrz   r|   c                 C   s&   d}|j d }|  }|| }||fS )aB  
        Return a tuple (kv_length, kv_offset) corresponding to the length and offset that will be returned for
        the given layer at `layer_idx`.
        The masks are then prepared according to the given lengths (kv_length, kv_offset) and patterns (i.e. sliding_window, chunk_size),
        for each layer.
        r   )r7   r   )r&   r   rz   Zfull_mask_kv_offsetZquery_lengthpast_seen_tokensZ	kv_lengthr+   r+   r,   get_mask_sizes   s
    
z"Lfm2HybridConvCache.get_mask_sizes)
max_lengthc                 C   s   |dk r|   t| }|   |kr(dS tt| jD ]V}| j|  r6| j| dd|ddf | j|< | j| dd|ddf | j|< q6dS )z"Crop the cache to the given lengthr   N.)r   absrr   r~   rk   r   rl   )r&   r   idxr+   r+   r,   crop   s    "zLfm2HybridConvCache.crop)r|   c                 C   s
   t | jS re   )r~   rk   r8   r+   r+   r,   __len__  s    zLfm2HybridConvCache.__len__c                 C   s   | j | | j| fS re   )rk   rl   r   r+   r+   r,   __getitem__  s    zLfm2HybridConvCache.__getitem__c                 C   s&   t t| jD ]}| j|   qd S re   )rr   r~   rq   Zzero_r   r+   r+   r,   reset  s    zLfm2HybridConvCache.reset)N)r   )r:   r;   r<   __doc__ri   Zis_compileablerk   rl   r#   r1   r   r`   r/   r   rL   rS   r!   rZ   r   rI   r   r6   r   
LongTensorr   r   r   r   r   r   r   r+   r+   r+   r,   rg   }   s8   	" +rg   c                 C   sH   | dd| j d d f }| d| j d d df }tj| |fddS )z*Rotates half the hidden dims of the input..Nr.   r-   rO   )r7   r#   rU   )rX   x1Zx2r+   r+   r,   rotate_half  s    r   c                 C   sD   | |}| |}| | t| |  }|| t||  }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezer   )qkrV   rW   rY   Zunsqueeze_dimZq_embedZk_embedr+   r+   r,   apply_rotary_pos_emb  s
    

r   )r4   n_repr|   c                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)r7   rR   reshape)r4   r   batchnum_key_value_headsslenhead_dimr+   r+   r,   	repeat_kv/  s
    0r           )modulequerykeyvalueattention_maskscalingdropoutkwargsc                 K   s   t || j}t || j}	t||dd| }
|d urf|d d d d d d d |jd f }|
| }
tjj|
dtj	d
|j}
tjj|
|| jd}
t|
|	}|dd }||
fS )Nr-   r   r}   r.   )rP   r/   )ptrainingr   )r   num_key_value_groupsr#   matmulrT   r7   r   
functionalZsoftmaxr1   r0   r/   r   r   
contiguous)r   r   r   r   r   r   r   r   rx   ry   attn_weightscausal_maskattn_outputr+   r+   r,   eager_attention_forward;  s    
&r   c                       s~   e Zd ZdZeed fddZedddddej	e
ej	ej	f eej	 ee eej e
ej	eej	 f d
ddZ  ZS )Lfm2Attentionz=Multi-headed attention from 'Attention Is All You Need' paperrA   rz   c                    s   t    || _|| _t|d|j|j | _|j|j | _	| jd | _
d| _tj|j|j| j dd| _tj|j|j| j dd| _tj|j|j| j dd| _tj|j| j |jdd| _t| j|jd| _t| j|jd| _d S )Nr   g      TFr]   r(   )r    r!   rA   rz   getattrr'   Znum_attention_headsr   r   r   r   Z	is_causalr   ra   q_projk_projv_projout_projr   norm_epsq_layernormk_layernormr&   rA   rz   r)   r+   r,   r!   X  s    
zLfm2Attention.__init__past_key_valuepast_key_values4.58new_nameversionN)r4   position_embeddingsr   r   r   r|   c                 K   s$  |j d d }g |d| jR }| | |j| dd}	| | |j| dd}
| |j| dd}|\}}t	|	|
||\}	}
|d ur|||d}|
|
|| j|\}
}t}| jjdkrt| jj }|| |	|
||fd| jd|\}}|jg |dR   }| |}||fS )Nr.   r   r-   )rW   rV   r   eagerr   )r   r   )r7   r   r   r   viewrT   r   r   r   r   r   rz   r   rA   Z_attn_implementationr   r   r   r   r   )r&   r4   r   r   r   r   r   Zinput_shapeZhidden_shapeZquery_statesrx   ry   rV   rW   r{   Zattention_interfacer   r   outputr+   r+   r,   r5   g  s8    



zLfm2Attention.forward)NN)r:   r;   r<   r   r   r`   r!   r   r#   rZ   r6   r   rg   r   r5   r=   r+   r+   r)   r,   r   U  s     r   c                 C   sN   |durJ|j d dkrJ|j d dkrJ| j}| |dddddf  |} | S )zm
    Tunes out the hidden states for padding tokens, see https://github.com/state-spaces/mamba/issues/66
    Nr   r   )r7   r/   r0   )r4   r   r/   r+   r+   r,   apply_mask_to_padding_states  s    $ r   c                       s   e Zd Zeed fddZedddddeje	e
 e	ej e	ej d	d
dZedddddeje	e
 e	ej e	ej d	ddZedddddeje	e
 e	ej e	ej dddZ  ZS )Lfm2ShortConvr   c                    s   t    || _|| _|j| _|j| _tj	|j
|j
| j|j
| j| jd d| _tj|j
d|j
 | jd| _tj|j
|j
| jd| _d S )Nr   )Zin_channelsZout_channelsZkernel_sizegroupsr^   paddingr   r]   )r    r!   rA   rz   rp   L_cacheZ	conv_biasr^   r   ZConv1dr'   convra   in_projr   r   r)   r+   r,   r!     s    
zLfm2ShortConv.__init__r   r   r   r   N)rX   r   r   r   c                 C   s  t ||}| |dd}|jddd\}}}|| }| jj| jjd| jjd}	|d ur|d dkrt|	d|j
| j |	| jjd }
|
d}
nL|d urtj|| j|jd  df}|j
| j | t||	| jjd d}
||
 }| |dd }|S )Nr.   r}   r   rO   r   r-   )Z
activation)r   r   rT   chunkr   r$   r   sizer   Zsqueezerq   rz   r^   r   r   r   padr   r7   copy_r   r   r   )r&   rX   r   r   r   BCxBCBxZconv_weightsconv_outrw   yr+   r+   r,   cuda_kernels_forward  s*    
$
z"Lfm2ShortConv.cuda_kernels_forwardc                 C   s  |j d }t||}| |dd}|jddd\}}}|| }	|d ur
|d dkr
|j| j }
|d| jd }|
j	ddd}
|	j
|
j|
jd|
d d d d |f< |j| j |
 tj|

|	j| jjd d dd d f  dd}| jr|| jj7 }|d}nP|d urDtj|	| j|	j d  df}
|j| j |
 | |	d	d |f }|| }|dd }| |}|S )
Nr   r.   r}   r   rO   r   )Zshiftsdims)rL   r/   .)r7   r   r   rT   r   rq   rz   clampr   Zrollr0   rL   r/   r   r#   sumr   r$   r^   r   r   r   r   r   r   )r&   rX   r   r   r   Zseqlenr   r   r   r   rw   r   r   r+   r+   r,   slow_forward  s.    

$0

zLfm2ShortConv.slow_forwardr4   r   r   r   c                 C   s:   t r*d|jjv r*tj s*| ||||S | ||||S )Ncuda)is_fast_path_availablerL   rD   r#   rt   Zis_compilingr   r   )r&   r4   r   r   r   r+   r+   r,   r5     s    zLfm2ShortConv.forward)NNN)NNN)NNN)r:   r;   r<   r   r`   r!   r   r#   rZ   r   rg   r   r   r   r5   r=   r+   r+   r)   r,   r     sB      "   &   r   c                
       sz   e Zd Zeed fddZedddddeje	ejejf e
ej e
ej e
e	ej  e
ej ejd	d
dZ  ZS )Lfm2DecoderLayerr   c                    sl   t    |j| dk| _| jr.t||| _nt||| _t|| _	t
|j|jd| _t
|j|jd| _d S )Nrj   r   )r    r!   rm   is_attention_layerr   	self_attnr   r   r\   feed_forwardr   r'   r   operator_normffn_normr   r)   r+   r,   r!     s    

zLfm2DecoderLayer.__init__r   r   r   r   N)r4   r   r   rY   r   r   r|   c           
   	   K   sl   |}| j r4| jf | ||||||d|\}}	n| j| ||||d}|| }|| | | }|S )N)r4   r   r   rY   r   r   r   )r   r   r   r   r   r   )
r&   r4   r   r   rY   r   r   r   Zresidualrv   r+   r+   r,   r5     s*    
zLfm2DecoderLayer.forward)NNNN)r:   r;   r<   r   r`   r!   r   r#   rZ   r6   r   r   r5   r=   r+   r+   r)   r,   r     s       r   c                   @   sH   e Zd ZU eed< dZdZdgZdgZdZ	dZ
dZdZdZeedZdS )	Lfm2PreTrainedModelrA   modelTr   r   F)r4   
attentionsN)r:   r;   r<   r   r[   Zbase_model_prefixZsupports_gradient_checkpointingZ_no_split_modulesZ_skip_keys_device_placementZ_supports_flash_attnZ_supports_sdpaZ_supports_flex_attnZ_can_compile_fullgraphZ_supports_attention_backendr   r   Z_can_record_outputsr+   r+   r+   r,   r   ?  s   
r   c                       st   e Zd Zed fddZeedeej	 eej
 eej	 ee eej ee eej	 ee ed	ddZ  ZS )		Lfm2Modelr@   c                    s   t     j| _ j| _t j j| j| _t	 fddt
 jD | _t d| _d| _t | _t j jd| _|   d S )Nc                    s   g | ]}t  |qS r+   )r   ).0rz   r@   r+   r,   
<listcomp>Z      z&Lfm2Model.__init__.<locals>.<listcomp>r@   Fr   )r    r!   Zpad_token_idZpadding_idx
vocab_sizer   Z	Embeddingr'   embed_tokensZ
ModuleListrr   rs   layersr>   Z
rotary_embZgradient_checkpointingpos_embr   r   embedding_norm	post_initr&   rA   r)   r@   r,   r!   S  s    
zLfm2Model.__init__N)		input_idsr   rY   r   inputs_embeds	use_cacher   r   r|   c              	   K   s  |d u |d uA rt d|d u r*| |}|rV|d u rV|jd }	t| j|	| j| jd}|d u r|d urn| nd}
tj	|
|
|jd  |jd}|d u r|
d}t| j|||||d}|}| ||}| jd | jj D ] }||f|||||d|}q| |}t||dS )	Nz:You must specify exactly one of input_ids or inputs_embedsr   rh   r   )rL   )rA   Zinput_embedsr   r   r   rY   )r   rY   r   r   r   )last_hidden_stater   )
ValueErrorr   r7   rg   rA   r/   rL   r   r#   Zaranger   r   r   r   rs   r   r   )r&   r   r   rY   r   r   r   r   r   Z
batch_sizer   r   r4   r   Zdecoder_layerr+   r+   r,   r5   d  sV    


	

zLfm2Model.forward)NNNNNNN)r:   r;   r<   r   r!   r   r   r   r#   r   rZ   rg   FloatTensorboolr   r   r   r5   r=   r+   r+   r)   r,   r   Q  s*          r   c                       s   e Zd ZdgZddiZddgdgfiZ fddZeede	e
j e	e
j e	e
j e	e e	e
j e	e
j e	e e	e
j eee
jf ee ed
ddZ  ZS )Lfm2ForCausalLMzlm_head.weightlm_headZcolwise_repr4   logitsc                    s@   t  | t|| _|j| _tj|j|jdd| _| 	  d S )NFr]   )
r    r!   r   r   r   r   ra   r'   r   r   r   r)   r+   r,   r!     s
    
zLfm2ForCausalLM.__init__Nr   )r   r   rY   r   r   labelsr   r   logits_to_keepr   r|   c
              
   K   s   | j f |||||||d|
}|j}t|	tr<t|	 dn|	}| |dd|ddf }d}|dur| jf ||| jjd|
}t	|||j
|j|jdS )a  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, Lfm2ForCausalLM

        >>> model = Lfm2ForCausalLM.from_pretrained("meta-lfm2/Lfm2-2-7b-hf")
        >>> tokenizer = AutoTokenizer.from_pretrained("meta-lfm2/Lfm2-2-7b-hf")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```)r   r   rY   r   r   r   r   N)r   r   r   )lossr   r   r4   r   )r   r   rH   r`   slicer   Zloss_functionrA   r   r   r   r4   r   )r&   r   r   rY   r   r   r   r   r   r   r   outputsr4   Zslice_indicesr   r   r+   r+   r,   r5     s0     zLfm2ForCausalLM.forward)	NNNNNNNNr   )r:   r;   r<   Z_tied_weights_keysZ_tp_planZ_pp_planr!   r   r   r   r#   r   rZ   r   r   r   r   r`   r   r   r   r5   r=   r+   r+   r)   r,   r     s8   	         r   )r   r   r   )Nr   )r   )Etypingr   r   r   r   r#   Ztorch.nn.functionalr   r   rf   Zcache_utilsr   Z
generationr	   Zintegrationsr
   Zmasking_utilsr   Zmodeling_layersr   Zmodeling_outputsr   r   Zmodeling_rope_utilsr   r   Zmodeling_utilsr   r   Zprocessing_utilsr   utilsr   r   r   Zutils.deprecationr   Zutils.genericr   Zutils.import_utilsr   Zconfiguration_lfm2r   Zcausal_conv1dr   r   Moduler   r>   r\   rg   r   r   rZ   r`   r   rQ   r   r   r   Zkernel_modulesallr   r   r   r   r   r   __all__r+   r+   r+   r,   <module>   sj   $ 
 =n0TK