from typing import Any, Callable, Optional, Union

import torch
import torch.nn.functional as F
from torch import nn

from ...masking_utils import create_causal_mask
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ...utils.deprecation import deprecate_kwarg
from ...utils.import_utils import is_causal_conv1d_available
from ..bamba.modeling_bamba import apply_mask_to_padding_states
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaModel,
    LlamaPreTrainedModel,
    LlamaRMSNorm,
    LlamaRotaryEmbedding,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_lfm2 import Lfm2Config


if is_causal_conv1d_available():
    from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
else:
    causal_conv1d_fn, causal_conv1d_update = None, None

kernel_modules = (causal_conv1d_fn, causal_conv1d_update)
is_fast_path_available = all(kernel_modules)

logger = logging.get_logger(__name__)


class Lfm2RMSNorm(LlamaRMSNorm):
    pass


class Lfm2RotaryEmbedding(LlamaRotaryEmbedding):
    pass


class Lfm2MLP(nn.Module):
    def __init__(self, config: Lfm2Config):
        super().__init__()
        intermediate_size = config.intermediate_size
        if config.block_auto_adjust_ff_dim:
            intermediate_size = int(2 * intermediate_size / 3)
            # custom dim factor multiplier
            if config.block_ffn_dim_multiplier is not None:
                intermediate_size = int(config.block_ffn_dim_multiplier * intermediate_size)
            # round up to the nearest multiple of `block_multiple_of`
            intermediate_size = config.block_multiple_of * (
                (intermediate_size + config.block_multiple_of - 1) // config.block_multiple_of
            )
        self.w1 = nn.Linear(config.hidden_size, intermediate_size, bias=False)
        self.w3 = nn.Linear(config.hidden_size, intermediate_size, bias=False)
        self.w2 = nn.Linear(intermediate_size, config.hidden_size, bias=False)

    def forward(self, x):
        return self.w2(F.silu(self.w1(x)) * self.w3(x))


class Lfm2HybridConvCache:
    """
    Attention and conv cache for Lfm2.

    It stores the Key and Value states as a list of tensors, one for each layer.
    Attention layer cache shape: `[batch_size, num_heads, seq_len, head_dim]`.
    Conv layer cache shape: `[batch_size, hidden_size, L_cache-1]`.
    """

    max_batch_size = None
    is_compileable = False
    key_cache = None
    value_cache = None

    def __init__(
        self,
        config: Lfm2Config,
        max_batch_size: int,
        dtype: torch.dtype = torch.float32,
        device: Union[torch.device, str, None] = None,
    ):
        self.key_cache = []
        self.value_cache = []
        self.max_batch_size = max_batch_size
        self.layer_types = config.layer_types
        self.first_attention_layer = self.layer_types.index("full_attention")
        self.conv_L_cache = config.conv_L_cache
        self._dtype = dtype

        self.conv_cache = []
        device = torch.device(device) if device is not None else None
        for _ in range(config.num_hidden_layers):
            conv_state = torch.zeros(
                self.max_batch_size,
                config.hidden_size,
                self.conv_L_cache,
                dtype=self._dtype,
                device=device,
            )
            torch._dynamo.mark_static_address(conv_state)
            self.conv_cache.append(conv_state)

    def update(
        self,
        key_states: torch.Tensor,
        value_states: torch.Tensor,
        layer_idx: int,
        cache_kwargs: Optional[dict[str, Any]] = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        """
        if key_states is not None:
            if len(self.key_cache) <= layer_idx:
                # Pad intermediate (conv) layers with empty tensors so layer indices stay aligned.
                for _ in range(len(self.key_cache), layer_idx):
                    self.key_cache.append(torch.tensor([]))
                    self.value_cache.append(torch.tensor([]))
                self.key_cache.append(key_states)
                self.value_cache.append(value_states)
            elif not self.key_cache[layer_idx].numel():
                self.key_cache[layer_idx] = key_states
                self.value_cache[layer_idx] = value_states
            else:
                self.key_cache[layer_idx] = torch.cat([self.key_cache[layer_idx], key_states], dim=-2)
                self.value_cache[layer_idx] = torch.cat([self.value_cache[layer_idx], value_states], dim=-2)

        return self.key_cache[layer_idx], self.value_cache[layer_idx]

    def reorder_cache(self, beam_idx: torch.LongTensor):
        """Reorders the cache for beam search, given the selected beam indices."""
        for layer_idx in range(len(self.key_cache)):
            device = self.key_cache[layer_idx].device
            self.key_cache[layer_idx] = self.key_cache[layer_idx].index_select(0, beam_idx.to(device))
            device = self.value_cache[layer_idx].device
            self.value_cache[layer_idx] = self.value_cache[layer_idx].index_select(0, beam_idx.to(device))
            device = self.conv_cache[layer_idx].device
            self.conv_cache[layer_idx] = self.conv_cache[layer_idx].index_select(0, beam_idx.to(device))

    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
        """Returns the sequence length of the cached states. A layer index can be optionally passed."""
        # Conv layers do not hold K/V states, so read the length off an attention layer.
        layer_idx = self.first_attention_layer if self.layer_types[layer_idx] != "full_attention" else layer_idx
        if len(self.key_cache) <= layer_idx or self.key_cache[layer_idx].numel() == 0:
            return 0
        return self.key_cache[layer_idx].shape[-2]

    def get_mask_sizes(self, cache_position: torch.Tensor, layer_idx: int) -> tuple[int, int]:
        """
        Return a tuple (kv_length, kv_offset) corresponding to the length and offset that will be returned for
        the given layer at `layer_idx`.
        The masks are then prepared according to the given lengths (kv_length, kv_offset) and patterns
        (i.e. sliding_window, chunk_size), for each layer.
        """
        full_mask_kv_offset = 0
        query_length = cache_position.shape[0]
        past_seen_tokens = self.get_seq_length()
        kv_length = query_length + past_seen_tokens
        return kv_length, full_mask_kv_offset

    def crop(self, max_length: int):
        """Crop the cache to the given length"""
        # Negative lengths count back from the end, as in list slicing.
        if max_length < 0:
            max_length = self.get_seq_length() - abs(max_length)
        if self.get_seq_length() <= max_length:
            return
        for idx in range(len(self.key_cache)):
            if self.key_cache[idx].numel():
                self.key_cache[idx] = self.key_cache[idx][..., :max_length, :]
                self.value_cache[idx] = self.value_cache[idx][..., :max_length, :]

    def __len__(self) -> int:
        return len(self.key_cache)

    def __getitem__(self, layer_idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        return self.key_cache[layer_idx], self.value_cache[layer_idx]

    def reset(self):
        for layer_idx in range(len(self.conv_cache)):
            # Zero in-place so the static (mark_static_address) buffers are preserved.
            self.conv_cache[layer_idx].zero_()


class Lfm2Attention(LlamaAttention):
    def __init__(self, config: Lfm2Config, layer_idx: int):
        super().__init__(config, layer_idx)
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
        self.out_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)
        self.q_layernorm = Lfm2RMSNorm(self.head_dim, eps=config.norm_eps)
        self.k_layernorm = Lfm2RMSNorm(self.head_dim, eps=config.norm_eps)
        del self.o_proj
        del self.attention_dropout

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_values: Optional[Lfm2HybridConvCache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_layernorm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        key_states = self.k_layernorm(self.k_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_values is not None:
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        output = self.out_proj(attn_output)
        return output, attn_weights


class Lfm2ShortConv(nn.Module):
    def __init__(self, config: Lfm2Config, layer_idx: int):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.L_cache = config.conv_L_cache
        self.bias = config.conv_bias

        self.conv = nn.Conv1d(
            in_channels=config.hidden_size,
            out_channels=config.hidden_size,
            kernel_size=self.L_cache,
            groups=config.hidden_size,
            bias=self.bias,
            padding=self.L_cache - 1,
        )
        self.in_proj = nn.Linear(config.hidden_size, 3 * config.hidden_size, bias=self.bias)
        self.out_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=self.bias)

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def cuda_kernels_forward(
        self,
        x: torch.Tensor,
        past_key_values: Optional[Lfm2HybridConvCache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
    ):
        x = apply_mask_to_padding_states(x, attention_mask)
        BCx = self.in_proj(x).transpose(-1, -2)
        B, C, x = BCx.chunk(3, dim=-2)
        Bx = B * x

        conv_weights = self.conv.weight.view(self.conv.weight.size(0), self.conv.weight.size(2))
        if past_key_values is not None and cache_position[0] > 0:
            # Single-token decoding: update the rolling conv state in-place.
            conv_out = causal_conv1d_update(
                Bx.squeeze(-1),
                past_key_values.conv_cache[self.layer_idx],
                conv_weights,
                self.conv.bias,
                None,
            )
            conv_out = conv_out.unsqueeze(-1)
        else:
            if past_key_values is not None:
                conv_state = nn.functional.pad(Bx, (self.L_cache - Bx.shape[-1], 0))
                past_key_values.conv_cache[self.layer_idx].copy_(conv_state)
            conv_out = causal_conv1d_fn(Bx, conv_weights, self.conv.bias, activation=None)

        y = C * conv_out
        y = self.out_proj(y.transpose(-1, -2).contiguous())
        return y

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def slow_forward(
        self,
        x: torch.Tensor,
        past_key_values: Optional[Lfm2HybridConvCache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
    ):
        seqlen = x.shape[1]
        x = apply_mask_to_padding_states(x, attention_mask)
        BCx = self.in_proj(x).transpose(-1, -2)
        B, C, x = BCx.chunk(3, dim=-2)
        Bx = B * x

        if past_key_values is not None and cache_position[0] > 0:
            conv_state = past_key_values.conv_cache[self.layer_idx]
            cache_position = cache_position.clamp(0, self.L_cache - 1)
            conv_state = conv_state.roll(shifts=-1, dims=-1)
            conv_state[:, :, cache_position] = Bx.to(device=conv_state.device, dtype=conv_state.dtype)
            past_key_values.conv_cache[self.layer_idx].copy_(conv_state)
            conv_out = torch.sum(conv_state.to(Bx.device) * self.conv.weight[:, 0, :], dim=-1)
            if self.bias:
                conv_out += self.conv.bias
            conv_out = conv_out.unsqueeze(-1)
        else:
            if past_key_values is not None:
                conv_state = nn.functional.pad(Bx, (self.L_cache - Bx.shape[-1], 0))
                past_key_values.conv_cache[self.layer_idx].copy_(conv_state)
            conv_out = self.conv(Bx)[..., :seqlen]

        y = C * conv_out
        y = y.transpose(-1, -2).contiguous()
        y = self.out_proj(y)
        return y

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        past_key_values: Optional[Lfm2HybridConvCache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
    ):
        if is_fast_path_available and "cuda" in hidden_states.device.type and not torch._dynamo.is_compiling():
            return self.cuda_kernels_forward(hidden_states, past_key_values, cache_position, attention_mask)
        return self.slow_forward(hidden_states, past_key_values, cache_position, attention_mask)


class Lfm2DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Lfm2Config, layer_idx: int):
        super().__init__()
        self.is_attention_layer = config.layer_types[layer_idx] == "full_attention"

        if self.is_attention_layer:
            self.self_attn = Lfm2Attention(config, layer_idx)
        else:
            self.conv = Lfm2ShortConv(config, layer_idx)
        self.feed_forward = Lfm2MLP(config)
        self.operator_norm = Lfm2RMSNorm(config.hidden_size, eps=config.norm_eps)
        self.ffn_norm = Lfm2RMSNorm(config.hidden_size, eps=config.norm_eps)

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[tuple[torch.Tensor]] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> torch.Tensor:
        residual = hidden_states
        if self.is_attention_layer:
            hidden_states, _ = self.self_attn(
                hidden_states=self.operator_norm(hidden_states),
                position_embeddings=position_embeddings,
                attention_mask=attention_mask,
                position_ids=position_ids,
                past_key_values=past_key_values,
                cache_position=cache_position,
                **kwargs,
            )
        else:
            hidden_states = self.conv(
                self.operator_norm(hidden_states),
                past_key_values=past_key_values,
                cache_position=cache_position,
                attention_mask=attention_mask,
            )
        hidden_states = residual + hidden_states
        hidden_states = hidden_states + self.feed_forward(self.ffn_norm(hidden_states))
        return hidden_states


class Lfm2PreTrainedModel(LlamaPreTrainedModel):
    _can_compile_fullgraph = False


class Lfm2Model(LlamaModel):
    def __init__(self, config: Lfm2Config):
        super().__init__(config)
        self.pos_emb = Lfm2RotaryEmbedding(config)
        self.embedding_norm = Lfm2RMSNorm(config.hidden_size, eps=config.norm_eps)
        del self.norm
        del self.rotary_emb

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Lfm2HybridConvCache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> BaseModelOutputWithPast:
        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            batch_size = inputs_embeds.shape[0]
            past_key_values = Lfm2HybridConvCache(
                config=self.config, max_batch_size=batch_size, dtype=self.dtype, device=self.device
            )

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )
        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = create_causal_mask(
            config=self.config,
            input_embeds=inputs_embeds,
            attention_mask=attention_mask,
            cache_position=cache_position,
            past_key_values=past_key_values,
            position_ids=position_ids,
        )

        hidden_states = inputs_embeds
        position_embeddings = self.pos_emb(hidden_states, position_ids)

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            hidden_states = decoder_layer(
                hidden_states,
                position_embeddings=position_embeddings,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_values=past_key_values,
                cache_position=cache_position,
                **kwargs,
            )

        hidden_states = self.embedding_norm(hidden_states)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values,
        )


class Lfm2ForCausalLM(LlamaForCausalLM):
    pass


__all__ = ["Lfm2ForCausalLM", "Lfm2Model", "Lfm2PreTrainedModel"]