a
    h                     @   s  d Z ddlZddlmZ ddlmZmZmZ ddlZddl	Zddlm
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZ ddlm Z  e!e"Z#e rddl$m%Z% ndZ%e rddl&m'Z'm(Z( ddl)m*Z* n
d\Z*Z(Z'e r0ddl+m,Z,m-Z- nd\Z-Z,G dd dZ.G dd de
j/Z0G dd de
j/Z1G dd deZ2eG dd  d eZ3eed!d"G d#d$ d$eZ4eed%d"G d&d' d'eZ5eG d(d) d)e3Z6ed*d"G d+d, d,e3eZ7g d-Z8dS ).zPyTorch MAMBA model.    N)	dataclass)AnyOptionalUnion)nn)CrossEntropyLoss   )ACT2FN)PretrainedConfig)GenerationMixin)GradientCheckpointingLayer)PreTrainedModel)ModelOutputauto_docstringlogging)is_causal_conv1d_availableis_mamba_ssm_availableis_mambapy_available   )MambaConfig)pscan)mamba_inner_fnselective_scan_fn)selective_state_update)NNN)causal_conv1d_fncausal_conv1d_update)NNc                   @   sr   e Zd ZdZdZejdfeeej	e
ejedf dddZeejejejddd	Zeejd
ddZdd ZdS )
MambaCachea  
    Cache for mamba model which does not have attention mechanism and key value states.

    Arguments:
        config (`PretrainedConfig):
            The configuration file defining the shape-related attributes required to initialize the static cache.
        max_batch_size (`int`):
            The maximum batch size with which the model will be used. Note that a new instance must be instantiated if a smaller batch size is used.
        dtype (`torch.dtype`, *optional*, defaults to `torch.float16`):
            The default `dtype` to use when initializing the layer.
        device (`torch.device` or `str`, *optional*):
            The device on which the cache should be initialized. Should be the same as the layer.

    Example:

        ```python
        >>> from transformers import AutoTokenizer, MambaForCausalLM, MambaCache

        >>> model = MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
        >>> tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")

        >>> inputs = tokenizer(text="My name is Mamba", return_tensors="pt")

        >>> # Prepare a cache class and pass it to model's forward
        >>> past_key_values = MambaCache(config=model.config, max_batch_size=1, device=model.device, dtype=model.dtype)
        >>> outputs = model(**inputs, past_key_values=past_key_values, use_cache=True)
        >>> outputs.past_key_values
        MambaCache()
        ```
    TN)configmax_batch_sizedtypedevicec                 C   s   || _ || _|j| _|j| _|j| _g | _g | _|d urBt	
|nd }t|jD ]l}t	j| j | j| j|| jd}t	j| j | j| j|| jd}t	j| t	j| | j| | j| qPd S )Nr    r   )r   Z_dtypeintermediate_size
state_sizessm_state_sizeconv_kernelconv_kernel_sizeconv_states
ssm_statestorchr    rangenum_hidden_layerszeros_dynamoZmark_static_addressappend)selfr   r   r   r    _
conv_state	ssm_state r3   d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/mamba/modeling_mamba.py__init__^   s6    zMambaCache.__init__)	layer_idxnew_conv_statecache_positionreturnc                 C   s   | j | j|jkr*| j | |j| j |< | j | }|d| jd }|jddd}|j|j|jd|d d d d |f< | j |   | j |  |7  < | j | S )Nr   r   )Zshiftsdimsr!   )r'   r    toclampr&   Zrollr   zero_)r/   r6   r7   r8   r1   r3   r3   r4   update_conv_state   s    
$zMambaCache.update_conv_state)r6   new_ssm_statec                 C   s8   | j |   | j |  || j | j7  < | j | S N)r(   r>   r<   r    )r/   r6   r@   r3   r3   r4   update_ssm_state   s     zMambaCache.update_ssm_statec                 C   s4   t t| jD ] }| j|   | j|   qd S rA   )r*   lenr'   r>   r(   )r/   r6   r3   r3   r4   reset   s    zMambaCache.reset)__name__
__module____qualname____doc__Zis_compileabler)   Zfloat16r
   intr   r   r    strr5   Tensor
LongTensorr?   rB   rD   r3   r3   r3   r4   r   ;   s   &
r   c                       s   e Zd ZdZeed fddZdd Zdej	e
e e
ej e
ej dd	d
Zde
e e
ej e
ej dddZde
e e
ej e
ej dddZ  ZS )
MambaMixeru  
    Compute ∆, A, B, C, and D the state space parameters and compute the `contextualized_states`.
    A, D are input independent (see Mamba paper [1] Section 3.5.2 "Interpretation of A" for why A isn't selective)
    ∆, B, C are input-dependent (this is a key difference between Mamba and the linear time invariant S4,
    and is why Mamba is called **selective** state spaces)
    )r   r6   c                    sh  t    || _|j| _|j| _|j| _|j| _t	|j
| _
|| _|j| _tj| j| j|j|j| j|jd d| _|j| _t|j | _|j| _tj| j| jd |jd| _tj| j| j
| jd  dd| _tj| j
| jdd| _tjd| jd tjdd d d f }|| jd }tt || _!tt"| j| _#tj| j| j|jd| _$|j| _| %  d S )	Nr   )Zin_channelsZout_channelsbiasZkernel_sizegroupspadding   rN   FTr   r:   )&superr5   r   hidden_sizer#   r$   r%   r&   r"   rI   time_step_rankr6   use_conv_biasr   ZConv1dconv1dZ
hidden_act
activationr	   actuse_mambapyLinearuse_biasin_projx_projdt_projr)   arangefloat32expand
contiguous	ParameterlogA_logonesDout_projwarn_slow_implementation)r/   r   r6   A	__class__r3   r4   r5      s<    
	 $zMambaMixer.__init__c                 C   sF   t tttttf}|sB| jr8t r.t	d qBt
dn
t	d d S )Na7  The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1dzuse_mambapy is set to True but the mambapy package is not installed. To install it follow https://github.com/alxndrTL/mamba.py.a  The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py.)allr   r   r   r   r   r[   r   loggerZwarning_onceImportError)r/   is_fast_path_availabler3   r3   r4   rk      s    z#MambaMixer.warn_slow_implementationN)hidden_statescache_paramsr8   attention_maskc                 C   s  |  |dd}| jr|d u rt|| jj| jr8| jjnd | jj| j	j| j
j| jr^| j
j nd t| j  d d | j | j	j dd}nF|jddd\}}|d ur||d }| jj| jjd| jjd}|d ur&|d dkr&t|d|j| j || jj| j}|d}nN|d ur^tj|| j|jd  df}	|| j|	| t ||| jj| jd}|d ur||d }| |dd}
tj!|
| j"| j#| j#gdd\}}}| j	j|dd }t| j  }t$| j	d	r | j	j nd }|d urn|d dkrnt%|j&| j |d
 |d
 ||d d df |d d df | j|d
 |dd
d}nXt'||||dd|dd| j ||ddd
\}}|d ur|d ur|(| j| | 
|dd}|S )Nr   rQ   T)Z
delta_biasdelta_softplusdimr   r:   )rY   rN   ).r   )Zdt_softplus)rv   Zreturn_last_state))r^   	transposetrainingr   rX   weightrW   rN   r_   r`   rj   r]   floatr)   exprg   ri   chunk	unsqueezeviewsizer   squeezer'   r6   rY   r   
functionalpadr&   shaper?   r   splitrV   r$   hasattrr   r(   r   rB   )r/   rs   rt   r8   ru   projected_statescontextualized_statesgateZconv_weightsr'   ssm_parameters	time_stepBCdiscrete_time_steprl   Ztime_proj_biasscan_outputsr2   r3   r3   r4   cuda_kernels_forward   s    

$






zMambaMixer.cuda_kernels_forwardrt   r8   ru   c              	   C   s  |j \}}}|j}| |dd}	|	jddd\}
}|d urL|
|d }
|d urB|j| j  }|	|
j
}|j d | jkrtj|
| j|
j d  df}|| j|| | | |
dd |f }
nr|| j|
|}|	| jjj
}tj|| jjd d dd d f  dd}
| jr*|
| jj7 }
| |
	|d}
n8tj|| j| jf|
j
|d}| | |
dd |f }
|d ur|
|d }
| |
dd}tj|| j| j| jgdd\}}}| |}tj|dd}t| j !  }t|d d d d d d f |d d d d d d d f  }|d d d d d d d f |d d d d d d d f !  }||
d d d d d d d f !  }| j"r| j#r|d u rt$|dd|dd}||d %ddd}||
| j&d d d d f   }|| | }ng }t'|D ]}|d d d d |d d f | |d d d d |d d f  }t(|	||d d |d d f d}|)|d d d d df  qtj*|dd}||
| j&d d d d f   }|| | }|d ur|j| j +| | ,|dd}|S )	Nr   rQ   rw   r   r:   .r!   r   )-r   r   r^   ry   r~   r   r(   r6   cloner<   r    r&   r   r   r   r?   rZ   rX   r{   r)   sumrW   rN   r,   r"   r$   r_   r   rV   r`   Zsoftplusr}   rg   r|   r[   rz   r   r   ri   r*   matmulr.   stackcopy_rj   )r/   Zinput_statesrt   r8   ru   Z
batch_sizeZseq_lenr0   r   r   rs   r   r2   r1   r   r   r   r   r   rl   Z
discrete_AZ
discrete_BZdeltaB_uhsZscan_outputr   ir   r3   r3   r4   slow_forwardP  sp    
(

:<$<* 
zMambaMixer.slow_forwardc                 C   sP   t tttttf}|r@d| jjjj	v r@t
j s@| ||||S | ||||S )Ncuda)ro   r   r   r   r   r   r_   r{   r    typer)   r-   Zis_compilingr   r   )r/   rs   rt   r8   ru   rr   r3   r3   r4   forward  s    zMambaMixer.forward)NNN)NNN)NNN)rE   rF   rG   rH   r   rI   r5   rk   r)   rK   r   r   rL   r   r   r   __classcell__r3   r3   rm   r4   rM      s*   +   f$U   rM   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	MambaRMSNormư>c                    s&   t    tt|| _|| _dS )zL
        MambaRMSNorm is equivalent to T5LayerNorm and LlamaRMSNorm
        N)rT   r5   r   re   r)   rh   r{   variance_epsilon)r/   rU   epsrm   r3   r4   r5     s    
zMambaRMSNorm.__init__c                 C   sJ   |j }|tj}|djddd}|t|| j  }| j|| S )NrQ   r:   T)Zkeepdim)	r   r<   r)   rb   powmeanZrsqrtr   r{   )r/   rs   Zinput_dtypeZvariancer3   r3   r4   r     s
    zMambaRMSNorm.forwardc                 C   s   | j jd  d| j S )Nr   z, eps=)r{   r   r   r/   r3   r3   r4   
extra_repr  s    zMambaRMSNorm.extra_repr)r   )rE   rF   rG   r5   r   r   r   r3   r3   rm   r4   r     s   r   c                       s@   e Zd Z fddZdee eej eej dddZ  Z	S )
MambaBlockc                    sB   t    || _|| _|j| _t|j|jd| _t	||d| _
d S )Nr   r6   )rT   r5   r   r6   residual_in_fp32r   rU   layer_norm_epsilonnormrM   mixer)r/   r   r6   rm   r3   r4   r5     s    
zMambaBlock.__init__Nr   c                 C   sL   |}|  |j| j jjd}| jr.|tj}| j||||d}|| }|S )NrS   r   )r   r<   r{   r   r   r)   rb   r   )r/   rs   rt   r8   ru   Zresidualr3   r3   r4   r     s    zMambaBlock.forward)NNN)
rE   rF   rG   r5   r   r   r)   rL   r   r   r3   r3   rm   r4   r     s      r   c                   @   s2   e Zd ZU eed< dZddgZdZdZdd Z	dS )	MambaPreTrainedModelr   backboner   rM   Tc                 C   sn  | j j}t|trtjd|jd tjddddf }||j	d
 }|jt| d|j_d|j_|jjd | j jd | j j }| j jdkrtj|jj| n"| j jd	krtj|jj| | tt| j j	t| j jt| j j  t| j j j | j j!d
}|tt"|   }|jj#| d|jj#_$tjj%|j&jt'dd |j&j#durt(|j&j#ddstj)|j&j# tjj%|j*jt'dd | j j+r|j*j}|t'| j j, }t|tj-r.t(|jddstjj.|j|d |j#durjt(|j#ddsjtj)|j# n<t|t/rJ|jjd n t|tj0rjtjj.|j|d dS )zInitialize the weights.r   rS   Nr:   Tg      ?g      Zconstantrandom)min   )a
_no_reinitF)std)1r   Zinitializer_range
isinstancerM   r)   ra   r$   rb   rc   r"   rd   rg   r   rf   Z_no_weight_decayri   dataZfill_rV   Ztime_step_scaleZtime_step_init_schemer   initZ	constant_r`   r{   Zuniform_r}   ZrandmathZtime_step_maxZtime_step_minr=   Ztime_step_floorexpm1rN   r   Zkaiming_uniform_rX   sqrtgetattrZzeros_rj   Zrescale_prenorm_residualr+   r\   Znormal_r   	Embedding)r/   moduler   rl   Zdt_init_stddtZinv_dtpr3   r3   r4   _init_weights  sV    $

z"MambaPreTrainedModel._init_weightsN)
rE   rF   rG   r   __annotations__Zbase_model_prefixZ_no_split_modulesZsupports_gradient_checkpointingZ_is_statefulr   r3   r3   r3   r4   r     s   
r   z,
    Class for the MAMBA model outputs.
    )Zcustom_introc                   @   sJ   e Zd ZU dZdZeej ed< dZ	ee
 ed< dZeeej  ed< dS )MambaOutputa9  
    cache_params (`MambaCache`):
        The state of the model at the last time step. Can be used in a forward method with the next `input_ids` to
        avoid providing the old `input_ids`.

        Includes both the State space model state matrices after the selective scan, and the Convolutional states
    Nlast_hidden_statert   rs   )rE   rF   rG   rH   r   r   r)   FloatTensorr   rt   r   rs   tupler3   r3   r3   r4   r   &  s   
r   zK
    Base class for causal language model (or autoregressive) outputs.
    c                   @   s\   e Zd ZU dZdZeej ed< dZ	eej ed< dZ
ee ed< dZeeej  ed< dS )MambaCausalLMOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    cache_params (`MambaCache`):
        The state of the model at the last time step. Can be used in a forward method with the next `input_ids` to
        avoid providing the old `input_ids`.

        Includes both the State space model state matrices after the selective scan, and the Convolutional states
    Nlosslogitsrt   rs   )rE   rF   rG   rH   r   r   r)   r   r   r   rt   r   rs   r   r3   r3   r3   r4   r   :  s
   
r   c                       s   e Zd Z fddZdd Zdd Zdd Zedee	j
 ee	j
 ee ee ee ee ee	j
 ee	j
 eeef d
	ddZ  ZS )
MambaModelc                    sn   t    t j j| _t fddt j	D | _
d| _t j jd| _| | j |   d S )Nc                    s   g | ]}t  |d qS )r   )r   ).0idxr   r3   r4   
<listcomp>Y      z'MambaModel.__init__.<locals>.<listcomp>Fr   )rT   r5   r   r   
vocab_sizerU   
embeddingsZ
ModuleListr*   r+   layersgradient_checkpointingr   r   norm_fZ"_register_load_state_dict_pre_hook	load_hook	post_initr/   r   rm   r   r4   r5   U  s     zMambaModel.__init__c                 G   s0   |D ]&}d|v r| |||dd<  q,qd S )Nz
embedding.zembeddings.)popreplace)r/   Z
state_dictprefixargskr3   r3   r4   r   a  s    zMambaModel.load_hookc                 C   s   | j S rA   r   r   r3   r3   r4   get_input_embeddingsg  s    zMambaModel.get_input_embeddingsc                 C   s
   || _ d S rA   r   r/   Znew_embeddingsr3   r3   r4   set_input_embeddingsj  s    zMambaModel.set_input_embeddingsN)		input_idsinputs_embedsrt   	use_cacheoutput_hidden_statesreturn_dictr8   ru   r9   c	                 C   sd  |dur|n| j j}|dur |n| js.| j jnd}|dur>|n| j j}|du |duA r^td|du rp| |}| jr| jr|rd}|r|du rt| j |	d|j
|jd}tjd| j j|j
d}q|du rtdnd}|}	|rdnd}
| jD ]"}||	|||d	}	|r|
|	f }
q| |	}	|r.|
|	f }
|sLtd
d |	||
fD S t|	|rZ|nd|
dS )a  
        cache_params (`MambaCache`, *optional*):
            If passed along, the model uses the previous state in all the blocks (which will give the output for the
            `input_ids` provided as if the model add `state_input_ids + input_ids` as context).
        use_cache (`bool`, *optional*):
            If set to `True`, the `cache_params` is returned and can be used to quickly generate the next logits.
        NFz:You must specify exactly one of input_ids or inputs_embedsr   r!   r    zYou have to specify the `cache_position` manually when `use_cache=True` and `cache_params` is passed, you don't have to pass a `cache_params` if you are in prefilling stage because in that case it will be initialized for you automaticallyr3   r   c                 s   s   | ]}|d ur|V  qd S rA   r3   )r   vr3   r3   r4   	<genexpr>  r   z%MambaModel.forward.<locals>.<genexpr>)r   rt   rs   )r   r   rz   r   use_return_dict
ValueErrorr   r   r   r   r    r   r)   ra   r%   r   r   r   r   )r/   r   r   rt   r   r   r   r8   ru   rs   Zall_hidden_statesZmixer_blockr3   r3   r4   r   m  sT    



zMambaModel.forward)NNNNNNNN)rE   rF   rG   r5   r   r   r   r   r   r)   rL   r   boolr   r   r   r   r   r3   r3   rm   r4   r   S  s0           
r   z
    The MAMBA Model transformer with a language modeling head on top (linear layer with weights tied to the input
    embeddings).
    c                       s   e Zd ZdgZ fddZdd Zdd Zdeee	e
f eee	e
f d	d
dZdee eej eej dddZedeej eej eej ee eej ee ee ee eej eeef d
ddZ  ZS )MambaForCausalLMzlm_head.weightc                    s8   t  | t|| _tj|j|jdd| _| 	  d S )NFrR   )
rT   r5   r   r   r   r\   rU   r   lm_headr   r   rm   r3   r4   r5     s    
zMambaForCausalLM.__init__c                 C   s
   | j  S rA   )r   r   r   r3   r3   r4   r     s    z%MambaForCausalLM.get_input_embeddingsc                 C   s   | j |S rA   )r   r   r   r3   r3   r4   r     s    z%MambaForCausalLM.set_input_embeddingsr   )outputsmodel_kwargsnum_new_tokensr9   c                 K   s   | dd |d< | ddrHd|v rH|d d urH|d dd  | |d< d|v r~|d }tj|||jd dfgdd	|d< |S )
Nrt   r   Tr8   r:   ru   r   r   rw   )getr)   catZnew_onesr   )r/   r   r   r   kwargsru   r3   r3   r4   #_update_model_kwargs_for_generation  s    


z4MambaForCausalLM._update_model_kwargs_for_generationNr   c           
      K   s   d|  i}|rn|d u rntjd| jjj|jd}|d urLd|i}|d}	n
|d}	t| jj|	| j| j	d}|r|d dkr|d d df 
d  |d< d }|s|d urd|i}|||||d |S )Nr   r   r   r   r!   r:   )rt   r   r8   ru   )rd   r)   ra   r   r   r%   r    r   r   r   r   update)
r/   r   r   r   rt   r8   ru   r   Zmodel_inputsr   r3   r3   r4   prepare_inputs_for_generation  s*    
z.MambaForCausalLM.prepare_inputs_for_generation)
r   ru   r   rt   labelsr   r   r   r8   r9   c
              
   K   s   |dur|n| j j}| j|||||||	|d}|d }| || jjj }d}|dur||j}|dddddf 	 }|dddf 	 }t
 }||d|d|d}|s|f|dd  }|dur|f| S |S t|||j|jdS )aS  
        cache_params (`MambaCache`, *optional*):
            If passed along, the model uses the previous state in all the blocks (which will give the output for the
            `input_ids` provided as if the model add `state_input_ids + input_ids` as context).
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
            `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100`
            are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]`
        use_cache (`bool`, *optional*):
            If set to `True`, the `cache_params` is returned and can be used to quickly generate the next logits.
        N)rt   r   r   r   r   r8   ru   r   .r:   r   )r   r   rt   rs   )r   r   r   r   r<   r{   r   r|   r    rd   r   r   r   r   rt   rs   )r/   r   ru   r   rt   r   r   r   r   r8   r   Zmamba_outputsrs   r   r   Zshift_logitsZshift_labelsZloss_fctoutputr3   r3   r4   r     s:    
zMambaForCausalLM.forward)r   )NNNNN)	NNNNNNNNN)rE   rF   rG   Z_tied_weights_keysr5   r   r   r   dictrJ   r   rI   r   r   r   r)   rL   r   r   r   r   rK   r   r   r   r   r   r3   r3   rm   r4   r     sR    
     *         
r   )r   r   r   r   )9rH   r   dataclassesr   typingr   r   r   r)   Ztorch.utils.checkpointr   Ztorch.nnr   Zactivationsr	   Zconfiguration_utilsr
   Z
generationr   Zmodeling_layersr   Zmodeling_utilsr   utilsr   r   r   Zutils.import_utilsr   r   r   Zconfiguration_mambar   Z
get_loggerrE   rp   Zmambapy.pscanr   Z&mamba_ssm.ops.selective_scan_interfacer   r   Z+mamba_ssm.ops.triton.selective_state_updater   Zcausal_conv1dr   r   r   ModulerM   r   r   r   r   r   r   r   __all__r3   r3   r3   r4   <module>   sh   

e  Di 