from typing import Callable, Optional

import torch
from torch import nn
from transformers.utils.generic import check_model_inputs

from ...cache_utils import Cache, DynamicCache
from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GenericForQuestionAnswering
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, logging
from ...utils.deprecation import deprecate_kwarg
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaModel,
    LlamaPreTrainedModel,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_mistral import MistralConfig


logger = logging.get_logger(__name__)


class MistralMLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Mistral uses bias-free linear projections in the MLP.
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class MistralAttention(LlamaAttention):
    def __init__(self, config: MistralConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        self.head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_values: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_values is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache.
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=getattr(self.config, "sliding_window", None),  # main diff with Llama
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class MistralDecoderLayer(LlamaDecoderLayer):
    def __init__(self, config: MistralConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        self.self_attn = MistralAttention(config=config, layer_idx=layer_idx)
        self.mlp = MistralMLP(config)


class MistralPreTrainedModel(LlamaPreTrainedModel):
    _can_record_outputs = {
        "hidden_states": MistralDecoderLayer,
        "attentions": MistralAttention,
    }


class MistralModel(LlamaModel):
    @check_model_inputs
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> BaseModelOutputWithPast:
        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache(config=self.config)

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        # Use a sliding-window causal mask when the config defines a window, a plain causal mask otherwise.
        mask_function = create_causal_mask if self.config.sliding_window is None else create_sliding_window_causal_mask
        causal_mask = mask_function(
            config=self.config,
            input_embeds=inputs_embeds,
            attention_mask=attention_mask,
            cache_position=cache_position,
            past_key_values=past_key_values,
            position_ids=position_ids,
        )

        hidden_states = inputs_embeds
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            hidden_states = decoder_layer(
                hidden_states,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_values=past_key_values,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
                **kwargs,
            )

        hidden_states = self.norm(hidden_states)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
        )


class MistralForCausalLM(LlamaForCausalLM):
    pass


class MistralForTokenClassification(LlamaForTokenClassification):
    pass


class MistralForSequenceClassification(LlamaForSequenceClassification):
    pass


class MistralForQuestionAnswering(GenericForQuestionAnswering, MistralPreTrainedModel):
    pass


__all__ = [
    "MistralForCausalLM",
    "MistralForQuestionAnswering",
    "MistralModel",
    "MistralPreTrainedModel",
    "MistralForSequenceClassification",
    "MistralForTokenClassification",
]