a
    h&                     @   s  d Z ddlmZmZmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* e+e,Z-G dd dej.Z/G dd de Z0G dd de!Z1G dd de&Z2G dd de%Z3G dd de"Z4G d d! d!e#Z5G d"d# d#e$Z6g d$Z7dS )%zPyTorch Starcoder2 model.    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )	MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelMistralRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                       s>   e Zd Zed fddZeeej  ejdddZ	  Z
S )Starcoder2MLPconfigc                    sT   t    |j}tj||j|jd| _tj|j||jd| _t	|j
 | _|j| _d S )NZbias)super__init__hidden_sizer   LinearZintermediate_sizeuse_biasc_fcc_projr   Z
hidden_actactresidual_dropout)selfr"   Z	embed_dim	__class__ m/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr%   9   s    
zStarcoder2MLP.__init__)hidden_statesreturnc                 C   s8   |  |}| |}| |}tjj|| j| jd}|S )Nptraining)r)   r+   r*   r   
functionaldropoutr,   r6   )r-   r2   r0   r0   r1   forwardA   s
    


zStarcoder2MLP.forward)__name__
__module____qualname__r   r%   r   tupletorchFloatTensorr9   __classcell__r0   r0   r.   r1   r    8   s   r    c                       s   e Zd Zdeee d fddZedddddej	e
ej	ej	f eej	 ee eej ee e
ej	eej	 ee
ej	  f d	d
dZ  ZS )Starcoder2AttentionNr"   	layer_idxc                    s   t  j||d |j| _tj|j|j| j |jd| _	tj|j|j
| j |jd| _tj|j|j
| j |jd| _tj|j| j |j|jd| _d S )NrB   r#   )r$   r%   r,   r   r'   r&   Znum_attention_headshead_dimr(   q_projZnum_key_value_headsk_projv_projo_projr-   r"   rC   r.   r0   r1   r%   J   s    zStarcoder2Attention.__init__Zpast_key_valuepast_key_valuesz4.58)new_nameversion)r2   position_embeddingsattention_maskrJ   cache_positionkwargsr3   c                 K   sF  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}|\}}t|	|
||\}	}
|d ur|||d}||
|| j	|\}
}t
}| jjdkrt| jj }|| |	|
||f| jsdn| j| jt| jdd d|\}}|jg |dR   }| |}tjj|| j| jd	}||fS )
Nr   r   )sincosrO   eagerg        sliding_window)r8   scalingrU   r4   )shaperD   rE   viewZ	transposerF   rG   r   updaterC   r   r"   Z_attn_implementationr   r6   Zattention_dropoutrV   getattrZreshape
contiguousrH   r   r7   r8   r,   )r-   r2   rM   rN   rJ   rO   rP   Zinput_shapeZhidden_shapeZquery_statesZ
key_statesZvalue_statesrS   rR   Zcache_kwargsZattention_interfaceZattn_outputZattn_weightsr0   r0   r1   r9   R   s@    
	


zStarcoder2Attention.forward)N)NN)r:   r;   r<   r   r   intr%   r   r>   Tensorr=   r	   
LongTensorr   r   r9   r@   r0   r0   r.   r1   rA   I   s     rA   c                       s$   e Zd Zeed fddZ  ZS )Starcoder2DecoderLayerrB   c                    sR   t  || t||d| _t|| _tj|j|j	d| _
tj|j|j	d| _d S )NrB   eps)r$   r%   rA   Z	self_attnr    Zmlpr   	LayerNormr&   norm_epsilonZinput_layernormZpost_attention_layernormrI   r.   r0   r1   r%      s
    
zStarcoder2DecoderLayer.__init__)r:   r;   r<   r   r\   r%   r@   r0   r0   r.   r1   r_      s   r_   c                   @   s   e Zd ZdS )Starcoder2RotaryEmbeddingNr:   r;   r<   r0   r0   r0   r1   rd      s   rd   c                       s~   e Zd Zed fddZedeej eej	 eej ee
eeej f  eej ee eej ee ed	ddZ  ZS )	Starcoder2Modelr!   c                    sL   t    t fddt jD | _tj j j	d| _
 j| _d S )Nc                    s   g | ]}t  |qS r0   )r_   ).0rC   r!   r0   r1   
<listcomp>       z,Starcoder2Model.__init__.<locals>.<listcomp>r`   )r$   r%   r   Z
ModuleListrangenum_hidden_layerslayersrb   r&   rc   normembedding_dropout)r-   r"   r.   r!   r1   r%      s    zStarcoder2Model.__init__N)		input_idsrN   position_idsrJ   inputs_embeds	use_cacherO   rP   r3   c              
   K   s2  |d u |d uA rt d|d u r*| |}|rB|d u rBt| jd}|d u rz|d urZ| nd}	tj|	|	|jd  |jd}|d u r|	d}| jj
d u rtnt}
|
| j|||||d}|}tjj|| j| jd}| ||}| jd | jj D ]"}||f||||||d|}q| |}t||r*|nd d	S )
Nz:You must specify exactly one of input_ids or inputs_embedsr!   r   r   )device)r"   Zinput_embedsrN   rO   rJ   rp   r4   )rN   rp   rJ   rr   rO   rM   )Zlast_hidden_staterJ   )
ValueErrorZembed_tokensr
   r"   Zget_seq_lengthr>   ZarangerW   rs   Z	unsqueezerU   r   r   r   r7   r8   rn   r6   Z
rotary_embrl   rk   rm   r   )r-   ro   rN   rp   rJ   rq   rr   rO   rP   Zpast_seen_tokensZmask_functionZcausal_maskr2   rM   Zdecoder_layerr0   r0   r1   r9      sZ    

	

zStarcoder2Model.forward)NNNNNNN)r:   r;   r<   r   r%   r   r   r>   r^   r]   r   r	   listr?   boolr   r   r   r9   r@   r0   r0   r.   r1   rf      s(          rf   c                   @   s   e Zd ZdS )Starcoder2ForCausalLMNre   r0   r0   r0   r1   rw      s   rw   c                   @   s   e Zd ZdS )#Starcoder2ForSequenceClassificationNre   r0   r0   r0   r1   rx      s   rx   c                   @   s   e Zd ZdS ) Starcoder2ForTokenClassificationNre   r0   r0   r0   r1   ry      s   ry   )rw   rf   ZStarcoder2PreTrainedModelrx   ry   )8__doc__typingr   r   r   r>   Ztorch.utils.checkpointr   Ztransformers.utils.genericr   Zactivationsr   Zcache_utilsr	   r
   Zmasking_utilsr   r   Zmodeling_flash_attention_utilsr   Zmodeling_outputsr   Zmodeling_utilsr   Zprocessing_utilsr   utilsr   r   Zutils.deprecationr   Zmistral.modeling_mistralr   r   r   r   r   r   r   r   r   Zconfiguration_starcoder2r   Z
get_loggerr:   loggerModuler    rA   r_   rd   rf   rw   rx   ry   __all__r0   r0   r0   r1   <module>   s4   ,
;	L