a
    hp-                     @   sj  d dl mZmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$ e%e&Z'dZ(dZ)G dd deZ*G dd deZ+G dd deZ,G dd de Z-G dd deZ.G dd deZ/G dd  d eZ0G d!d" d"eZ1g d#Z2dS )$    )CallableOptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )CLIPMLP)LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                       sz   e Zd Zeed fddZedddddeje	ejejf e
ej e
e e
ej e	eje
ej f d	d
dZ  ZS )PhiAttentionconfig	layer_idxc                    s   t  || tj|j|j| j dd| _tj|j|j| j dd| _	tj|j|j| j dd| _
tj|j| j |jdd| _| `t| j|j | _|j| _| jrtj|j|j |jdd| _tj|j|j |jdd| _d S )NTZbias)epsZelementwise_affine)super__init__nnLinearhidden_sizeZnum_attention_headshead_dimq_projZnum_key_value_headsk_projv_projdenseZo_projintZpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r   	__class__ _/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/phi/modular_phi.pyr"   %   s    zPhiAttention.__init__past_key_valuepast_key_values4.58new_nameversionN)hidden_statesposition_embeddingsattention_maskr9   cache_positionreturnc                 K   s  |j d d }g |d| jR }| ||dd}	| ||dd}
| ||dd}| jr| |	}	| 	|
}
|\}}|	dd | j
f |	d| j
d f  }}|
dd | j
f |
d| j
d f  }}t||||\}}tj||fdd}	tj||fdd}
|d ur:|||d}||
|| j|\}
}t}| jjdkrXt| jj }|| |	|
||f| jsrdn| j| jd	|\}}|jg |dR   }| |}||fS )
Nr   r   .)dim)sincosrA   eagerg        )Zdropoutscaling)shaper&   r'   viewZ	transposer(   r)   r-   r0   r1   r,   r   torchcatupdater   r   r   Z_attn_implementationr
   trainingZattention_dropoutrH   Zreshape
contiguousr*   )r3   r>   r?   r@   r9   rA   kwargsZinput_shapeZhidden_shapeZquery_statesZ
key_statesZvalue_statesrF   rE   Z	query_rotZ
query_passZkey_rotZkey_passZcache_kwargsZattention_interfaceZattn_outputZattn_weightsr6   r6   r7   forward6   sN    





zPhiAttention.forward)NN)__name__
__module____qualname__r   r+   r"   r   rK   Tensortupler   r   
LongTensorrQ   __classcell__r6   r6   r4   r7   r   $   s     r   c                   @   s   e Zd ZdS )PhiMLPNrR   rS   rT   r6   r6   r6   r7   rY   u   s   rY   c                       s   e Zd Zeed fddZedddddeje	ej e	ej
 e	eej  e	e e	e e	ej
 e	eejejf  eeje	eejejf  f d
	ddZ  ZS )PhiDecoderLayerr   c                    sH   t    t||d| _t|| _tj|j|j	d| _
t|j| _d S )N)r   r    )r!   r"   r   	self_attnrY   mlpr#   r.   r%   r/   input_layernormDropoutZresid_pdropresid_dropoutr2   r4   r6   r7   r"   z   s
    

zPhiDecoderLayer.__init__r8   r9   r:   r;   NF)	r>   r@   position_idsr9   output_attentions	use_cacherA   r?   rB   c	                 K   sr   |}
|  |}| jf ||||||||d|	\}}| |}| | |}|| |
 }|f}|rn||f7 }|S )N)r>   r@   rb   r9   rc   rd   rA   r?   )r_   r]   ra   r^   )r3   r>   r@   rb   r9   rc   rd   rA   r?   rP   ZresidualZattn_outputsZself_attn_weightsZfeed_forward_hidden_statesoutputsr6   r6   r7   rQ      s*    
	


zPhiDecoderLayer.forward)NNNFFNN)rR   rS   rT   r   r+   r"   r   rK   rU   r   rW   rV   boolFloatTensorrQ   rX   r6   r6   r4   r7   r[   y   s(          r[   c                   @   s   e Zd ZdS )PhiRotaryEmbeddingNrZ   r6   r6   r6   r7   rh      s   rh   c                       sx   e Zd Zed fddZdeej eej eej ee	 eej
 ee ee ee eej ee edddZ  ZS )	PhiModelr   c                    sV   t    t fddt jD | _t j| _	tj
 j jd| _| `d S )Nc                    s   g | ]}t  |qS r6   )r[   ).0r   rj   r6   r7   
<listcomp>       z%PhiModel.__init__.<locals>.<listcomp>r\   )r!   r"   r#   Z
ModuleListrangenum_hidden_layerslayersr`   Z
embd_pdropembed_dropoutr.   r%   r/   final_layernormZnormr3   r   r4   rj   r7   r"      s    zPhiModel.__init__N)	input_idsr@   rb   r9   inputs_embedsrd   rc   output_hidden_statesrA   rP   rB   c
                 K   s  |d ur|n| j j}|d ur |n| j j}|d ur4|n| j j}|d u |d uA rTtd| jrr| jrr|rrtd d}|d u r| 	|}|r|d u rt
| j d}|	d u r|d ur| nd}tj|||jd  |jd}	|d u r|	d}t| j |||	||d}| |}|}| ||}|r d	nd }|r.d	nd }| jd | j j D ]R}|rX||f7 }||f||||||	|d
|
}|d }|rD||d f7 }qD| |}|r||f7 }t||r|nd ||dS )Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Frj   r   r   )device)r   Zinput_embedsr@   rA   r9   rb   r6   )r@   rb   r9   rc   rd   rA   r?   )Zlast_hidden_stater9   r>   Z
attentions)r   rc   rv   rd   
ValueErrorZgradient_checkpointingrN   loggerZwarning_onceZembed_tokensr   Zget_seq_lengthrK   ZarangerI   rw   Z	unsqueezer   rq   Z
rotary_embrp   ro   rr   r	   )r3   rt   r@   rb   r9   ru   rd   rc   rv   rA   rP   Zpast_seen_tokensZcausal_maskr>   r?   Zall_hidden_statesZall_self_attnsZdecoder_layerZlayer_outputsr6   r6   r7   rQ      s~    

	

	

zPhiModel.forward)	NNNNNNNNN)rR   rS   rT   r   r"   r   rK   rW   rU   r   rg   rf   r   r   r	   rQ   rX   r6   r6   r4   r7   ri      s.            ri   c                       s   e Zd Z fddZ  ZS )PhiForCausalLMc                    s&   t  | tj|j|jdd| _d S )NTr   )r!   r"   r#   r$   r%   Z
vocab_sizeZlm_headrs   r4   r6   r7   r"     s    zPhiForCausalLM.__init__)rR   rS   rT   r"   rX   r6   r6   r4   r7   rz     s   rz   c                   @   s   e Zd ZdS )PhiForSequenceClassificationNrZ   r6   r6   r6   r7   r{     s   r{   c                   @   s   e Zd ZdS )PhiForTokenClassificationNrZ   r6   r6   r6   r7   r|   #  s   r|   )ZPhiPreTrainedModelri   rz   r{   r|   )3typingr   r   rK   Ztorch.nnr#   Zcache_utilsr   r   Zmasking_utilsr   Zmodeling_layersr   Zmodeling_outputsr	   Zmodeling_utilsr
   Zprocessing_utilsr   utilsr   r   Zutils.deprecationr   Zclip.modeling_clipr   Zllama.modeling_llamar   r   r   r   r   r   r   r   Zconfiguration_phir   Z
get_loggerrR   ry   Z_CHECKPOINT_FOR_DOCZ_CONFIG_FOR_DOCr   rY   r[   rh   ri   rz   r{   r|   __all__r6   r6   r6   r7   <module>   s2   (

Q1k