from typing import Optional

import torch
from torch import nn


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)


def eager_paged_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
):
    # Write the new key/value states into the paged cache (if one was passed via kwargs) and
    # read back the full key/value states for this layer.
    cache = kwargs.pop("cache", None)
    if cache is not None:
        key, value = cache.update(key, value, module.layer_idx, **kwargs)

    # Broadcast key/value heads to the number of attention heads (grouped-query attention).
    key_states = repeat_kv(key, module.num_key_value_groups)
    value_states = repeat_kv(value, module.num_key_value_groups)

    # Scaled dot-product scores, with an additive causal mask cropped to the key length.
    attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling
    if attention_mask is not None:
        causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
        attn_weights = attn_weights + causal_mask

    # Softmax in float32 for numerical stability, then cast back to the query dtype. Note that
    # `dropout` is accepted for interface parity with other attention backends but is not applied here.
    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
    attn_output = torch.matmul(attn_weights, value_states)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights