a
    hB6                     @   s  U d dl Z d dlZd dlmZmZ d dlZd dlmZ ejdedZi Z	e
ejjef ed< eh dZeeeegef dd	d
Zeddd'dd d dejejejeej eeeejdddZee eedddZejeeejdddZejejeeee eejdddZeeddddZejejeeee ejddd Zed!dd(dd d d dd"dd#ejejejeej eej eej eeeeee eee eejejejejf d$d%d&ZdS ))    N)CallableOptional)_dtype_mappings_T)boundONNX_ATEN_DECOMP_TABLE>         
      )op_typeopset_versionreturnc                    s   t t d fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.)funcr   c                    sP   d }t jjd  d| dd| }| tttt jj |< ||  |S )NZopsetzonnx::. )Zmutates_args)torchlibraryZ	custom_opr   getattropsZonnxZregister_fake)r   overloadZtorch_opr   r   r   P/var/www/html/assistant/venv/lib/python3.9/site-packages/torch/onnx/ops/_impl.py	decorator   s    

z_onnx_op.<locals>.decorator)r   )r   r   r   r   r   r   _onnx_op   s    r   ZRotaryEmbedding   F)interleaved	num_headsrotary_embedding_dim)x	cos_cache	sin_cacheposition_idsr   r   r   r   c                   sP   j d } j d }t j dkrd j d }	t|dk fdd |	| }
||||
g}t | tt j dkdd   j d }
|dkr|
} d	d	d	d	d	d	d	|f } d	d	d	d	d	d	|d	f }|d }|d	ur|| }|| }n|}|}|d	d	d	d	d	|f }|d	d	d	d	d	|f }t|d}t|d}|r|d	d	d	d	d	d	dd	df }|d	d	d	d	d	d	dd	df }ntj|dd
d\}}|| ||  }|| ||  }|r
t|d
}t|d
}tj||fd
d}t||j }ntj||fd
d}tj||fd
d}t j dkrLt| j }|S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   r	         c                      s   d j  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape )shaper   r   r   r   <lambda>>       z%rotary_embedding_23.<locals>.<lambda>   c                   S   s   dS )Nzx should be a 4D tensor by nowr   r   r   r   r   r'   C   r(   Ndim)r%   lenr   _checkZreshapeZ	unsqueezechunkcat)r   r    r!   r"   r   r   r   
batch_sizesequence_lengthhidden_size	head_sizeZ	new_shapeZx_rotateZx_not_rotateZrotary_embedding_dim_halfcossinx1Zx2realimagZx_rotate_concatoutputr   r&   r   rotary_embedding_23+   sn    




  "$r;   )scaler4   r   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)r<   r4   r   r   r   _get_scale_factor   s    r?   )tensorr1   r   r   c                 C   s:   | j d | j d  }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r	   r$   )r%   view	transpose
contiguous)r@   r1   r   r2   r3   r4   r   r   r   _reshape_3d_to_4d   s    rD   )QKcurrent_q_num_headscurrent_kv_num_headsr<   qk_matmul_output_moder   c              	   C   s6   |dkrt | ||||S tt| |ddS dS )z1Get QK output tensor based on the specified mode.r   r*   N)_compute_qk_output_for_mode_0r   Z
zeros_likematmulrB   )rE   rF   rG   rH   r<   rI   r   r   r   _get_qk_output_for_aten_spda   s
    	
rM   )rG   rH   r   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rH   rG   r   r   r'      r(   z-_validate_gqa_configuration.<locals>.<lambda>N)r   r.   )rG   rH   r   rN   r   _validate_gqa_configuration   s    
rO   )rE   rF   rG   rH   r<   r   c                 C   s`   |}||kr"|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r	   r+   r#   rJ   r*   )repeat_interleaver?   r%   r=   r>   r   rL   rB   )rE   rF   rG   rH   r<   ZK_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r   rK      s    	
rK   Z	Attention        )	is_causalkv_num_headsq_num_headsrI   r<   softcapsoftmax_precision)rE   rF   V	attn_maskpast_key
past_valuerW   rX   rY   rI   r<   rZ   r[   r   c          (      C   s  d\}}}t | j}| jd }t | jdkrvt|dko>|dkdd  | jd }t| ||} t|||}t|||}tt | jdkot |jdkot |jdkdd  | j| }t|
|}
|d	urtj||g|d
n| }|d	urtj||g|d
n| }|| }}| j| }|j| }| j| }|j| }|dkoh|	dkoh|d	u oh|d	u ph|jtj	k}t
|| |rd	}|d	ur|jtj	kr| n|}tjjj| |||d||
t	||kd}t| ||||
|	}n||kr
|| }|j||d
}|j||d
}tj||| j| jd}|rht|d	u dd  ttj||tj	| jd}|| td}|d	ur|jtj	kr|| td}n|| }t|
| jd } t| }!| |! }"||! }#t|"|#dd}$|$}|$| }%|	dkr|%}|dkr|t|%|  }%|	dkr|%}|d	urr|tv rb|%j}&|%tj| }%tj|%dd
}'|'|&}'ntj|%dd
}'ntj|%dd
}'|	dkr|'}t|'|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r	   r$   r#   r   r#   c                   S   s   dS )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r   r'      r(   zattention_23.<locals>.<lambda>r	   r)   c                   S   s   dS )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r   r'      r(   Nr+   rV   )r]   Z	dropout_prW   r<   Z
enable_gqa)dtypedevicec                   S   s   dS )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r   r'   @  r(   z-infrJ   r*   r$   )"r-   r%   r   r.   rD   r?   r0   cloner`   boolrO   nnZ
functionalZscaled_dot_product_attentionrM   rP   Zzerosra   ZtrilZonesZmasked_fillfloatr=   r>   rL   rB   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ZONNX_DTYPE_TO_TORCH_DTYPEZsoftmaxrC   rA   )(rE   rF   r\   r]   r^   r_   rW   rX   rY   rI   r<   rZ   r[   Znum_head_dimZsequence_dimZhead_dimZinput_shape_lenr1   Zq_sequence_lengthZq_head_sizeZpresent_keyZpresent_valuerG   rH   Zkv_sequence_lengthZcan_use_sdpaZsdpa_attn_maskr:   Z	qk_outputrQ   Z	attn_biasZcausal_maskrR   rS   rT   rU   Zqk_matmul_outputZqk_with_biasZoriginal_dtypeZ
qk_softmaxr   r   r   attention_23   s    



(



















ri   )N)NNN)r=   typingr   r   r   Ztorch.onnx.opsr   TypeVarr   r   dictZ_opsZ
OpOverload__annotations__	frozensetrg   strintr   ZTensorrc   r;   re   r?   rD   rM   rO   rK   tupleri   r   r   r   r   <module>   s   

 U   