# transformers/integrations/flash_attention.py

from typing import Optional

import torch

from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask
from ..utils import logging


logger = logging.get_logger(__name__)

# Decided once at import time: older flash-attn releases align the causal mask
# to the top-left corner instead of the bottom-right.
_use_top_left_mask = flash_attn_supports_top_left_mask()


def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`flash_attention_2` does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )

    # This is before the transpose
    seq_len = query.shape[2]

    if any(dim == 0 for dim in query.shape):
        raise ValueError(
            f"Tensor query has shape {query.shape} with a zero dimension.\n"
            "FlashAttention does not support inputs with dim=0.\n"
            "Please check your input shapes or use SDPA instead."
        )

    # FA2 uses non-transposed inputs: (batch, seq_len, num_heads, head_dim)
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # In PEFT, the layer norms are usually cast to float32 for training stability,
    # so the hidden states get silently cast to float32 as well. We therefore need
    # to cast them back to the correct dtype to be sure everything works as expected.
    target_dtype = None
    if query.dtype == torch.float32:
        if torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        # Handle the case where the model is quantized
        elif hasattr(module.config, "_pre_quantization_dtype"):
            target_dtype = module.config._pre_quantization_dtype
        else:
            target_dtype = next(layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)).weight.dtype

    # Prefer an `is_causal` explicitly passed by the caller; otherwise fall back to the module's setting
    is_causal = kwargs.pop("is_causal", None)
    if is_causal is None:
        is_causal = module.is_causal

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        attn_implementation=module.config._attn_implementation,
        layer_idx=module.layer_idx if hasattr(module, "layer_idx") else None,
        **kwargs,
    )

    return attn_output, None
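
# --- Usage sketch ---
# A minimal illustration of how this dispatch function can be invoked directly.
# `ToyConfig` and `ToyAttention` below are hypothetical stand-ins for a real HF
# attention module, not part of this file's API; actually running this requires
# the `flash-attn` package and fp16/bf16 CUDA tensors, which the underlying
# `_flash_attention_forward` kernel expects.
if __name__ == "__main__":

    class ToyConfig:
        _attn_implementation = "flash_attention_2"

    class ToyAttention(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.config = ToyConfig()
            self.is_causal = True  # decoder-style causal masking
            self.o_proj = torch.nn.Linear(64, 64)  # gives the dtype probe above a Linear to find

    if torch.cuda.is_available():
        module = ToyAttention().cuda().half()
        # Inputs use the (batch, num_heads, seq_len, head_dim) layout expected
        # before the transpose at the top of flash_attention_forward.
        q = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
        k = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
        v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
        out, _ = flash_attention_forward(module, q, k, v, attention_mask=None, scaling=64**-0.5)
        # The output keeps FA2's (batch, seq_len, num_heads, head_dim) layout;
        # callers typically reshape it back before the output projection.
        print(out.shape)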