a
    h                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+d@ee d'd(d)Z,dAej-eeej.f d*d+d,Z/e0ej1d*d-d.Z2e0d/d0d1Z3d2d3 Z4dBd5d6Z5dCd7d8Z6e
j7j8e5  e6 d9Z9e:e:d:d;d<Z;eej.ej<j=jf ej.d=d>d?Z>dS )D    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8Zint16Zint32int64)uint8Zuint16uint32uint64)Zfloat16float32float64bfloat16Zfloat8_e4m3fnZfloat8_e5m2boolr   >   r   r   r
   c                   C   s   t jdddkS )NZTRITON_INTERPRET01)osenvironget r   r   T/var/www/html/assistant/venv/lib/python3.9/site-packages/triton/_internal_testing.pyis_interpreter   s    r   c                   C   s   t  r
d S tjjj S N)r   tritonruntimeZdriverZactiveget_current_targetr   r   r   r   r      s    r   c                  C   s   t  } | d u rdS | jdkS )NFcudar   backendtargetr   r   r   is_cuda$   s    r#   c                   C   s   t  otj d dkS )Nr   	   )r#   torchr   get_device_capabilityr   r   r   r   	is_hopper)   s    r'   c                  C   s   t  } | d u rdS | jdkS )NFhipr   r!   r   r   r   is_hip-   s    r)   c                  C   s"   t  } | d uo | jdko | jdkS )Nr(   Zgfx90ar   r    archr!   r   r   r   is_hip_cdna22   s    r,   c                  C   s"   t  } | d uo | jdko | jdkS )Nr(   Zgfx942r*   r!   r   r   r   is_hip_cdna37   s    r-   c                  C   s"   t  } | d uo | jdko | jdkS )Nr(   Zgfx950r*   r!   r   r   r   is_hip_cdna4<   s    r.   c                  C   s,   t  } t| j | d uo*| jdko*d| jv S )Nr(   Zgfx12)r   printr+   r    r!   r   r   r   is_hip_gfx12A   s    
r0   c                   C   s   t  pt pt S r   )r,   r-   r.   r   r   r   r   is_hip_cdnaG   s    r1   c                  C   s   t  } | d u rdS | jdkS )NFZxpur   r!   r   r   r   is_xpuK   s    r2   c                  C   s   t  } | d u rdS t| jS )N )r   strr+   r!   r   r   r   get_archP   s    r5   )rsc                 C   s@  t | tr| f} |du r"tdd}|tt v rttt|}|du rL|jn
t	||j}|du rf|j	n
t||j	}tt|}|j
||| |d}d||dk< |S |rd|v r|j
dd	| tjd}|S |tv r|dd| |S |d
kr|dd| ddtd@ dS |dv r.|dd| dkS td| dS )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   Zint1Zbool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shapeZ	dtype_strr6   lowhighrC   r9   xr   r   r   numpy_randomU   s,    



*
rQ   )rP   returnc                 C   s   | j j}|tv rD|d}| tt|}ttj	||dtt
|S |rjd|v rjttj	| |dtt
|S |dkr|dkrtj	| |d S tj	| |dS dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicer;   r   r   N)r9   namerA   lstriprJ   rD   rB   r   r%   Ztensortlr   )rP   rT   Zdst_typetZsigned_type_nameZx_signedr   r   r   	to_tritons   s    
rY   c                 C   s   t t|  S r   )rW   Z	str_to_tyr   rP   r   r   r   str_to_triton_dtype   s    r[   )rR   c                 C   sP   t | tjjr| jS t | tjr:tdt| }|	dS t
dt|  d S )Nz^torch\.(\w+)$r:   znot a triton or torch dtype: )r>   r   languager9   rU   r%   rematchr4   group	TypeErrortype)r9   mr   r   r   torch_dtype_name   s    
rc   c                 C   sp   t | tr*| j  ttt| j	S t | t
jr^| j	t
ju rR|    S |   S td|  d S )Nz Not a triton-compatible tensor: )r>   r   basecpunumpyrJ   rD   rB   rc   r9   r%   Tensorr   float
ValueErrorrZ   r   r   r   to_numpy   s    
 rj   Fc                 C   sl   t  r
dS t sdS tjjj}| r&dnd}ttt|	d}t
|dksRJ |tj d dkoj||kS )	NTF)   r   )rk      .   r   r$   )r   r#   r   ZnvidiaZptxasversiontuplemapr?   splitlenr%   r   r&   )
byval_onlyZcuda_versionZmin_cuda_versionZcuda_version_tupler   r   r   supports_tma   s    
ru   c                 C   s   | rdS dS d S )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )rt   r   r   r   tma_skip_msg   s    rv   )reason)sizealignc                 C   s   t j| t jddS )Nr   )r9   rT   )r%   emptyr	   )rx   ry   _r   r   r   default_alloc_fn   s    r|   )rX   rR   c                 C   s   t | tjjjr| jS | S r   )r>   r   r   jitr   rd   )rX   r   r   r   unwrap_tensor   s    r~   )NNN)N)F)F)?r   r]   rf   rB   r%   r   Ztriton.languager\   rW   r   ZpytestZnumpy.randomr   typingr   r   Ztriton.runtime.jitr   r   r   r@   rA   Zintegral_dtypesrH   Zfloat_dtypes_with_bfloat16ZdtypesZdtypes_with_bfloat16Ztorch_float8_dtypesZtorch_dtypessortedsetZ
tma_dtypesr   r   r#   r'   r)   r,   r-   r.   r0   r1   r2   r5   rQ   Zndarrayrg   rY   r4   r9   r[   rc   rj   ru   rv   markZskipifZrequires_tmar?   r|   r   r}   r~   r   r   r   r   <module>   sT   



