a
    h-                     @   s,   d Z ddlZG dd dZG dd dZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   @   s>   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd ZdS )MXFP4TensorNc                 C   sd   || _ |dur8t|tjs"J d|j | _ | || _n(|durXt|trN|n|f| _ntddS )at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchZTensor_from_floatdatatuplesize
ValueErrorselfr
   r   r    r   M/var/www/html/assistant/venv/lib/python3.9/site-packages/triton/tools/mxfp.py__init__   s    zMXFP4Tensor.__init__c                 C   sp   t jdd| jt j| jd}t jdd| jt j| jd}t jdd| jt j| jd}|d> |d> B |B t j| _| S )Nr      r   dtyper            )r   randintr   uint8r   typer
   )r   SEMr   r   r   random#   s
    zMXFP4Tensor.randomc                 C   s
  |t jksJ d| j}|d? d@ |}|d? d@ |}|d@ |}t |}|dk|dk@ }| }| r|| }	|| }
|| }t d|	}t |
dk|
|
d }t |
dk|d d|d  }|t d| | }|||< |||dk@   d9  < |t jS )	z
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r   float32r
   r   Z
zeros_likeanypowwhere)r   r   r
   r   r   r   valueis_zeroZnon_zero_maskZS_nzZE_nzZM_nzsignexponentZmantissaZvalue_nzr   r   r   to+   s&    
zMXFP4Tensor.toc                 C   sV  t |t j}t |}|dk}t |t |B }t jg dt j| jd}t jddgt j| jd}g }g }	g }
|D ]}|dkrd}|D ]6}|d }|d|  }|	| |		| |
	| qqx|
 d }|D ]:}d|d  }|d|  }|	| |		| |
	| qqxt j|t j| jd}t j|	t j| jd}	t j|
t j| jd}
|d}|jd }|d}| 
 }|||d< t ||d }t j|dd	d
\}}||k}| dkr|
d|d}|dkt j}||d  }t j|dd}|	| }|
| }||j}||j}d||< d||< |d> |d> B |B t jS )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   r   r   r   r!   r   r"   r    T)dimZkeepdimgư>r-   r   )r   Zsignbitr   r   absisnanisinftensorr   appenditemr#   viewshapeZ	unsqueezemaxminsumexpandint32Zargmin)r   valuesr   Z
abs_valuesr(   
is_invalidZE_bitsZM_bitsZcandidate_valuesZcandidate_EZcandidate_Mr   r*   r   Zsignificandr'   
candidatesZabs_values_flatNZabs_values_expandedZmax_candidate_valueerrorsZ
min_errors_Zis_tieZM_bits_expandedZtie_breakerZbest_indicesZ
E_selectedZ
M_selectedr   r   r   r	   N   s`    







zMXFP4Tensor._from_floatc                 C   s   | j }d|  kr|jk s&n J d||}|d d }|d dkrdgd|j  }|j| d d d }d||< tjjj||ddd}t|j}|||< |	|d d |j
| }||d d}||d d}	|	d> |B }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   Zconstant)moder'   r   )r
   ndimr   r   nnZ
functionalpadlistr6   insertreshapeselect)r   r-   r
   Zsize_along_dimZnew_size_along_dimZ	pad_sizesZ	pad_index	new_shapelowhighpackedr   r   r   to_packed_tensor   s&    



zMXFP4Tensor.to_packed_tensorc                 C   s   |d? d@ }|d@ }t j||f|d d}t|j}|d| || d g ||d d  }|j| }	|| d dkrtdg|	j }
td|| |
|< |	t|
 }	|	t j	S )a  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r      r   r.   Nr   r   )
r   stackrF   r6   rH   slicerC   r   r   r   )r   Zpacked_tensorr-   Zoriginal_shaperL   rK   Zstackedr6   rJ   r
   indicesr   r   r   unpack_packed_tensor   s    
*
z MXFP4Tensor.unpack_packed_tensor)NNN)	__name__
__module____qualname__r   r   r+   r	   rN   rS   r   r   r   r   r      s   
#X#r   c                   @   s0   e Zd Zd
ddZdddZdd Zdd	 ZdS )MXScaleTensorNc                 C   sd   || _ |dur8t|tjs"J d|j | _ | || _n(|durXt|trN|n|f| _ntddS )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   r   r   r   r      s    	zMXScaleTensor.__init__c              
   C   s   d}|du rdnt dttt|| }|du r:dn"tdt dttt|| }||ksnJ dtj||d | jtj| j	d}|| _
| S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
           Nr      z&Low must be less than or equal to highr   r   )r7   intr   log2r2   r8   r   r   r   r   r
   )r   rK   rL   ZbiasZmin_exponentZmax_exponentr   r   r   r   r      s    *0zMXScaleTensor.randomc                 C   s^   |t jksJ d| j|}|dk}| }d||< |d }t d|}t j||< ||S )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   rX   g       @)r   r#   r
   r   cloner%   nan)r   r   r
   is_nane_biaseder'   r   r   r   r+     s    
zMXScaleTensor.toc           	      C   s   t j|t j| jd}t |t |B |dkB }d||< ||  }t t |}|d }|t j	}t 
|dd}|t j|| < |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r,   r   r\   rX   rY   )r   Z
empty_liker   r   r0   r1   floorr[   r   r;   clamp)	r   r<   resultr=   Zvalid_valuesra   r`   Ze_biased_intZe_biased_clampedr   r   r   r	     s    	
zMXScaleTensor._from_float)NNN)NN)rT   rU   rV   r   r   r+   r	   r   r   r   r   rW      s   

rW   )__doc__r   r   rW   r   r   r   r   <module>   s    ^