a
    h!                     @   s   d dl mZmZmZ ddlmZ ddlmZ ddlm	Z	 erHddl
mZ ddlmZmZmZmZmZ dd	lmZ e r~d d
lZeeZG dd deZd
S )    )TYPE_CHECKINGAnyOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                	       s   e Zd ZdZdZdZddgZed fddZd	d
 Z	dddddZ
d)ddedeeef eee  dddZd*deee  dddZddddZee eee dddZeedd d!Zd+d"d#Zddeeeef ed$d%d&Zd'd( Z  ZS ),HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTzflute-kernelZfast_hadamard_transform)quantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ c/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_higgs.pyr   +   s    zHiggsHfQuantizer.__init__c                 K   s|   t j stdt s tdt s.tdt s<td|d u rNtdn*t	|t
rxd| v spd| v rxtdd S )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpuZdiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudaZis_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   Z
device_mapr   r   r   r   validate_environment/   s$    
"z%HiggsHfQuantizer.validate_environmentztorch.dtype)dtypereturnc                 C   sB   |d u rt d tj}n$|tjkr>|tjkr>td| d|S )NzG`dtype` is None. Setting `dtype=torch.float16` for FLUTE compatibility.zInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)loggerinfor   Zfloat16Zbfloat16r!   )r   r&   r   r   r   update_dtypeI   s    

zHiggsHfQuantizer.update_dtypeNr
   ztorch.Tensorztorch.device)modelparam_value
param_nametarget_device
state_dictunexpected_keysc                 C   s   ddl m} |||| jj| jj| jj| jj}~t||\}	}
d	|
dd d }| D ]|\}}||	jv rtjj|dd|	j|< qb||	jv rtj||	j|< qb|dkr||	_| | jj|< qbtd| d	|	 qb|d ur||v r|| d S )
Nr   )quantize_with_higgs.F)Zrequires_gradtune_metadatazUnexpected key z in module )integrationsr1   tor   bitspZ
group_sizeZhadamard_sizer	   joinsplititems_parametersr   nn	Parameter_buffersZBufferr4   to_dictr!   remove)r   r+   r,   r-   r.   r/   r0   r1   Z
flute_dictmodule_module_namekeyvaluer   r   r   create_quantized_paramT   s,    	

z'HiggsHfQuantizer.create_quantized_param)r+   keep_in_fp32_modulesc                 K   s@   ddl m} | || jj|| _||| j| jd | j|j_d S )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)r5   rI   Zget_modules_to_not_convertr   rJ   config)r   r+   rH   r   rI   r   r   r   $_process_model_before_weight_loading{   s    
z5HiggsHfQuantizer._process_model_before_weight_loading)r+   c           
         s   ddl m}m} ddlm} ddlm  i } fdd| D }t|	 dd	d
D ]\}}	|	j
j|vr~||	j
jd||	j
j< ||	j
j |	_|| jj| |	_||	j
j|	jj|	jd\|	j
_|	_|	j | jj|< qTd S )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearc                    s    i | ]\}}t | r||qS r   r"   .0namerB   rP   r   r   
<dictcomp>       zHHiggsHfQuantizer._process_model_after_weight_loading.<locals>.<dictcomp>zRepacking HIGGS modulesF)descZleave)device)weightscalesmetadata)Z
flute.tunerM   rN   Zflute.utilsrO   r5   rQ   named_modulesr   r;   rZ   rY   Z	workspace	from_dictr   r4   datar[   r@   )
r   r+   r   rM   rN   rO   Zflute_workspacesZflute_modulesrU   rB   r   rP   r   #_process_model_after_weight_loading   s     z4HiggsHfQuantizer._process_model_after_weight_loading)missing_keysprefixr'   c                    sJ   ddl m   fdd| D ttdfddfdd	|D S )
Nr   rP   c                    s   h | ]\}}t | r|qS r   rR   rS   rP   r   r   	<setcomp>   rW   z7HiggsHfQuantizer.update_missing_keys.<locals>.<setcomp>)rE   r'   c                    s>    ds drdS  d  t fddD S )Nz.weightz.biasFr2   c                 3   s   | ]}|v p| v V  qd S r   r   )rT   rU   Zfull_keyrE   r   r   	<genexpr>   rW   zNHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>)endswithany)rE   )higgs_namesrb   rd   r   should_update   s    z;HiggsHfQuantizer.update_missing_keys.<locals>.should_updatec                    s   g | ]} |s|qS r   r   )rT   rE   )ri   r   r   
<listcomp>   rW   z8HiggsHfQuantizer.update_missing_keys.<locals>.<listcomp>)r5   rQ   r]   strbool)r   r+   ra   rb   r   )rQ   rh   rb   ri   r   update_missing_keys   s    z$HiggsHfQuantizer.update_missing_keys)r'   c                 C   s   dS )NFr   )r   r   r   r   is_trainable   s    zHiggsHfQuantizer.is_trainablec                 C   s   dS )NTr   )r   Zsafe_serializationr   r   r   is_serializable   s    z HiggsHfQuantizer.is_serializable)r+   r,   r-   r/   r'   c           	      K   sD   ddl m} t||\}}t||r<|dkr<|jtjkr<dS dS d S )Nr   rP   rZ   TF)r5   rQ   r	   r"   r&   r   Zint16)	r   r+   r,   r-   r/   r   rQ   rB   Ztensor_namer   r   r   check_quantized_param   s
    z&HiggsHfQuantizer.check_quantized_paramc                 C   s   ddl m} ||}|S )Nr   )dequantize_higgs)r5   rq   )r   r+   rq   r   r   r   _dequantize   s    zHiggsHfQuantizer._dequantize)N)N)N)__name__
__module____qualname____doc__Zrequires_calibrationZ requires_parameters_quantizationZrequired_packagesr   r   r%   r*   rk   r#   r   r   listrG   rL   r`   rm   propertyrl   rn   ro   rp   rr   __classcell__r   r   r   r   r   "   sB    

* 


r   )typingr   r   r   Zutils.loggingr   baser   Zquantizers_utilsr	   Zmodeling_utilsr
   utilsr   r   r   r   r   Zutils.quantization_configr   r   Z
get_loggerrs   r(   r   r   r   r   r   <module>   s   
