a
    h                     @   s   d dl mZmZmZ ddlmZ ddlmZ er<ddlm	Z	 ddl
mZmZmZmZ ddlmZ e rnd d	lZeeZG d
d deZd	S )    )TYPE_CHECKINGAnyOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                	       s   e Zd ZdZdZdZdZdgZed fddZ	dd	 Z
d
d
dddZd$ddedeeef eee  dddZddddZddddZee eee dddZed%ed dddZd&dd Zddeeeef ed!d"d#Z  ZS )'FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTfp_quant)quantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ f/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   +   s    zFPQuantHfQuantizer.__init__c                 K   s   t j stdt s(| jjs(td| jjr:t	d t
 sHtd|d u rZtdn*t|trd| v s|d| v rtdd S )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpuZdiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudaZis_availableNotImplementedErrorr   r   ZpseudoquantizationImportErrorloggerwarningr
   
ValueError
isinstancedictvalues)r   Z
device_mapr   r   r   r   validate_environment/   s,    
"z'FPQuantHfQuantizer.validate_environmentztorch.dtype)dtypereturnc                 C   s8   |d u rt d tj}n|tjkr4td| d|S )NzJ`dtype` is None. Setting `dtype=torch.bfloat16` for qutlass compatibility.zInvalid `dtype` z=. fp_quant quantization only supports `dtype=torch.bfloat16`.)r    infor   Zbfloat16r"   )r   r'   r   r   r   update_dtypeM   s    

zFPQuantHfQuantizer.update_dtypeNr	   ztorch.Tensorztorch.device)modelparam_value
param_nametarget_device
state_dictunexpected_keysc           	      C   s   t ||\}}|dr@tjj||dd|_d |_d |_d S |drttj|||_d |_d |_d |_	d S tj|||_|
  |d ur||v r|| d S )Nz.qweightF)Zrequires_gradz	.dqweight)r   endswithr   nn	ParametertoqweightweightdqweightscalesZpre_forwardremove)	r   r+   r,   r-   r.   r/   r0   module_r   r   r   create_quantized_paramV   s&    	

z)FPQuantHfQuantizer.create_quantized_param)r+   c                 K   s8   ddl m} ddlm} |||| jd | j|j_d S )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)Zfp_quant_linear_config)r   r=   Zintegrations.fp_quantr>   r   config)r   r+   r   r=   r>   r   r   r   $_process_model_before_weight_loading   s    z7FPQuantHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r+   r   r   r   r   #_process_model_after_weight_loading   s    z6FPQuantHfQuantizer._process_model_after_weight_loading)missing_keysprefixr(   c                    sJ   ddl m   fdd| D ttdfddfdd	|D S )
Nr   FPQuantLinearc                    s   h | ]\}}t | r|qS r   )r#   ).0namer:   rD   r   r   	<setcomp>       z9FPQuantHfQuantizer.update_missing_keys.<locals>.<setcomp>)keyr(   c                    s>    ds drdS  d  t fddD S )Nz.weightz.biasF.c                 3   s   | ]}|v p| v V  qd S r   r   )rF   rG   Zfull_keyrJ   r   r   	<genexpr>   rI   zQFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>)r1   any)rJ   )fp_quant_namesrC   rL   r   should_exclude   s    z>FPQuantHfQuantizer.update_missing_keys.<locals>.should_excludec                    s   g | ]} |s|qS r   r   )rF   rJ   )rP   r   r   
<listcomp>   rI   z:FPQuantHfQuantizer.update_missing_keys.<locals>.<listcomp>)r   rE   Znamed_modulesstrbool)r   r+   rB   rC   r   )rE   rO   rC   rP   r   update_missing_keys   s    z&FPQuantHfQuantizer.update_missing_keysc                 C   s   dS )NFr   )r   r+   r   r   r   is_trainable   s    zFPQuantHfQuantizer.is_trainablec                 C   s   dS )NTr   )r   Zsafe_serializationr   r   r   is_serializable   s    z"FPQuantHfQuantizer.is_serializable)r+   r,   r-   r/   r(   c           	      K   s8   ddl m} t||\}}t||r0|dv r0dS dS d S )Nr   rD   )r6   r5   r7   TF)r   rE   r   r#   )	r   r+   r,   r-   r/   r   rE   r:   Ztensor_namer   r   r   check_quantized_param   s
    z(FPQuantHfQuantizer.check_quantized_param)N)N)N)__name__
__module____qualname____doc__Zrequires_calibrationZ requires_parameters_quantizationZis_qat_trainableZrequired_packagesr   r   r&   r*   rR   r$   r   r   listr<   r@   rA   rT   propertyrU   rV   rS   rW   __classcell__r   r   r   r   r   !   s<    

,

r   )typingr   r   r   baser   Zquantizers_utilsr   Zmodeling_utilsr	   utilsr
   r   r   r   Zutils.quantization_configr   r   Z
get_loggerrX   r    r   r   r   r   r   <module>   s   
