from typing import TYPE_CHECKING, Any, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit quantization from the EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear
        during loading: loads the 16-bit weights and passes them to the layer objects
        after loading: quantizes the individual weights at the first .cuda() call
    """

    requires_parameters_quantization = True
    requires_calibration = False

    required_packages = ["eetq", "accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_eetq_available():
            raise ImportError(
                "Using `eetq` 8-bit quantization requires eetq. "
                "Please install the latest version of eetq from: https://github.com/NetEase-FuXi/EETQ"
            )

        try:
            import eetq  # noqa: F401
        except ImportError as exc:
            # EETQ v1.0.0 imports the `shard_checkpoint` helper that was removed from transformers.
            if "shard_checkpoint" in str(exc):
                raise ImportError(
                    "You are using a version of EETQ that is incompatible with the current transformers version. "
                    "Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
                ) from exc
            else:
                raise

        if not is_accelerate_available():
            raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting into 8-bit weights from tf/flax weights is currently not supported, please make "
                "sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif device_map is not None:
            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
                raise ValueError(
                    "You are attempting to load an EETQ model with a device_map that contains a CPU or disk device. "
                    "This is not supported. Please remove the CPU or disk device from the device_map."
                )

    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
        if dtype is None:
            dtype = torch.float16
            logger.info(
                "Overriding dtype=%s with `dtype=torch.float16` due to requirements of `eetq` to enable "
                "model loading in 8-bit. Pass your own dtype to specify the dtype of the remaining non-linear "
                "layers or pass dtype=torch.float16 to remove this warning.",
                dtype,
            )
        elif dtype != torch.float16:
            logger.info("We suggest you to set `dtype=torch.float16` for better efficiency with EETQ.")
        return dtype

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: dict[str, Any],
        **kwargs,
    ):
        from eetq import EetqLinear

        module, tensor_name = get_module_from_name(model, param_name)

        if isinstance(module, EetqLinear):
            if self.pre_quantized or tensor_name == "bias":
                if tensor_name == "weight" and param_value.dtype != torch.int8:
                    raise ValueError("Expect quantized weights but got an unquantized weight")
                return False
            else:
                if tensor_name == "weight_scale":
                    raise ValueError("Expect unquantized weights but got a quantized weight_scale")
                return True
        return False

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: dict[str, Any],
        unexpected_keys: Optional[list[str]] = None,
    ):
        """
        quantizes weights into qweight and weight_scales
        """
        from eetq import quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        module._buffers[tensor_name] = new_value.to(target_device)
        module.register("weight_scales", weight_scale.to(target_device))

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_eetq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
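

# A minimal usage sketch, assuming the public `EetqConfig` and `AutoModelForCausalLM`
# APIs from transformers; the checkpoint name is a placeholder. Passing an `EetqConfig`
# to `from_pretrained` routes loading through this quantizer: linear layers are swapped
# for `EetqLinear` before the weights load, and each weight is quantized to int8 with a
# per-channel `weight_scales` buffer as it is placed on the GPU.
#
#     from transformers import AutoModelForCausalLM, EetqConfig
#
#     quantization_config = EetqConfig("int8")  # int8 weights, fp16 activations (W8A16)
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-350m",  # placeholder checkpoint
#         device_map="auto",
#         quantization_config=quantization_config,
#     )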