a
    h                     @   s   d dl mZmZ ddlmZ er,ddlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ e rjd d	lZeeZG d
d deZd	S )    )TYPE_CHECKINGOptional   )HfQuantizer   )PreTrainedModel)replace_with_spqr_linear)is_accelerate_availableis_spqr_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZed fddZdd Zddd	d
dZdde	e
e  dddZddddZedd ZdddZ  ZS )SpQRHfQuantizerzS
    Quantizer of the SpQR method. Enables the loading of prequantized models.
    T)quantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ b/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_spqr.pyr   (   s    zSpQRHfQuantizer.__init__c                 O   s2   t j stdt s tdt s.tdd S )Nz,GPU is required to run SpQR quantized model.zGUsing `spqr` quantization requires Accelerate: `pip install accelerate`zFUsing `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`)torchcudaZis_availableRuntimeErrorr	   ImportErrorr
   )r   argsr   r   r   r   validate_environment,   s    
z$SpQRHfQuantizer.validate_environmentztorch.dtype)dtypereturnc                 C   s0   |d u rt j}td n|t jkr,td|S )NzHAssuming SpQR inference on GPU and loading the model in `torch.float16`.z|You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.)r   Zfloat16loggerinfo
ValueError)r   r   r   r   r   update_dtype6   s    
zSpQRHfQuantizer.update_dtypeNr   )modelkeep_in_fp32_modulesc                 K   s4   |  || jj|| _t|| j| jd | j|j_d S )N)r   modules_to_not_convert)Zget_modules_to_not_convertr   r'   r   config)r   r%   r&   r   r   r   r   $_process_model_before_weight_loadingA   s    
z4SpQRHfQuantizer._process_model_before_weight_loading)r%   c                 K   s   |S r   r   )r   r%   r   r   r   r   #_process_model_after_weight_loadingR   s    z3SpQRHfQuantizer._process_model_after_weight_loadingc                 C   s   dS )NFr   )r   r   r   r   is_trainableU   s    zSpQRHfQuantizer.is_trainablec                 C   s   dS )NTr   )r   Zsafe_serializationr   r   r   is_serializableY   s    zSpQRHfQuantizer.is_serializable)N)N)__name__
__module____qualname____doc__Zrequires_calibrationr   r   r   r$   r   liststrr)   r*   propertyr+   r,   __classcell__r   r   r   r   r   !   s   
 

r   )typingr   r   baser   Zmodeling_utilsr   Zintegrationsr   utilsr	   r
   r   r   Zutils.quantization_configr   r   Z
get_loggerr-   r!   r   r   r   r   r   <module>   s   
