import importlib
from typing import TYPE_CHECKING

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..integrations import replace_with_aqlm_linear
from ..utils import is_accelerate_available, is_aqlm_available, is_torch_available, logging
from ..utils.quantization_config import QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class AqlmHfQuantizer(HfQuantizer):
    """
    Quantizer of the AQLM method. Enables the loading of prequantized models.
    """

    requires_calibration = True
    required_packages = ["aqlm"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Using `aqlm` quantization requires Accelerate: `pip install accelerate`")

        if not is_aqlm_available():
            raise ImportError("Using `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`")

    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
        # AQLM inference runs in half precision on GPU; fall back to float32 on CPU.
        if dtype is None:
            if torch.cuda.is_available():
                dtype = torch.float16
                logger.info(
                    "CUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
                )
            else:
                dtype = torch.float32
                logger.info(
                    "CUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `dtype` manually."
                )
        return dtype

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        **kwargs,
    ):
        # Swap eligible nn.Linear modules for AQLM quantized linears before the
        # prequantized weights are loaded into the model.
        replace_with_aqlm_linear(
            model,
            quantization_config=self.quantization_config,
            linear_weights_not_to_quantize=self.quantization_config.linear_weights_not_to_quantize,
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    @property
    def is_trainable(self) -> bool:
        aqlm_supports_training = version.parse(importlib.metadata.version("aqlm")) >= version.parse("1.0.2")
        if aqlm_supports_training:
            return True
        else:
            logger.warning(
                f"Currently installed `aqlm` version ({importlib.metadata.version('aqlm')}) doesn't support training. If you wish to train a quantized model, please update `aqlm` with `pip install aqlm>=1.0.2`"
            )
            return False

    def is_serializable(self, safe_serialization=None):
        return True
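
# A minimal usage sketch, assuming an AQLM-quantized checkpoint is available on
# the Hub (the repo id below is an assumption; substitute any AQLM checkpoint).
# `from_pretrained` detects the AQLM section in the checkpoint's quantization
# config and routes loading through `AqlmHfQuantizer` above, so the quantizer
# is never instantiated directly.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf",  # assumed example repo id
        device_map="auto",  # requires `accelerate`; see validate_environment()
    )
    print(model.config.quantization_config)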
