a
    ¾ÀhÑ  ã                   @   s”   d dl Z d dlmZ d dlmZ ddlmZ er<ddlmZ ddl	m
Z
mZmZmZmZ dd	lmZmZ eƒ rvd dlZe e¡ZG d
d„ deƒZdS )é    N)ÚTYPE_CHECKING)Úversioné   )ÚHfQuantizeré   )ÚPreTrainedModel)Úis_auto_gptq_availableÚis_gptqmodel_availableÚis_optimum_availableÚis_torch_availableÚlogging)Ú
GPTQConfigÚQuantizationConfigMixinc                       sŽ   e Zd ZdZdZg d¢ZdZedœ‡ fdd„Zdd	„ Z	d
d
dœdd„Z
dd„ Zddœdd„Zddœdd„Zeedœdd„ƒZddd„Z‡  ZS )ÚGptqHfQuantizerzå
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a non-prequantized model.
    F)ÚoptimumÚ	auto_gptqÚ	gptqmodelN)Úquantization_configc                    sD   t ƒ j|fi |¤Ž tƒ s"tdƒ‚ddlm} | | j ¡ ¡| _	d S )NúGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )ÚGPTQQuantizer)
ÚsuperÚ__init__r
   ÚImportErrorZoptimum.gptqr   Ú	from_dictr   Zto_dict_optimumÚoptimum_quantizer)Úselfr   Úkwargsr   ©Ú	__class__© úb/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_gptq.pyr   -   s
    zGptqHfQuantizer.__init__c                 O   sð   t ƒ stdƒ‚tƒ r$tƒ r$t d¡ tƒ rFt tj	 d¡¡t d¡kpJtƒ }|sdt
j ¡ sdtdƒ‚nˆtƒ sztƒ sztdƒ‚nrtƒ r¦t tj	 d¡¡t d¡k r¦tdƒ‚nFtƒ rìt tj	 d	¡¡t d
¡k sät tj	 d¡¡t d¡k rìtdƒ‚d S )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   z‹You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   ú1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r
   r   r   r	   ÚloggerÚwarningr   ÚparseÚ	importlibÚmetadataÚtorchÚcudaZis_availableÚRuntimeError)r   Úargsr   Zgptq_supports_cpur   r   r    Úvalidate_environment6   s6    
þý
ÿÿÿÿþz$GptqHfQuantizer.validate_environmentztorch.dtype)ÚdtypeÚreturnc                 C   s2   |d u rt j}t d¡ n|t jkr.t d¡ |S )NzLLoading the model in `torch.float16`. To overwrite it, set `dtype` manually.zLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r'   Zfloat16r"   Úinfo)r   r,   r   r   r    Úupdate_dtypeR   s    

zGptqHfQuantizer.update_dtypec                 C   sB   |d u rdt  d¡i}tƒ s>|ddt  d¡ifv r>|ddik |S )NÚ Úcpur   )r'   Zdevicer	   )r   Z
device_mapr   r   r    Úupdate_device_mapZ   s
    z!GptqHfQuantizer.update_device_mapr   )Úmodelc                 K   s\   |j jdkrtdƒ‚| jrXt tj d¡¡t d¡krD| j 	|¡}n| jj	|fi |¤Ž}d S )NZ	input_idsz%We can only quantize pure text model.r   r!   )
r   Zmain_input_namer)   Úpre_quantizedr   r$   r%   r&   r   Zconvert_model©r   r3   r   r   r   r    Ú$_process_model_before_weight_loadingb   s    z4GptqHfQuantizer._process_model_before_weight_loadingc                 K   sT   | j r| j |¡}n<| jjd u r*|j| j_| j || jj¡ t | j 	¡ ¡|j
_d S )N)r4   r   Zpost_init_modelr   Ú	tokenizerZname_or_pathZquantize_modelr   r   Úto_dictÚconfigr5   r   r   r    Ú#_process_model_after_weight_loadingm   s    
z3GptqHfQuantizer._process_model_after_weight_loading)r-   c                 C   s   dS ©NTr   )r   r   r   r    Úis_trainablew   s    zGptqHfQuantizer.is_trainablec                 C   s   dS r;   r   )r   Zsafe_serializationr   r   r    Úis_serializable{   s    zGptqHfQuantizer.is_serializable)N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Zrequires_calibrationZrequired_packagesr   r   r   r+   r/   r2   r6   r:   ÚpropertyÚboolr<   r=   Ú__classcell__r   r   r   r    r   #   s   	
r   )r%   Útypingr   Ú	packagingr   Úbaser   Zmodeling_utilsr   Úutilsr   r	   r
   r   r   Zutils.quantization_configr   r   r'   Z
get_loggerr>   r"   r   r   r   r   r    Ú<module>   s   
