a
    h                     @   s   d dl Z d dlmZmZmZmZ d dlmZ ddlm	Z	 ddl
mZ erTddlmZ dd	lmZmZmZmZ dd
lmZ e rd dlZeeZG dd de	ZdS )    N)TYPE_CHECKINGAnyOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigc                       s(  e Zd ZdZddgZdZdZed fddZd	d
 Z	dd Z
dd ZdddddZee eee dddZddeeeef edddZeeeeef f eeeeef f dddZddeddd d!Zddd"d#d$Zd0deee  d&d'd(Zd)d* Zeed+d,d-Zd1d.d/Z  ZS )2QuantoHfQuantizerz*
    Quantizer for the quanto library
    Zquanto
accelerateTF)quantization_configc                    s    t  j|fi | |   d S N)super__init__	post_init)selfr   kwargs	__class__ d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_quanto.pyr   2   s    zQuantoHfQuantizer.__init__c                 C   s   | j jdur| jstddS )z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   ZactivationsZpre_quantized
ValueErrorr   r   r   r   r   6   s    zQuantoHfQuantizer.post_initc                 O   s    t  stdt stdd S )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   ImportErrorr   )r   argsr   r   r   r   validate_environment@   s    z&QuantoHfQuantizer.validate_environmentc                 C   s   |d u rddi}t d |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_mapr   r   r   update_device_mapJ   s    z#QuantoHfQuantizer.update_device_mapztorch.dtype)dtypereturnc                 C   s   |d u rt d tj}|S )NzPYou did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.)r%   r&   torchZfloat32)r   r)   r   r   r   update_dtypeT   s    
zQuantoHfQuantizer.update_dtype)missing_keysprefixr*   c                    s   t  rddlm} g  | D ]T\}}t||r|D ]<}||v sR|| d| v r4|ds4|ds4 | q4q fdd|D S )Nr   QModuleMixin.z.weightz.biasc                    s   g | ]}| vr|qS r   r   ).0kZnot_missing_keysr   r   
<listcomp>h       z9QuantoHfQuantizer.update_missing_keys.<locals>.<listcomp>)r   optimum.quantor0   Znamed_modules
isinstanceendswithappend)r   modelr-   r.   r0   namemodulemissingr   r4   r   update_missing_keysZ   s     
z%QuantoHfQuantizer.update_missing_keysr   ztorch.Tensor)r;   param_value
param_name
state_dictr*   c                 K   s   t  rddlm} |d}|d}|durp|durpt| }	|dkrpt|	dkrp|	dhksp|	ddhkspd	S t||\}
}t|
|rd
|v r|
j	 S d	S dS )z=
        Check if a parameter needs to be quantized.
        r   r/   r'   param_deviceNr$   r   ZdiskFweight)
r   r7   r0   getsetvalueslenr	   r8   frozen)r   r;   r@   rA   rB   r   r0   r'   rC   Zdevice_map_valuesr=   Ztensor_namer   r   r   check_quantized_paramj   s    

z'QuantoHfQuantizer.check_quantized_param)
max_memoryr*   c                 C   s   dd |  D }|S )Nc                 S   s   i | ]\}}||d  qS )g?r   )r2   keyvalr   r   r   
<dictcomp>   r6   z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   rK   r   r   r   adjust_max_memory   s    z#QuantoHfQuantizer.adjust_max_memoryztorch.device)r;   r@   rA   target_devicec           
      O   s<   ddl m} ||||| t||\}}	|  d|j_dS )ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsrR   r	   freezerD   Zrequires_grad)
r   r;   r@   rA   rQ   r!   r   rR   r=   _r   r   r   create_quantized_param   s
    z(QuantoHfQuantizer.create_quantized_param)target_dtyper*   c                 C   sZ   t tj dt dkrNddlm} tj|j|j	|j
d}|| jj }|S tdd S )Nr   z0.27.0r   )CustomDtype)int8Zfloat8Zint4Zint2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r   parse	importlibmetadatarS   rX   r+   rY   ZFP8ZINT4ZINT2r   weightsr   )r   rW   rX   mappingr   r   r   adjust_target_dtype   s    z%QuantoHfQuantizer.adjust_target_dtypeN)r;   keep_in_fp32_modulesc                 K   sD   ddl m} | || jj|| _||| j| jd\}}| j|j_d S )Nr
   )replace_with_quanto_layers)modules_to_not_convertr   )Zintegrationsra   Zget_modules_to_not_convertr   rb   config)r   r;   r`   r   ra   rU   r   r   r   $_process_model_before_weight_loading   s    


z6QuantoHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r;   r   r   r   r   #_process_model_after_weight_loading   s    z5QuantoHfQuantizer._process_model_after_weight_loading)r*   c                 C   s   dS )NTr   r   r   r   r   is_trainable   s    zQuantoHfQuantizer.is_trainablec                 C   s   dS )NFr   )r   Zsafe_serializationr   r   r   is_serializable   s    z!QuantoHfQuantizer.is_serializable)N)N) __name__
__module____qualname____doc__Zrequired_packagesZ requires_parameters_quantizationZrequires_calibrationr   r   r   r"   r(   r,   liststrr?   dictr   boolrJ   r   intrP   rV   r_   r   rd   re   propertyrf   rg   __classcell__r   r   r   r   r   )   s>   



0 r   )r[   typingr   r   r   r   	packagingr   baser   Zquantizers_utilsr	   Zmodeling_utilsr   utilsr   r   r   r   Zutils.quantization_configr   r+   Z
get_loggerrh   r%   r   r   r   r   r   <module>   s   
