a
    h5                     @   s2  d dl Z d dlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG e%e)e+e7e!e?eAe5e/e9e;e-e1eEe'eGeCe3e#e=dZHeeeeee	eeeeeeeeeeeee
edZIeJeKZLG dd  d ZMG d!d" d"ZNeOd#d$d%ZPeOd&d'd(ZQd)d* ZRdS )+    N)OptionalUnion   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizer)awqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqrfp8
auto-roundmxfp4)r2   r3   r4   r:   r5   r6   r7   r8   r9   r<   r=   r>   r;   r?   r@   rA   rB   rC   rD   rE   c                   @   s.   e Zd ZdZeedddZedd ZdS )AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    )quantization_config_dictc                 C   s   | d}| dds"| ddrB| ddr2dnd}tj| }n|d u rRtd|tvrvtd| d	tt  t| }||S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrG   rH   suffix
target_cls r[   X/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/auto.pyrW   y   s     

z AutoQuantizationConfig.from_dictc                 K   sV   t j|fi |}t|dd d u r2td| d|j}| |}|jf i | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized.)r   from_pretrainedgetattrrR   r]   rW   update)rX   pretrained_model_name_or_pathkwargsZmodel_configrG   r]   r[   r[   r\   r^      s    

z&AutoQuantizationConfig.from_pretrainedN)__name__
__module____qualname____doc__classmethoddictrW   r^   r[   r[   r[   r\   rF   s   s
   rF   c                   @   sb   e Zd ZdZeeeef dddZedd Z	eeeef e
e ddd	Zed
d ZdS )AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    )r]   c                 K   sx   t |trt|}|j}|tjkr<|jr4|d7 }n|d7 }|tvr`t	d| dt
t  t| }||fi |S )NrL   rK   rN   rO   )
isinstancerh   rF   rW   rH   r   rQ   rI   rU   rR   rT   rV   )rX   r]   rb   rH   rZ   r[   r[   r\   from_config   s    




zAutoHfQuantizer.from_configc                 K   s   t j|fi |}| |S )N)rF   r^   rk   )rX   ra   rb   r]   r[   r[   r\   r^      s    zAutoHfQuantizer.from_pretrained)r]   quantization_config_from_argsc                 C   s   |durd}nd}t |tr<t |tr2t|}n
t|}|durr|jj|jjkrrtd|jj d|jj dt |tt	tt
ttfr|dur| }| D ]\}}t||| q|dt|  d7 }|dkrt |tst| n
t| |S )	z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        NzYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rj   rh   r   rW   rF   	__class__rc   rR   r   r	   r   r   r   Zget_loading_attributesitemssetattrrT   rV   warningswarnloggerinfo)rX   r]   rl   Zwarning_msgZloading_attr_dictattrvalr[   r[   r\   merge_quantization_configs   s<    	



z*AutoHfQuantizer.merge_quantization_configsc                 C   s   |  dd }|  dds$|  ddrD|  ddr4dnd}tj| }n|d u rTtd|tvrtd| d	tt	  d
 dS dS )NrH   rI   FrJ   rK   rL   rM   rN   rO   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rP   r   rQ   rR   rS   rs   warningrT   rU   rV   )rG   rH   rY   r[   r[   r\   supports_quant_method   s     
z%AutoHfQuantizer.supports_quant_methodN)rc   rd   re   rf   rg   r   r   rh   rk   r^   r   rw   staticmethodry   r[   r[   r[   r\   ri      s   

4ri   methodc                    s    fdd}|S )z-Register a custom quantization configuration.c                    s6    t v rtd  dt| ts*td| t  < | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rS   rR   
issubclassr   	TypeErrorrX   r{   r[   r\   register_config_fn  s    
z8register_quantization_config.<locals>.register_config_fnr[   )r|   r   r[   r{   r\   register_quantization_config
  s    
r   namec                    s    fdd}|S )zRegister a custom quantizer.c                    s6    t v rtd  dt| ts*td| t  < | S )NzQuantizer 'r}   z!Quantizer must extend HfQuantizer)rU   rR   r~   r   r   r   r[   r\   register_quantizer_fn  s    
z1register_quantizer.<locals>.register_quantizer_fnr[   )r   r   r[   r   r\   register_quantizer  s    
r   c                 C   s   t | d}|rt| jsd}|s*|d urX|r@t| j|| _n|| _tj| j|d}	nd }	|	d ur|	j|||||d |	|}|	|}|		| } t
|	jdds|	jj}
t
|
d|
|d< |	| ||fS )Nr]   F)pre_quantized)dtypefrom_tf	from_flax
device_mapweights_onlyZ
dequantizevalueZquant)hasattrri   ry   r]   rw   rk   Zvalidate_environmentZupdate_dtypeZupdate_device_mapZupdate_tp_planr_   rH   )configr]   r   r   r   r   r   
user_agentr   Zhf_quantizerrH   r[   r[   r\   get_hf_quantizer*  s:    



r   )Srq   typingr   r   Zmodels.auto.configuration_autor   utilsr   Zutils.quantization_configr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   Zquantizer_aqlmr   Zquantizer_auto_roundr   Zquantizer_awqr    Zquantizer_bitnetr!   Zquantizer_bnb_4bitr"   Zquantizer_bnb_8bitr#   Zquantizer_compressed_tensorsr$   Zquantizer_eetqr%   Zquantizer_fbgemm_fp8r&   Zquantizer_finegrained_fp8r'   Zquantizer_fp_quantr(   Zquantizer_gptqr)   Zquantizer_higgsr*   Zquantizer_hqqr+   Zquantizer_mxfp4r,   Zquantizer_quantor-   Zquantizer_quarkr.   Zquantizer_spqrr/   Zquantizer_torchaor0   Zquantizer_vptqr1   rU   rS   Z
get_loggerrc   rs   rF   ri   strr   r   r   r[   r[   r[   r\   <module>   s   \
)n