a
    h=                     @   sp   d dl mZmZmZ ddlmZ er0ddlmZ ddlm	Z	m
Z
mZ e
 rRd dlZeeZG dd	 d	eZdS )
    )TYPE_CHECKINGOptionalUnion   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableloggingNc                       s   e Zd ZdZdZdZdgZ fddZdd Zd	d
ddZ	dd	e
ee  dddZeeeeef f eeeeef f dddZdddddZd ddZeedddZeedddZ  ZS )!BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    FTZ
acceleratec                    s   t  j|fi | || _d S N)super__init__quantization_config)selfr   kwargs	__class__ d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_bitnet.pyr   -   s    zBitNetHfQuantizer.__init__c                 O   s   t  std|dds&|ddr.tdtj sFtd d S |d}|d u rdtd n2|d urt	|t
rd	| v sd
| v rtdd S )NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)Zfrom_tfFZ	from_flaxztLoading ternary weights from tf/flax is currently not supported, please make sure the weights are in PyTorch format.zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpuZdiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r	   ImportErrorget
ValueErrortorchcudaZis_availableloggerZwarning_once
isinstancedictvalues)r   argsr   r   r   r   r   validate_environment1   s*    

"z&BitNetHfQuantizer.validate_environmentr   )modelc                 K   s   |S r   r   )r   r$   r   r   r   r   #_process_model_after_weight_loadingN   s    z5BitNetHfQuantizer._process_model_after_weight_loadingN)r$   keep_in_fp32_modulesc                 K   s:   ddl m} | || jj|| _||| j| j| jd}d S )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   pre_quantized)Zintegrationsr'   Zget_modules_to_not_convertr   r(   r)   )r   r$   r&   r   r'   r   r   r   $_process_model_before_weight_loadingQ   s    
z6BitNetHfQuantizer._process_model_before_weight_loading)
max_memoryreturnc                 C   s   dd |  D }|S )Nc                 S   s   i | ]\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>e       z7BitNetHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   r+   r   r   r   adjust_max_memoryd   s    z#BitNetHfQuantizer.adjust_max_memoryztorch.dtype)target_dtyper,   c                 C   s
   t j}|S r   )r   Zint8)r   r4   r   r   r   adjust_target_dtypeh   s    z%BitNetHfQuantizer.adjust_target_dtypec                 C   s   dS )NTr   )r   Zsafe_serializationr   r   r   is_serializablel   s    z!BitNetHfQuantizer.is_serializable)r,   c                 C   s   | j jdko| j jdkS )Nautobitlinearonliner   Zlinear_classZquantization_moder   r   r   r   is_trainableo   s    
zBitNetHfQuantizer.is_trainablec                 C   s   | j jdko| j jdkS )zUFlag indicating whether the quantized model can carry out quantization aware trainingr7   r8   r9   r:   r   r   r   is_qat_trainablev   s    
z"BitNetHfQuantizer.is_qat_trainable)N)N)__name__
__module____qualname____doc__Z requires_parameters_quantizationZrequires_calibrationZrequired_packagesr   r#   r%   r   liststrr*   r    r   intr3   r5   r6   propertyboolr;   r<   __classcell__r   r   r   r   r       s&    
0
r   )typingr   r   r   baser   Zmodeling_utilsr   utilsr	   r
   r   r   Z
get_loggerr=   r   r   r   r   r   r   <module>   s   
