a
    h2                     @   s   d dl mZmZ ddlmZ ddlmZmZmZm	Z	 ddl
mZ ddlmZ er\ddlmZ e rnd d	lmZ e r|d d
lZe	eZdd ZG dd deZd
S )    )TYPE_CHECKINGAny   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 C   s.   | dd d }| }|D ]}|j| }q|S )N.)splitZ_modules)modelnameZmodule_treeparentm r   a/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parent%   s
    r   c                       s   e Zd ZdZdZdZdZdgZ fddZdd Z	d	e
e ee
e d
ddZd	e
e e
e e
e dddZd	deeeef edddZd	dedeeef e
e dddZdd Zd	dddZd	dddZd%d d!Zeed"d#d$Z  ZS )&HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FTZhqqc                    s$   t  j|fi | d | _d| _d S )NF)super__init__dtypeusing_multi_gpu)selfquantization_configkwargs	__class__r   r   r   9   s    zHqqHfQuantizer.__init__c                 O   s   t  std|dds&|ddr.td| jd u r^d|v rL|d | _ntj| _td |d}t	|t
rd	| v sd
| v rtdntt| dk| _d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Zfrom_tfFZ	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpuZdiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorget
ValueErrorr   torchZfloat32loggerinfo
isinstancedictvalueslensetr   )r   argsr    r#   r   r   r   validate_environment>   s(    



z#HqqHfQuantizer.validate_environmentr   )r   missing_keysprefixreturnc                 K   s   | j rdd |D S |S d S )Nc                 S   s   g | ]}d |vr|qS )weightr   ).0keyr   r   r   
<listcomp>_       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r   r   r2   r3   r    r   r   r   update_missing_keys[   s    z"HqqHfQuantizer.update_missing_keys)r   expected_keysloaded_keysr4   c                    sN  | j s
|S  fdd t|}t rFddlm} | D ]\}}||_q:t } || t }	|D ](|jjd D ]}
|
v rt|		 qtqd||	8 }|d d t
jddd d	h }t }|D ]$tfd
d|D r|	 q||8 }|D ]Td |v r|	d  n|fdd|D  d |v r|	d  qt|S )Nc                    s:   |   D ],\}}t|tjjr*||j  || qd S N)Znamed_childrenr+   r(   nnLinearaddr   )r   Zlayersr   module)_find_hqq_quantizable_layersr   r   rC   k   s    zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersr   	HQQLinearskip_modulesr$   FZlinear_layerquant_configcompute_dtypedevicedel_origbiasc                 3   s   | ]}| v V  qd S r>   r   )r6   _module)r7   r   r   	<genexpr>   r9   z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>z.weightc                    s   h | ]} d  | qS )r   r   )r6   Z_ref_key)rM   r   r   	<setcomp>   r9   z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>z.bias)r:   r/   r   hqq.core.quantizerE   Znamed_modulesr   configr   rA   r(   Zfloat16Zstate_dict_keysanyupdatelist)r   r   r<   r=   Znew_keysrE   r   rB   Z_valid_modulesZ_skipped_modulesZ_skip_moduleZ	_ref_keysZ_rm_keysr   )rC   rM   r7   r   update_expected_keysd   sJ    
	z#HqqHfQuantizer.update_expected_keysztorch.Tensor)r   param_value
param_name
state_dictr4   c           	      K   sl   t  rddlm} t||\}}| jr@t|tjj|fo>|dkS t|tjjrV|dkpft||of|dkS d S )Nr   rD   r5   rL   )	r   rP   rE   r   r:   r+   r(   r?   r@   )	r   r   rV   rW   rX   r    rE   rB   tensor_namer   r   r   check_quantized_param   s    z$HqqHfQuantizer.check_quantized_paramztorch.device)r   rV   rW   target_devicerX   unexpected_keysc                 C   s  t  r*ddlm} t|ddd}||_t||\}	}
d|ddd }t||}|dd }|
d	krtdS i }|	 D ]@\}}|d |v r|||dd < |dur||v r|
| q| jrTt|	|rdS |dd| j|d
d}|| |jdur"t|jtjr"tj|j|_| jr4| |}t||| |	`~	tj  dS |	 D ]\}}t|	|tj| q\|jjd }|jjd }d|	jddd }d}d|v r|}n||v r|| }|D ]}||	jv rd} qq|dur`||	|| j|dd}|jdur@t|jtjr@tj|j|_| jrR| |}t||| n|	j| j|d}	t|||	 tj  dS )a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rD   _selfc                 S   s   t jd| j| jdS )Nr   r   rJ   )r(   emptyrI   rJ   r]   r   r   r   r5      s    z5HqqHfQuantizer.create_quantized_param.<locals>.weightr   Nr   rL   FrG   rH   rF   Zweight_quant_paramsT)rH   rI   rJ   rK   r_   )r   rP   rE   propertyr5   r   joinr   r   itemsremover:   r+   r   Zload_state_dictrL   r(   ZTensorr?   	Parameterr   _patch_layer_for_multigpusetattr__dict__cudaZempty_cacherQ   r   r   to)r   r   rV   rW   r[   rX   r\   rE   r5   rB   rY   Z
layer_nameparent_modulenodeZmodule_state_dictkv	hqq_layerr7   ZtensorrH   rF   Z
module_tagZmodule_quant_configZskip_moduler   r   r   create_quantized_param   s    









z%HqqHfQuantizer.create_quantized_paramc                    s$   t dd   fdd_S )Nc                 S   s4   t || j|   }| jd ur0|| j7 }|S r>   )r(   matmulrk   rJ   Z
dequantizetrL   )r   xoutr   r   r   forward_with_device.  s    

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S r>   r   )rt   rv   rp   r   r   <lambda>4  r9   z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)r   forward)r   rp   r   rw   r   rg   +  s    z(HqqHfQuantizer._patch_layer_for_multigpu)r   c                 K   s   t || jd}d S )N)r   )r   r   r   r   r    r   r   r   $_process_model_before_weight_loading7  s    z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)Zis_hqq_quantizedis_serializableZis_hqq_serializablerz   r   r   r   #_process_model_after_weight_loading@  s    
z2HqqHfQuantizer._process_model_after_weight_loadingNc                 C   s   dS r|   r   )r   Zsafe_serializationr   r   r   r}   E  s    zHqqHfQuantizer.is_serializable)r4   c                 C   s   dS r|   r   )r   r   r   r   is_trainableH  s    zHqqHfQuantizer.is_trainable)N)__name__
__module____qualname____doc__Zuse_keep_in_fp32_modulesZ requires_parameters_quantizationZrequires_calibrationZrequired_packagesr   r1   rT   strr;   rU   r,   r   boolrZ   rq   rg   r{   r~   r}   rb   r   __classcell__r   r   r!   r   r   -   sB   

?

s	
r   )typingr   r   Zintegrationsr   utilsr   r   r   r	   baser   Zquantizers_utilsr   Zmodeling_utilsr   Zaccelerate.hooksr   r(   Z
get_loggerr   r)   r   r   r   r   r   r   <module>   s   
