a
    h                     @   s`   d Z ddlmZmZmZ e r&ddlZeeZdd Z	dd Z
d	d
 ZdddZdddZdS )z2HQQ (Half-Quadratic Quantization) integration file   )is_hqq_availableis_torch_availablelogging    Nc                 C   s   |   D ]\}}||_qd S N)named_modulesname)modelr   module r   Y/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/integrations/hqq.pyautoname_modules   s    r   c                 C   s   d dd | dD S )N.c                 S   s    g | ]}|d vr|  s|qS ))r	   Zlayers)	isnumeric).0nr   r   r   
<listcomp>!       z&name_to_linear_tag.<locals>.<listcomp>)joinsplit)r   r   r   r   name_to_linear_tag    s    r   c                 C   sR   t  rddlm} t }|  D ](\}}t|tjj|fr |	t
| q t|S )Nr   )	HQQLinear)r   Zhqq.core.quantizer   setr   
isinstancetorchnnLinearaddr   list)r	   r   linear_tagsr   r
   r   r   r   get_linear_tags%   s    r    c           	      C   s   |   D ]\}}|d u rg }|| t|tjjrt|j}||v r|| d ur|| | j| _	t
|| j| _| j| d d}dD ]}t||d  qtt| dkrt|||d\}}|d q| |fS )NFT)ZW_qmetar   patch_paramshas_been_replaced)Znamed_childrenappendr   r   r   r   r   r   Z_modulesquant_configtypeZ
source_clsZrequires_grad_setattrlenr   children_prepare_for_hqq_linearpop)	r	   r#   r$   Zcurrent_key_namer   r
   Z
linear_tagZatt_r   r   r   r,   0   s,    


r,   Fc                    s   |du rg n|}t |  t|  |j}|j}tt t| t|  t fdd|D rtt }|	| nt |}t
| ||d\} }||j|d| j_|std | S )a  
    Prepares nn.Linear layers for HQQ quantization.
    Since each layer type can have separate quantization parameters, we need to do the following:
    1- tag each module with its name via autoname_modules()
    2- Extract linear_tags (e.g. ['self_attn.q_proj', ...])
    3- Map quantization parameters as a dictionary linear_tag -> quant_params as HQQLinear expects it, this is referred to as patch_params
    Nc                 3   s   | ]}| v V  qd S r   r   )r   keyr   r   r   	<genexpr>k   r   z)prepare_for_hqq_linear.<locals>.<genexpr>r"   )r'   quant_methodskip_modulesz<No linear modules were found in your model for quantization.)r   r    r3   r'   r   r   anydictfromkeysupdater,   r2   configquantization_configloggerwarning)r	   r9   Zmodules_to_not_convertr$   r3   r'   r#   r   r0   r   prepare_for_hqq_linearU   s(    	



r<   )N)NNF)__doc__utilsr   r   r   r   Z
get_logger__name__r:   r   r   r    r,   r<   r   r   r   r   <module>   s   

%