a
    h'                     @   s   d dl Z d dlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZmZ d
dlmZ e rd dlZd dlmZ dZdZdZdZdZdddZG dd dZdS )    N)Image   )create_causal_mask)_get_model_class)
AutoConfig)MODEL_FOR_PRETRAINING_MAPPINGMODEL_MAPPING)PROCESSOR_MAPPING_NAMESAutoProcessor)TOKENIZER_MAPPING_NAMESAutoTokenizer   )is_torch_availablez[92mz[93mz[0mu   ■u   ⬚<img>c              
      s|    jdkr(dddddf jdkrJddddddf ttdd D }d}g }tD ]l\ }	|	kr|s }d  f< |dkrt|	ks d krt d krȈ d7  d| | f< d}qtddurfd	d
tD dfddtD }
|dur||dk}t|	dd | 
 |	ddtd|d |}tj|dt t t dt t t d}|d|  d|d  dt|
d  }dur|d7 }|| g }tD ]h\}||f dkr6|dd
 tt|ttD  n |tt|tt qtttt| }|D ]>}|dur|d d d| d d| nd qptD ]\ t|}v rt | t n|}d fddtD }d}durDd fddtD }|| dt d d| d|  qd|S )z
    Generates an attention matrix from a given attention mask.

    Optionally applies a sliding window mask (e.g., for Gemma2/3) and
    marks regions where image tokens occur based on the specified `img_token`.
       r   N   c                 s   s   | ]}t t|V  qd S N)lenrepr).0word r   c/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/utils/attention_visualizer.py	<genexpr>8       z6generate_attention_matrix_from_mask.<locals>.<genexpr>r   r   c                    s$   g | ]  fd dt D qS )c                    s0   g | ](}d  |   kr k r(n ndnd qS )r   r   r   r   j)isliding_windowr   r   
<listcomp>I   r   zBgenerate_attention_matrix_from_mask.<locals>.<listcomp>.<listcomp>)range)r   )nr   )r   r   r   I   r   z7generate_attention_matrix_from_mask.<locals>.<listcomp> c                 3   sV   | ]N} d |f r"t  t t n*d |kr:t t t n d |f rJtntV  qdS r   NYELLOWBLACK_SQUARERESETGREENWHITE_SQUAREr   )maskr   r   r   K   s   

   )
boundariesz: i == j (diagonal)   z: token_type_idszAttention MatrixzSliding Window Maskc                 S   s   g | ]}t  | t qS r   )r%   r'   )r   kr   r   r   r   i   r   z	    |     c                 3   sj   | ]b}| v r6 |f r6v r6t  t t n* |krNt t t n |f r^tntV  qd S r   r$   r   )r   	img_tokenr*   r   wordsr   r   r   x   s   


c                 3   sv   | ]n}| v rBv rBd  f d |f krBt  t t n* |krZt t t n  | rjtntV  qdS r#   r$   r   )r   r0   sliding_window_masktoken_type_bucketsr   r1   r   r   r      s   

z: 
)intndimr   max	enumerater    jointorchwhereZcumsumboolarangeZ	bucketizer(   r&   r'   r%   appendljustliststrrjustmapzipr   )r1   r*   r0   r   token_type_idsimage_seq_lengthZmax_word_lengthZfirst_img_idxoutputr.   Z	row_dummyZ
is_specialr-   Zlegendf_stringZvertical_headeridxrowZ	word_reprZcolored_wordZrow_displayZsliding_window_rowr   )	r   r0   r*   r!   r   r2   r3   r   r1   r   #generate_attention_matrix_from_mask(   s|    	



 

,$(

.rK   c                   @   s:   e Zd ZedddZdedddZdeddd	Zd
S )AttentionMaskVisualizer)
model_namec                 C   s   t |}d| _t| dr0t| dd | _zt|t}W n t	yZ   t|t
}Y n0 |d u rttd| d|| _G dd d|tj}|||| _| j|j || _|| _d S )Nr   r   zModel name z- is not supported for attention visualizationc                   @   s   e Zd Zdd ZdS )z7AttentionMaskVisualizer.__init__.<locals>._ModelWrapperc                 S   s$   t j|  t dd| _|| _d S )Nr   )nnModule__init__ZLinearZdummy_moduleconfig)selfrQ   rM   r   r   r   rP      s    z@AttentionMaskVisualizer.__init__.<locals>._ModelWrapper.__init__N)__name__
__module____qualname__rP   r   r   r   r   _ModelWrapper   s   rV   )r   from_pretrainedimage_tokenhasattrZget_text_configgetattrr   r   r   	Exceptionr   
ValueError
mapped_clsrN   rO   modeltodtyperepo_idrQ   )rR   rM   rQ   r]   rV   r   r   r   rP      s     
z AttentionMaskVisualizer.__init__r/   )input_sentencec                 C   s   | j ||d d S )N)suffix)visualize_attention_mask)rR   rb   rc   r   r   r   __call__   s    z AttentionMaskVisualizer.__call__c                 C   sZ  | j }i }d }| jjtv rd}ttj|ddj}d}t	j
| j|d}t|drZ|j}n|j|jgd }|r~|d|}||||d	d
}	|j|jgd | _|	d }
d|	v r|	d |d< |j|	d d }nJ| jjtv rt
| j}||}||d	dd }
ntd|jj dd|j_|  |
j\}}tj|||jjf| j jd}t|}t|j||
|d d}|d ur|  }
n|
 d d!|d||}
dt"d| jj d| j# d  }d}t$d|  t$dd| jj d| j d| j#j% &t"| d |  t$|  t'||
| jt(| jdd |d|d}t$| t$|  d S ) Nzchttps://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=trueT)streamr,   )rF   rX   r   r   pt)Zimagestextrc   return_tensorsattention_maskrE   Z	input_ids)ri   zModel type z) does not support attention visualizationeager)r`   )rQ   input_embedsrj   cache_positionZpast_key_valuesr   z##zAttention visualization for z | r   r4   z"  Attention visualization for [1m:z[0m z    r   )r0   r   rE   rF   ))r^   rQ   Z
model_typer	   r   openrequestsgetrawr
   rW   ra   rY   rX   	tokenizerZconvert_ids_to_tokensZimage_token_idreplacer   r   tokenizer\   Z_attn_implementationtrainshaper:   ZzerosZhidden_sizer`   r=   r   r<   Z	unsqueezeexpandr   r]   printrS   centerrK   rZ   )rR   rb   rc   r^   kwargsrF   Zimg	processorrX   inputsrj   tokensrs   Z
batch_sizeZ
seq_lengthrl   rm   Zcausal_maskZtop_bottom_borderZside_borderrH   r   r   r   rd      s    




 
z0AttentionMaskVisualizer.visualize_attention_maskN)r/   )r/   )rS   rT   rU   rA   rP   re   rd   r   r   r   r   rL      s   rL   )r   NNN)rp   ZPILr   Zmasking_utilsr   Zmodels.auto.auto_factoryr   Zmodels.auto.configuration_autor   Zmodels.auto.modeling_autor   r   Zmodels.auto.processing_autor	   r
   Zmodels.auto.tokenization_autor   r   Zimport_utilsr   r:   Ztorch.nnrN   r(   r%   r'   r&   r)   rK   rL   r   r   r   r   <module>   s&    
l