from __future__ import annotations

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

import torch
import transformers
from PIL import Image

from sentence_transformers.models.Router import InputModule


class CLIPModel(InputModule):
    save_in_root: bool = True

    def __init__(self, model_name: str = "openai/clip-vit-base-patch32", processor_name=None) -> None:
        super().__init__()

        if processor_name is None:
            processor_name = model_name

        # The Hugging Face CLIP model holds the vision and text towers; the
        # processor bundles the tokenizer and the image processor.
        self.model = transformers.CLIPModel.from_pretrained(model_name)
        self.processor = transformers.CLIPProcessor.from_pretrained(processor_name)

    def __repr__(self) -> str:
        return "CLIPModel()"

    @property
    def max_seq_length(self) -> int:
        return self.processor.tokenizer.model_max_length

    @max_seq_length.setter
    def max_seq_length(self, value: int) -> None:
        self.processor.tokenizer.model_max_length = value

    def forward(self, features: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        image_embeds = []
        text_embeds = []

        if "pixel_values" in features:
            vision_outputs = self.model.vision_model(pixel_values=features["pixel_values"])
            image_embeds = self.model.visual_projection(vision_outputs[1])

        if "input_ids" in features:
            text_outputs = self.model.text_model(
                input_ids=features.get("input_ids"),
                attention_mask=features.get("attention_mask", None),
                position_ids=features.get("position_ids", None),
                output_attentions=features.get("output_attentions", None),
                output_hidden_states=features.get("output_hidden_states", None),
            )
            text_embeds = self.model.text_projection(text_outputs[1])

        # Re-interleave the image and text embeddings into the original input
        # order, using the 0/1 tags recorded by tokenize().
        sentence_embedding = []
        image_features = iter(image_embeds)
        text_features = iter(text_embeds)

        for idx, input_type in enumerate(features["image_text_info"]):
            if input_type == 0:
                sentence_embedding.append(next(image_features))
            else:
                sentence_embedding.append(next(text_features))

        features["sentence_embedding"] = torch.stack(sentence_embedding).float()

        return features

    def tokenize(self, texts, padding: str | bool = True) -> dict[str, torch.Tensor]:
        images = []
        texts_values = []
        image_text_info = []

        # Split the batch into images and texts, remembering each item's
        # original position (0 = image, 1 = text).
        for idx, data in enumerate(texts):
            if isinstance(data, Image.Image):
                images.append(data)
                image_text_info.append(0)
            else:
                texts_values.append(data)
                image_text_info.append(1)

        encoding = {}
        if len(texts_values):
            encoding = self.processor.tokenizer(texts_values, padding=padding, truncation=True, return_tensors="pt")

        if len(images):
            image_features = self.processor.image_processor(images, return_tensors="pt")
            encoding["pixel_values"] = image_features.pixel_values

        encoding["image_text_info"] = image_text_info
        return dict(encoding)

    @property
    def tokenizer(self) -> transformers.CLIPProcessor:
        return self.processor

    def save(self, output_path: str, *args, safe_serialization: bool = True, **kwargs) -> None:
        self.model.save_pretrained(output_path, safe_serialization=safe_serialization)
        self.processor.save_pretrained(output_path)

    @classmethod
    def load(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
        **kwargs,
    ) -> Self:
        local_path = cls.load_dir_path(
            model_name_or_path=model_name_or_path,
            subfolder=subfolder,
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )
        return cls(local_path)
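
# Usage sketch (not part of the library source): a minimal illustration of how
# this module can be combined with SentenceTransformer to embed texts and
# images in one batch. The checkpoint is the class default; the image path
# "cat_example.jpg" is a hypothetical placeholder.
if __name__ == "__main__":
    # Imported lazily here: sentence_transformers itself imports this module,
    # so a top-level import would be circular.
    from sentence_transformers import SentenceTransformer

    st_model = SentenceTransformer(modules=[CLIPModel()])

    # tokenize() tags each input (0 = image, 1 = text) via image_text_info,
    # and forward() re-interleaves the two embedding streams in input order.
    inputs = ["a photo of a cat", Image.open("cat_example.jpg")]
    embeddings = st_model.encode(inputs)
    print(embeddings.shape)  # (2, 512) for the base patch32 checkpoint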