a
    hH                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZmZ zd dlmZ W n eyz   d dlmZ Y n0 d dlZd dlmZmZmZmZmZmZ d dlmZ d d	lmZ d d
lmZ eeZ ere rd dl!m"Z" ddddddZ#G dd deZ$dS )    )annotationsN)Path)TYPE_CHECKINGAnyCallable)load_onnx_modelload_openvino_model)Self)
AutoConfig	AutoModelAutoTokenizer	MT5ConfigPretrainedConfigT5Config)is_peft_available)find_adapter_config_file)InputModule
PeftConfigr   strzCallable[..., None])_save_pretrained_fn	subfolderreturnc                   s   ddd fdd}|S )Nz
str | PathNone)save_directoryr   c                   s.   t jt|  dd  t|  fi |S )NT)exist_ok)osmakedirsr   )r   kwargsr   r    d/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/models/Transformer.pywrapper   s    z)_save_pretrained_wrapper.<locals>.wrapperr    )r   r   r"   r    r   r!   _save_pretrained_wrapper   s    r#   c                      s  e Zd ZU dZdZded< ddgZded< d	Zd
ed< dBddddddd
dddd
 fddZddddddddZ	ddddd
ddddZ
ddddd d!d"Zddddd d#d$Zdd%d&d'Zd(d(d)d*d+Zd,d%d-d.ZdCd/d0d(d1d2d3ZdDdd
dd4d5d6ZedEddd8ddd
d
ddddd9d:d;d<ZedFddd8ddd
d
dddddd:d=d>ZedGdddd8ddd
dd? fd@dAZ  ZS )HTransformera  Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
    sentence_bert_config.jsonr   config_file_namemax_seq_lengthdo_lower_casez	list[str]config_keysTboolsave_in_rootNFtorchz
int | Nonezdict[str, Any] | Nonez
str | Noner   )
model_name_or_pathr'   
model_argstokenizer_argsconfig_args	cache_dirr(   tokenizer_name_or_pathbackendr   c
                   s
  t    || _|	| _|d u r"i }|d u r.i }|d u r:i }| |||	|\}
}| j||
||	|fi | |d urd|vr||d< tj|d ur|n|fd|i|| _|d u rt	| j
drt	| j
jdrt	| jdrt| j
jj| jj}|| _|d ur| jjj| j
j_d S )Nmodel_max_lengthr1   configmax_position_embeddings)super__init__r(   r3   _load_config_load_modelr   from_pretrained	tokenizerhasattr
auto_modelr5   minr6   r4   r'   	__class____name__Ztokenizer_class)selfr-   r'   r.   r/   r0   r1   r(   r2   r3   r5   is_peft_modelr@   r    r!   r8   @   s>    



zTransformer.__init__zdict[str, Any]z*tuple[PeftConfig | PretrainedConfig, bool])r-   r1   r3   r0   r   c              	   C  s   t |||d|d|ddddurrt s8td|dkrHtd	d
dlm} |j|fi |d|idfS tj|fi |d|idfS )a  Loads the transformers or PEFT configuration

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            config_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers config.

        Returns:
            tuple[PretrainedConfig, bool]: The model configuration and a boolean indicating whether the model is a PEFT model.
        tokenrevisionlocal_files_onlyF)r1   rE   rF   rG   NzgLoading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`.r,   a  PEFT models can currently only be loaded with the `torch` backend. To use other backends, load the model with `backend="torch"`, call `model.transformers_model.merge_and_unload()`, save that model with `model.save_pretrained()` and then load the model with the desired backend.r   r   r1   T)	r   getr   	Exception
ValueErrorpeftr   r;   r
   )rB   r-   r1   r3   r0   r   r    r    r!   r9   o   s*    

zTransformer._load_configzPeftConfig | PretrainedConfig)r-   r5   r1   r3   rC   r   c                 K  s   |dkr|r"dD ]}| |d qt|trD| j|||fi | qt|trf| j|||fi | qtj|f||d|| _nT|dkrt	f ||dd|| _n2|dkrt
f ||dd|| _ntd	| d
dS )a  Loads the transformers or PEFT model into the `auto_model` attribute

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            config ("PeftConfig" | PretrainedConfig): The model configuration.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            is_peft_model (bool): Whether the model is a PEFT model.
            model_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers model.
        r,   )rF   Nr5   r1   Zonnxzfeature-extraction)r-   r5   Z	task_nameZopenvinozUnsupported backend 'z6'. `backend` should be `torch`, `onnx`, or `openvino`.)pop
isinstancer   _load_t5_modelr   _load_mt5_modelr   r;   r>   r   r   rJ   )rB   r-   r5   r1   r3   rC   r.   Zadapter_only_kwargr    r    r!   r:      s@    




zTransformer._load_modelr   )r-   r5   r1   r   c                 K  s2   ddl m} dg|_|j|f||d|| _dS )Loads the encoder model from T5r   )T5EncoderModel	decoder.*rL   N)transformersrR   "_keys_to_ignore_on_load_unexpectedr;   r>   )rB   r-   r5   r1   r.   rR   r    r    r!   rO      s    zTransformer._load_t5_modelc                 K  s2   ddl m} dg|_|j|f||d|| _dS )rQ   r   )MT5EncoderModelrS   rL   N)rT   rV   rU   r;   r>   )rB   r-   r5   r1   r.   rV   r    r    r!   rP      s    zTransformer._load_mt5_model)r   c                 C  s   dt |  | jjjd dS )NzTransformer()architecture))dictZget_config_dictr>   r@   rA   rB   r    r    r!   __repr__   s    zTransformer.__repr__zdict[str, torch.Tensor])featuresr   c           
      K  s   dd |  D }| jf i ||ddi}|d }||d< t rddlm} t| j|r| jjjr|d}|d }t	j
|| jjj|jd	}	t	j|	|fd
d|d< | jjjrd|v r|d |d< |S )z#Returns token_embeddings, cls_tokenc                 S  s   i | ]\}}|d v r||qS ))Z	input_idsattention_maskZtoken_type_idsZinputs_embedsr    ).0keyvaluer    r    r!   
<dictcomp>   s   z'Transformer.forward.<locals>.<dictcomp>Zreturn_dictTr   token_embeddings)PeftModelForFeatureExtractionr]   )device   )dimZhidden_statesZall_layer_embeddings)itemsr>   r   rK   rc   rN   Zactive_peft_configZis_prompt_learningsizer,   ZonesZnum_virtual_tokensrd   catr5   Zoutput_hidden_states)
rB   r\   r   Ztrans_featuresoutputsrb   rc   Z
batch_sizer]   Zprefix_attention_maskr    r    r!   forward   s*    

zTransformer.forwardintc                 C  s
   | j jjS )N)r>   r5   Zhidden_sizerZ   r    r    r!   get_word_embedding_dimension  s    z(Transformer.get_word_embedding_dimensionz.list[str] | list[dict] | list[tuple[str, str]]z
str | bool)textspaddingr   c              	   C  s   i }t |d tr|g}nt |d trrg }g |d< |D ]0}tt| \}}|| |d | q8|g}n8g g  }}	|D ] }
||
d  |	|
d  q||	g}dd |D }| jrdd |D }|| j	||dd| j
d	 |S )
z-Tokenizes a text and maps tokens to token-idsr   Z	text_keysre   c                 S  s   g | ]}d d |D qS )c                 S  s   g | ]}t | qS r    )r   stripr^   sr    r    r!   
<listcomp>      3Transformer.tokenize.<locals>.<listcomp>.<listcomp>r    r^   colr    r    r!   rs     rt   z(Transformer.tokenize.<locals>.<listcomp>c                 S  s   g | ]}d d |D qS )c                 S  s   g | ]}|  qS r    )lowerrq   r    r    r!   rs   !  rt   ru   r    rv   r    r    r!   rs   !  rt   Zlongest_firstpt)ro   Z
truncationZreturn_tensors
max_length)rN   r   rY   nextiterrg   appendr(   updater<   r'   )rB   rn   ro   outputZto_tokenizelookupZtext_keytextZbatch1Zbatch2Z
text_tupler    r    r!   tokenize  s:    

	zTransformer.tokenize)output_pathsafe_serializationr   c                 K  s*   | j j||d | j| | | d S )N)r   )r>   Zsave_pretrainedr<   Zsave_config)rB   r   r   r   r    r    r!   save.  s    zTransformer.save zbool | str | Noner	   )r-   r   rE   cache_folderrF   rG   trust_remote_codemodel_kwargstokenizer_kwargsconfig_kwargsr3   r   c                 K  s2   | j |||||||||	|
|d}| f d|i|S )N)r-   r   rE   r   rF   rG   r   r   r   r   r3   r-   )_load_init_kwargs)clsr-   r   rE   r   rF   rG   r   r   r   r   r3   r   Zinit_kwargsr    r    r!   load3  s    zTransformer.loadc                 K  s   | j ||||||d}|||||d}d|vr6i |d< d|vrFi |d< d|vrVi |d< |d | |d | |d | |r|d | |	r|d |	 |
r|d |
 i |||dS )N)r-   r   rE   r   rF   rG   )r   rE   rF   rG   r   r.   r/   r0   )r1   r3   )load_configr~   )r   r-   r   rE   r   rF   rG   r   r   r   r   r3   r   r5   Z
hub_kwargsr    r    r!   r   T  s<    
	zTransformer._load_init_kwargs)r-   r   config_filenamerE   r   rF   rG   r   c           
   
     s   |r
|gng d}|D ]&}t  j|||||||d}	|	r q>qd|	v r`d|	d v r`|	d d d|	v rd|	d v r|	d d d|	v rd|	d v r|	d d |	S )N)r%   zsentence_roberta_config.jsonzsentence_distilbert_config.jsonzsentence_camembert_config.jsonzsentence_albert_config.jsonz sentence_xlm-roberta_config.jsonzsentence_xlnet_config.json)r-   r   r   rE   r   rF   rG   r.   r   r/   r0   )r7   r   rM   )
r   r-   r   r   rE   r   rF   rG   Zconfig_filenamesr5   rD   r    r!   r     s.    	zTransformer.load_config)NNNNNFNr,   )T)T)
r   NNNFFNNNr,   )
r   NNNFFNNNr,   )r   NNNNF)rA   
__module____qualname____doc__r&   __annotations__r)   r+   r8   r9   r:   rO   rP   r[   rk   rm   r   r   classmethodr   r   r   __classcell__r    r    rD   r!   r$   $   sn   
        &/*4		! (          (           (9      r$   )%
__future__r   loggingr   pathlibr   typingr   r   r   Zsentence_transformers.backendr   r   r	   ImportErrorZtyping_extensionsr,   rT   r
   r   r   r   r   r   Ztransformers.utils.import_utilsr   Ztransformers.utils.peft_utilsr   Z(sentence_transformers.models.InputModuler   	getLoggerrA   loggerrK   r   r#   r$   r    r    r    r!   <module>   s&    

