a
    hw                  	   @  s   d dl mZ d dlZd dlmZmZ d dlmZ ee	Z
ervd dlmZmZmZ zd dlmZ W n eyt   Y n0 ddd	d
dddddddZdS )    )annotationsN)TYPE_CHECKINGLiteral)save_or_push_to_hub_modelCrossEncoderSentenceTransformerSparseEncoder)OptimizationConfigFz2SentenceTransformer | SparseEncoder | CrossEncoderz4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']strboolz
str | NoneNone)modeloptimization_configmodel_name_or_pathpush_to_hub	create_prfile_suffixreturnc                   sl  ddl m}m}m} z ddlm}	m}
 ddlm} W n t	yN   t	dY n0 t
| |o~t| o~t| d do~t
| d j|	}t
| |ot| ot| d dot
| d j|	}t
| |ot
| j|	}|s|s|std|s|r| d j}n| j}|
|t
tr4|jvrtd p& t|  d	u rBd
 t fddd||| d| d	 d	S )a  
    Export an optimized ONNX model from a SentenceTransformer, SparseEncoder, or CrossEncoder model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | SparseEncoder | CrossEncoder): The SentenceTransformer, SparseEncoder,
            or CrossEncoder model to be optimized. Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r   )ORTModelORTOptimizer)AutoOptimizationConfigzPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`
auto_modelz}The model must be a Transformer-based SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.NZ	optimizedc                   s   j |  dS )N)r   )optimize)save_dirr   r   Z	optimizer b/var/www/html/assistant/venv/lib/python3.9/site-packages/sentence_transformers/backend/optimize.py<lambda>o       z-export_optimized_onnx_model.<locals>.<lambda>export_optimized_onnx_modelZonnx)	Zexport_functionZexport_function_nameconfigr   r   r   r   backendr   )sentence_transformersr   r   r	   Zoptimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelenhasattrr   r   
ValueErrorZfrom_pretrainedr   Z_LEVELSgetattrr   )r   r   r   r   r   r   r   r   r	   r   r   r   Zviable_st_modelZviable_se_modelZviable_ce_modelZ	ort_modelr   r   r   r       sf    +





r    )FFN)
__future__r   loggingtypingr   r   Z#sentence_transformers.backend.utilsr   	getLogger__name__loggerr#   r   r   r	   r$   r
   r%   r    r   r   r   r   <module>   s   
   