from __future__ import annotations

from typing import Any, overload

import numpy as np
import torch
from scipy.sparse import coo_matrix
from torch import Tensor, device


def _convert_to_tensor(a: list | np.ndarray | Tensor) -> Tensor:
    """
    Converts the input `a` to a PyTorch tensor if it is not already a tensor.
    Handles lists of sparse tensors by stacking them.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input array or tensor.

    Returns:
        Tensor: The converted tensor.
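
    Example (an illustrative sketch added here, not a doctest shipped with the
    library; a plain nested list is converted to a dense float tensor):
        >>> import torch
        >>> _convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])
        tensor([[1., 2.],
                [3., 4.]])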
    """
    if isinstance(a, list):
        if all(isinstance(x, Tensor) and x.is_sparse for x in a):
            # Sparse tensors must be coalesced before they can be stacked
            return torch.stack([x.coalesce().to(dtype=torch.float32) for x in a])
        a = torch.tensor(a)
    elif not isinstance(a, Tensor):
        a = torch.tensor(a)
    if a.is_sparse:
        return a.to(dtype=torch.float32)
    return a


def _convert_to_batch(a: Tensor) -> Tensor:
    """
    If the tensor `a` is 1-dimensional, it is unsqueezed to add a batch dimension.

    Args:
        a (Tensor): The input tensor.

    Returns:
        Tensor: The tensor with a batch dimension.
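
    Example (illustrative sketch, not a doctest shipped with the library):
        >>> import torch
        >>> _convert_to_batch(torch.tensor([1.0, 2.0])).shape
        torch.Size([1, 2])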
    """
    if a.dim() == 1:
        a = a.unsqueeze(0)
    return a


def _convert_to_batch_tensor(a: list | np.ndarray | Tensor) -> Tensor:
    """
    Converts the input data to a tensor with a batch dimension.
    Handles lists of sparse tensors by stacking them.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input data to be converted.

    Returns:
        Tensor: The converted tensor with a batch dimension.
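
    Example (illustrative sketch, not a doctest shipped with the library; a
    1-dimensional input gains a leading batch dimension):
        >>> _convert_to_batch_tensor([1.0, 2.0]).shape
        torch.Size([1, 2])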
    """
    a = _convert_to_tensor(a)
    if a.dim() == 1:
        a = a.unsqueeze(0)
    return a


def normalize_embeddings(embeddings: Tensor) -> Tensor:
    """
    Normalizes the embeddings matrix, so that each sentence embedding has unit length.

    Args:
        embeddings (Tensor): The input embeddings matrix.

    Returns:
        Tensor: The normalized embeddings matrix.
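
    Example (illustrative sketch with hand-picked values, not a doctest
    shipped with the library):
        >>> import torch
        >>> normalize_embeddings(torch.tensor([[3.0, 4.0], [0.0, 5.0]]))
        tensor([[0.6000, 0.8000],
                [0.0000, 1.0000]])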
    """
    if not embeddings.is_sparse:
        return torch.nn.functional.normalize(embeddings, p=2, dim=1)

    # Sparse path: compute each row's L2 norm from the non-zero values,
    # then divide every non-zero value by the norm of its row.
    embeddings = embeddings.coalesce()
    indices, values = embeddings.indices(), embeddings.values()
    row_norms = torch.zeros(embeddings.size(0), device=embeddings.device)
    row_norms.index_add_(0, indices[0], values**2)
    row_norms = torch.sqrt(row_norms)
    row_norms = torch.index_select(row_norms, 0, indices[0])

    # Guard against division by zero for all-zero rows
    mask = row_norms > 0
    normalized_values = values.clone()
    normalized_values[mask] /= row_norms[mask]
    return torch.sparse_coo_tensor(indices, normalized_values, embeddings.size())


@overload
def truncate_embeddings(embeddings: np.ndarray, truncate_dim: int | None) -> np.ndarray: ...


@overload
def truncate_embeddings(embeddings: torch.Tensor, truncate_dim: int | None) -> torch.Tensor: ...


def truncate_embeddings(embeddings: np.ndarray | torch.Tensor, truncate_dim: int | None) -> np.ndarray | torch.Tensor:
    """
    Truncates the embeddings matrix.

    Args:
        embeddings (Union[np.ndarray, torch.Tensor]): Embeddings to truncate.
        truncate_dim (Optional[int]): The dimension to truncate sentence embeddings to. `None` does no truncation.

    Example:
        >>> from sentence_transformers import SentenceTransformer
        >>> from sentence_transformers.util import truncate_embeddings
        >>> model = SentenceTransformer("tomaarsen/mpnet-base-nli-matryoshka")
        >>> embeddings = model.encode(["It's so nice outside!", "Today is a beautiful day.", "He drove to work earlier"])
        >>> embeddings.shape
        (3, 768)
        >>> model.similarity(embeddings, embeddings)
        tensor([[1.0000, 0.8100, 0.1426],
                [0.8100, 1.0000, 0.2121],
                [0.1426, 0.2121, 1.0000]])
        >>> truncated_embeddings = truncate_embeddings(embeddings, 128)
        >>> truncated_embeddings.shape
        (3, 128)
        >>> model.similarity(truncated_embeddings, truncated_embeddings)
        tensor([[1.0000, 0.8092, 0.1987],
                [0.8092, 1.0000, 0.2716],
                [0.1987, 0.2716, 1.0000]])

    Returns:
        Union[np.ndarray, torch.Tensor]: Truncated embeddings.
    """
    return embeddings[..., :truncate_dim]


def select_max_active_dims(embeddings: np.ndarray | torch.Tensor, max_active_dims: int | None) -> torch.Tensor:
    """
    Keeps only the top-k values (in absolute terms) for each embedding and creates a sparse tensor.

    Args:
        embeddings (Union[np.ndarray, torch.Tensor]): Embeddings to sparsify by keeping only top_k values.
        max_active_dims (int): Number of values to keep as non-zeros per embedding.

    Returns:
        torch.Tensor: A sparse tensor containing only the top-k values per embedding.
    """
    if max_active_dims is None:
        return embeddings

    if isinstance(embeddings, np.ndarray):
        embeddings = torch.tensor(embeddings)

    batch_size, dim = embeddings.shape
    device = embeddings.device

    # Indices of the k largest-magnitude values in each row
    _, top_indices = torch.topk(embeddings.abs(), k=min(max_active_dims, dim), dim=1)

    # Boolean mask that is True only at the top-k positions of each row
    mask = torch.zeros_like(embeddings, dtype=torch.bool)
    batch_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand(-1, min(max_active_dims, dim))
    mask[batch_indices.flatten(), top_indices.flatten()] = True

    # Zero out everything outside the top-k
    embeddings[~mask] = 0
    return embeddings


def batch_to_device(batch: dict[str, Any], target_device: device) -> dict[str, Any]:
    """
    Send a PyTorch batch (i.e., a dictionary of string keys to Tensors) to a device (e.g. "cpu", "cuda", "mps").

    Args:
        batch (Dict[str, Tensor]): The batch to send to the device.
        target_device (torch.device): The target device (e.g. "cpu", "cuda", "mps").

    Returns:
        Dict[str, Tensor]: The batch with tensors sent to the target device.
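
    Example (illustrative sketch, not a doctest shipped with the library;
    non-tensor values are left untouched):
        >>> import torch
        >>> batch = {"input_ids": torch.tensor([[1, 2]]), "text": "hi"}
        >>> batch_to_device(batch, torch.device("cpu"))["input_ids"].device
        device(type='cpu')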
    """
    for key in batch:
        if isinstance(batch[key], Tensor):
            batch[key] = batch[key].to(target_device)
    return batch


def to_scipy_coo(a: Tensor) -> coo_matrix:
    """Convert a sparse PyTorch COO tensor to a SciPy ``coo_matrix``."""
    a = a.coalesce()
    indices = a.indices().cpu().numpy()
    values = a.values().cpu().numpy()
    return coo_matrix((values, (indices[0], indices[1])), shape=a.shape)
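
# Usage sketch for to_scipy_coo (hypothetical values, not library docs):
#
# >>> sp = torch.tensor([[0.0, 1.5], [2.0, 0.0]]).to_sparse()
# >>> to_scipy_coo(sp).toarray()
# array([[0. , 1.5],
#        [2. , 0. ]], dtype=float32)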