import math
from dataclasses import dataclass
from typing import Any, Callable, Optional

import torch
import torch.nn.functional as F
from torch import nn

from ...activations import ACT2FN
from ...integrations import use_kernel_forward_from_hub
from ...masking_utils import create_causal_mask
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...utils import ModelOutput, auto_docstring, can_return_tuple
from .configuration_aimv2 import Aimv2Config, Aimv2TextConfig, Aimv2VisionConfig


@dataclass
@auto_docstring
class Aimv2Output(ModelOutput):
    r"""
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`):
        Contrastive loss for image-text similarity.
    logits_per_image (`torch.FloatTensor` of shape `(image_batch_size, text_batch_size)`):
        The scaled dot product scores between `image_embeds` and `text_embeds`. This represents the image-text
        similarity scores.
    logits_per_text (`torch.FloatTensor` of shape `(text_batch_size, image_batch_size)`):
        The scaled dot product scores between `text_embeds` and `image_embeds`. This represents the text-image
        similarity scores.
    text_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)`):
        The text embeddings obtained by applying the projection layer to the pooled output of [`Aimv2TextModel`].
    image_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)`):
        The image embeddings obtained by applying the projection layer to the pooled output of [`Aimv2VisionModel`].
    text_model_output (`BaseModelOutputWithPooling`):
        The output of the [`Aimv2TextModel`].
    vision_model_output (`BaseModelOutputWithPooling`):
        The output of the [`Aimv2VisionModel`].
    Nlosslogits_per_imagelogits_per_texttext_embedsimage_embedstext_model_outputvision_model_outputreturnc                    s   t  fdd  D S )Nc                 3   s,   | ]$}|d vr | nt  | V  qdS ))r   r   N)getattrto_tuple).0kself d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/aimv2/modeling_aimv2.py	<genexpr>H   s   z'Aimv2Output.to_tuple.<locals>.<genexpr>)tuplekeysr%   r'   r%   r(   r"   G   s    zAimv2Output.to_tuple)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r   r   r   r*   r   r"   r'   r'   r'   r(   r   )   s   
r   ZRMSNormc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	Aimv2RMSNormư>c                    s&   t    tt|| _|| _dS )z;
        Aimv2RMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	Parameterr0   Zonesweightvariance_epsilon)r&   hidden_sizeeps	__class__r'   r(   r6   P   s    
zAimv2RMSNorm.__init__c                 C   sJ   |j }|tj}|djddd}|t|| j  }| j|| S )N   T)keepdim)	dtypetor0   float32powmeanZrsqrtr9   r8   )r&   hidden_statesZinput_dtypeZvariancer'   r'   r(   forwardX   s
    zAimv2RMSNorm.forwardc                 C   s   t | jj d| j S )Nz, eps=)r*   r8   shaper9   r%   r'   r'   r(   
extra_repr_   s    zAimv2RMSNorm.extra_repr)r4   )r,   r-   r.   r6   rG   rI   __classcell__r'   r'   r<   r(   r3   N   s   r3   c                       s$   e Zd Z fddZdd Z  ZS )Aimv2MLPc                    sx   t    || _|j| _|j| _tj| j| j|jd| _tj| j| j|jd| _	tj| j| j|jd| _
t|j | _d S )NZbias)r5   r6   configr:   Zintermediate_sizer   LinearZmlp_bias	gate_projup_proj	down_projr   Z
hidden_actact_fnr&   rM   r<   r'   r(   r6   d   s    
zAimv2MLP.__init__c                 C   s$   |  | | || | }|S N)rQ   rR   rO   rP   )r&   xrQ   r'   r'   r(   rG   n   s     zAimv2MLP.forward)r,   r-   r.   r6   rG   rJ   r'   r'   r<   r(   rK   c   s   


class Aimv2VisionEmbeddings(nn.Module):
    def __init__(self, config: Aimv2VisionConfig):
        super().__init__()
        self.config = config
        self.patch_size = config.patch_size
        self.patch_embed = nn.Conv2d(
            config.num_channels, config.hidden_size, kernel_size=config.patch_size, stride=config.patch_size
        )
        self.rms_norm = Aimv2RMSNorm(config.hidden_size, config.rms_norm_eps)

        num_patches = (config.image_size // config.patch_size) ** 2
        if not self.config.is_native:
            self.position_embedding = nn.Embedding(num_patches, config.hidden_size)
        self.register_buffer("position_ids", torch.arange(num_patches).expand((1, -1)), persistent=False)

    @staticmethod
    def build_2d_sincos_position_embedding(
        height, width, embed_dim=256, temperature=10000.0, device="cpu", dtype=torch.float32
    ) -> torch.Tensor:
        grid_w = torch.arange(int(width), dtype=dtype, device=device)
        grid_h = torch.arange(int(height), dtype=dtype, device=device)
        grid_h, grid_w = torch.meshgrid(grid_w, grid_h, indexing="xy")

        pos_dim = embed_dim // 4
        omega = torch.arange(pos_dim, dtype=dtype, device=device) / pos_dim
        omega = 1.0 / (temperature**omega)

        out_h = grid_h.flatten()[..., None] @ omega[None, :]
        out_w = grid_w.flatten()[..., None] @ omega[None, :]

        return torch.concat([out_h.sin(), out_h.cos(), out_w.sin(), out_w.cos()], dim=1)[None, :, :]

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        _, _, height, width = pixel_values.size()
        hidden_states = self.patch_embed(pixel_values).flatten(2).transpose(1, 2)
        hidden_states = self.rms_norm(hidden_states)

        if self.config.is_native:
            pos_embed = self.build_2d_sincos_position_embedding(
                height // self.patch_size,
                width // self.patch_size,
                embed_dim=self.config.hidden_size,
                device=hidden_states.device,
                dtype=hidden_states.dtype,
            )
        else:
            pos_embed = self.position_embedding(self.position_ids)

        hidden_states = hidden_states + pos_embed
        return hidden_states


class Aimv2TextEmbeddings(nn.Module):
    def __init__(self, config: Aimv2TextConfig):
        super().__init__()
        embed_dim = config.hidden_size

        self.token_embedding = nn.Embedding(config.vocab_size, embed_dim)
        self.position_embedding = nn.Embedding(config.max_position_embeddings, embed_dim)

        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
        )

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
    ) -> torch.Tensor:
        seq_length = input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2]
        max_position_embedding = self.position_embedding.weight.shape[0]

        if seq_length > max_position_embedding:
            raise ValueError(
                f"Sequence length must be less than max_position_embeddings (got `sequence length`: "
                f"{seq_length} and max_position_embeddings: {max_position_embedding}"
            )

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if inputs_embeds is None:
            inputs_embeds = self.token_embedding(input_ids)

        position_embeddings = self.position_embedding(position_ids)
        embeddings = inputs_embeds + position_embeddings

        return embeddings


def eager_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
):
    attn_weights = torch.matmul(query, key.transpose(-1, -2)) * scaling
    if attention_mask is not None:
        attn_weights = attn_weights + attention_mask

    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
    attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)

    attn_output = torch.matmul(attn_weights, value)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights
S )	Aimv2Attentionz=Multi-headed attention from 'Attention Is All You Need' paperc                    s   t    || _|j| _|j| _| j| j | _| j| j | jkrZtd| j d| j d| jd | _	|j
| _d| _tj| j| j|jd| _tj| j| j|jd| _tj| j| j|jd| _tj| j| j|jd| _d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      FrL   )r5   r6   rM   r:   rt   num_attention_heads	num_headshead_dimr   scaleZattention_dropoutr   	is_causalr   rN   qkv_biask_projv_projq_projout_projrS   r<   r'   r(   r6      s$    

zAimv2Attention.__init__N)rF   r   r    c              
   K   s   |j \}}}| |}| |}| |}	|||| j| jdd}|||| j| jdd}|	||| j| jdd}	t}
| j	j
dkrt| j	j
 }
|
| |||	|| j| j| jsdn| jd\}}|||| }| |}||fS )z#Input shape: Batch x Time x Channelr   r>   eagerr   )r   r   r   )rH   r   r   r   viewr   r   ry   r   rM   Z_attn_implementationr   r   r   r   r   reshaper   r   )r&   rF   r   r   
batch_sizer   rt   Zqueriesr+   valuesZattention_interfacer   r   r'   r'   r(   rG      s.    




zAimv2Attention.forward)N)r,   r-   r.   r/   r6   r0   r|   r   r*   rG   rJ   r'   r'   r<   r(   r      s    r   c                       sP   e Zd Zed fddZd	ejeej ee e	ejejf dddZ
  ZS )
Aimv2EncoderLayerrW   c                    sB   t    t|| _t|| _t|j|j| _	t|j|j| _
d S rT   )r5   r6   r   	attentionrK   ffnr3   r:   r^   	rms_norm1	rms_norm2rS   r<   r'   r(   r6   $  s
    


zAimv2EncoderLayer.__init__NF)rF   r   output_attentionsr    c                 C   sT   |  |}| j||d\}}|| }| |}| |}|| }|rL||fS |d fS )N)rF   r   )r   r   r   r   )r&   rF   r   r   Znorm_hidden_statesr   r   Z
mlp_outputr'   r'   r(   rG   +  s    


zAimv2EncoderLayer.forward)NF)r,   r-   r.   r   r6   r0   r|   r   boolr*   rG   rJ   r'   r'   r<   r(   r   #  s   
  r   c                       sN   e Zd ZdZed fddZed	eej	 ee
 ee
 edddZ  ZS )
Aimv2Encoderz
    Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
    [`Aimv2EncoderLayer`].

    Args:
        config: Aimv2Config
    """

    def __init__(self, config: Aimv2Config):
        super().__init__()
        self.config = config
        self.layers = nn.ModuleList([Aimv2EncoderLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    @can_return_tuple
    def forward(
        self,
        inputs_embeds,
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> BaseModelOutput:
        r"""
        Args:
            inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
                This is useful if you want more control over how to convert `input_ids` indices into associated vectors
                than the model's internal embedding lookup matrix.
            attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.

                [What are attention masks?](../glossary#attention-mask)
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                for more detail.
            return_dict (`bool`, *optional*):
                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        encoder_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        hidden_states = inputs_embeds
        for encoder_layer in self.layers:
            if output_hidden_states:
                encoder_states = encoder_states + (hidden_states,)
            layer_outputs = encoder_layer(
                hidden_states,
                attention_mask,
                output_attentions=output_attentions,
            )

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        if output_hidden_states:
            encoder_states = encoder_states + (hidden_states,)

        return BaseModelOutput(
            last_hidden_state=hidden_states, hidden_states=encoder_states, attentions=all_attentions
        )


class Aimv2AttentionPoolingHead(nn.Module):
    def __init__(self, config: Aimv2VisionConfig):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads

        self.k_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=config.qkv_bias)
        self.v_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=config.qkv_bias)

        self.cls_token = nn.Parameter(torch.zeros(1, 1, self.hidden_size))
        self.output_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=True)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        batch_size, seq_len, hidden_dim = hidden_states.shape

        cls_token = self.cls_token.expand(batch_size, -1, -1)

        key = self.k_proj(hidden_states).reshape(batch_size, seq_len, self.num_heads, hidden_dim // self.num_heads)
        value = self.v_proj(hidden_states).reshape(batch_size, seq_len, self.num_heads, hidden_dim // self.num_heads)
        query = cls_token.reshape(batch_size, 1, self.num_heads, hidden_dim // self.num_heads)

        key = key.permute(0, 2, 1, 3)
        value = value.permute(0, 2, 1, 3)
        query = query.permute(0, 2, 1, 3)

        attn_output = F.scaled_dot_product_attention(query, key, value)
        attn_output = attn_output.transpose(1, 2).reshape(batch_size, 1, hidden_dim)
        attn_output = attn_output.mean(dim=1)

        output = self.output_proj(attn_output)
        return output


@auto_docstring
class Aimv2PreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models. The model is only intended for inference and doesn't support finetuning.
    """

    config: Aimv2Config
    base_model_prefix = "aimv2"
    supports_gradient_checkpointing = True
    _no_split_modules = [
        "Aimv2EncoderLayer",
        "Aimv2AttentionPoolingHead",
        "Aimv2VisionEmbeddings",
        "Aimv2TextEmbeddings",
    ]
    _supports_sdpa = True
    _supports_flash_attn = True
    _supports_flex_attn = True

    def _init_weights(self, module):
        super()._init_weights(module)
        if hasattr(module, "logit_scale"):
            if isinstance(module.logit_scale, nn.Parameter):
                module.logit_scale.data.fill_(math.log(1 / 0.07))
        elif isinstance(module, Aimv2AttentionPoolingHead):
            module.cls_token.data.normal_(mean=0.0, std=self.config.initializer_range)


@auto_docstring(
    custom_intro="""
    The Vision model from AIMv2 without any head or projection on top.
    """
)
class Aimv2VisionModel(Aimv2PreTrainedModel):
    config: Aimv2VisionConfig
    main_input_name = "pixel_values"

    def __init__(self, config: Aimv2VisionConfig):
        super().__init__(config)
        self.config = config
        self.embeddings = Aimv2VisionEmbeddings(config)
        self.encoder = Aimv2Encoder(config)
        self.rms_norm = Aimv2RMSNorm(config.hidden_size, config.rms_norm_eps)

        self.use_head = config.use_head
        if self.use_head:
            self.head = Aimv2AttentionPoolingHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Module:
        return self.embeddings.patch_embed

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> BaseModelOutputWithPooling:
        r"""
        Examples:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Aimv2VisionModel

        >>> model = Aimv2VisionModel.from_pretrained("apple/aimv2-large-patch14-native")
        >>> processor = AutoProcessor.from_pretrained("apple/aimv2-large-patch14-native")

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> last_hidden_state = outputs.last_hidden_state
        >>> pooled_output = outputs.pooler_output  # pooled features
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        hidden_states = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            inputs_embeds=hidden_states,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        last_hidden_state = encoder_outputs[0]
        last_hidden_state = self.rms_norm(last_hidden_state)
        pooler_output = self.head(last_hidden_state) if self.use_head else None

        return BaseModelOutputWithPooling(
            last_hidden_state=last_hidden_state,
            pooler_output=pooler_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    The text model from AIMv2 without any head or projection on top.
    """
)
class Aimv2TextModel(Aimv2PreTrainedModel):
    main_input_name = "input_ids"

    def __init__(self, config: Aimv2TextConfig):
        super().__init__(config)
        self.config = config
        self.embeddings = Aimv2TextEmbeddings(config)
        self.encoder = Aimv2Encoder(config)
        self.rms_norm = Aimv2RMSNorm(config.hidden_size, config.rms_norm_eps)

        self.eos_token_id = config.eos_token_id

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Module:
        return self.embeddings.token_embedding

    def set_input_embeddings(self, value):
        self.embeddings.token_embedding = value

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> BaseModelOutputWithPooling:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        hidden_states = self.embeddings(input_ids)
        batch_size, seq_len, _ = hidden_states.shape

        cache_position = torch.arange(seq_len, dtype=torch.long, device=hidden_states.device)
        position_ids = cache_position.unsqueeze(0).expand(batch_size, -1)

        if attention_mask is not None:
            attention_mask = create_causal_mask(
                config=self.config,
                input_embeds=hidden_states,
                position_ids=position_ids,
                attention_mask=attention_mask,
                cache_position=cache_position,
                past_key_values=None,
            )

        encoder_outputs = self.encoder(
            inputs_embeds=hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        last_hidden_state = encoder_outputs[0]
        last_hidden_state = self.rms_norm(last_hidden_state)

        # Pool by taking the hidden state at the position of the EOS token in each sequence.
        pooled_output = last_hidden_state[
            torch.arange(last_hidden_state.shape[0], device=last_hidden_state.device),
            (input_ids.to(dtype=torch.int, device=last_hidden_state.device) == self.eos_token_id).int().argmax(dim=-1),
        ]

        return BaseModelOutputWithPooling(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


def _get_vector_norm(tensor: torch.Tensor) -> torch.Tensor:
    """
    This method is equivalent to tensor.norm(p=2, dim=-1, keepdim=True) and used to make
    model `executorch` exportable. See issue https://github.com/pytorch/executorch/issues/3566
    """
    square_tensor = torch.pow(tensor, 2)
    sum_tensor = torch.sum(square_tensor, dim=-1, keepdim=True)
    normed_tensor = torch.pow(sum_tensor, 0.5)
    return normed_tensor


@auto_docstring
class Aimv2Model(Aimv2PreTrainedModel):
    config: Aimv2Config
    _no_split_modules = ["Aimv2TextEmbeddings", "Aimv2EncoderLayer", "Aimv2VisionEmbeddings"]
    _supports_flash_attn = True

    def __init__(self, config: Aimv2Config):
        super().__init__(config)

        self.projection_dim = config.projection_dim
        self.vision_embed_dim = config.vision_config.hidden_size
        self.text_embed_dim = config.text_config.hidden_size

        self.vision_model = Aimv2VisionModel._from_config(config.vision_config)
        self.text_model = Aimv2TextModel._from_config(config.text_config)

        self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
        self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)

        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
        self.max_log_logit_scale = math.log(config.max_logit_scale)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def get_text_features(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> torch.FloatTensor:
        r"""
        Returns:
            text_features (`torch.FloatTensor` of shape `(batch_size, output_dim)`): The text embeddings obtained by
            applying the projection layer to the pooled output of [`Aimv2TextModel`].

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, Aimv2Model

        >>> model = Aimv2Model.from_pretrained("apple/aimv2-large-patch14-224-lit")
        >>> tokenizer = AutoTokenizer.from_pretrained("apple/aimv2-large-patch14-224-lit")

        >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt")
        >>> text_features = model.get_text_features(**inputs)
        ```N)r   r   rX   r   r   )rM   r   r   r   r   r   )	r&   r   r   rX   r   r   text_outputsr   Ztext_featuresr'   r'   r(   get_text_features  s    
zAimv2Model.get_text_featuresF)rw   r   r   interpolate_pos_encodingr    c                 C   sN   |dur|n| j j}|dur |n| j j}| j||||d}|j}| |}|S )aI  
        Returns:
            image_features (`torch.FloatTensor` of shape `(batch_size, output_dim)`): The image embeddings obtained by
            applying the projection layer to the pooled output of [`Aimv2VisionModel`].

        Examples:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Aimv2Model

        >>> model = Aimv2Model.from_pretrained("apple/aimv2-large-patch14-224-lit")
        >>> processor = AutoProcessor.from_pretrained("apple/aimv2-large-patch14-224-lit")

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(images=image, return_tensors="pt")

        >>> image_features = model.get_image_features(**inputs)
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        vision_outputs = self.vision_model(
            pixel_values=pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        pooled_output = vision_outputs.pooler_output
        image_features = self.visual_projection(pooled_output)

        return image_features

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        pixel_values: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> Aimv2Output:
        r"""
        Examples:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Aimv2Model

        >>> model = Aimv2Model.from_pretrained("apple/aimv2-large-patch14-224-lit")
        >>> processor = AutoProcessor.from_pretrained("apple/aimv2-large-patch14-224-lit")

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(
        ...     text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True
        ... )

        >>> outputs = model(**inputs)
        >>> logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
        >>> probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities
        ```N)rw   r   r   )r   r   r   r   r   )r   r   r   r   r   r   )rM   r   r   r   r   r   r   r   r   r   clampr   exprB   ri   tr   )r&   r   rw   r   r   r   r   r   r   r   r   r   r   r'   r'   r(   rG     s>    !

zAimv2Model.forward)NNNNN)NNNF)NNNNN)r,   r-   r.   r   r2   r   r   r6   r   r   r0   r|   r   r1   r   r   r   r   r   rG   rJ   r'   r'   r<   r(   r   u  sZ   
     +    /     r   )r   r   r   r   )r   )5r   dataclassesr   typingr   r   r   r0   Ztorch.nn.functionalr   r   r   Zactivationsr   Zintegrationsr	   Zmasking_utilsr
   Zmodeling_layersr   Zmodeling_outputsr   r   Zmodeling_utilsr   r   utilsr   r   r   Zconfiguration_aimv2r   r   r   r   r   r3   rK   rV   r}   r|   floatr   r   r   r   r   r   r   r   r   r   __all__r'   r'   r'   r(   <module>   sd   #4/ =P"LI ?