a
    ½ÀhŒ  ã                   @   s‚   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ e e¡ZG d	d
„ d
eƒZG dd„ de
ƒZd
dgZdS )zLeViT model configurationé    ©ÚOrderedDict)ÚMapping)Úversioné   )ÚPretrainedConfig)Ú
OnnxConfig)Úloggingc                       sZ   e Zd ZdZdZddddddg d¢g d	¢g d
¢g d¢dg d¢g d¢df‡ fdd„	Z‡  ZS )ÚLevitConfigaÇ  
    This is the configuration class to store the configuration of a [`LevitModel`]. It is used to instantiate a LeViT
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the LeViT
    [facebook/levit-128S](https://huggingface.co/facebook/levit-128S) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        image_size (`int`, *optional*, defaults to 224):
            The size of the input image.
        num_channels (`int`, *optional*, defaults to 3):
            Number of channels in the input image.
        kernel_size (`int`, *optional*, defaults to 3):
            The kernel size for the initial convolution layers of patch embedding.
        stride (`int`, *optional*, defaults to 2):
            The stride size for the initial convolution layers of patch embedding.
        padding (`int`, *optional*, defaults to 1):
            The padding size for the initial convolution layers of patch embedding.
        patch_size (`int`, *optional*, defaults to 16):
            The patch size for embeddings.
        hidden_sizes (`list[int]`, *optional*, defaults to `[128, 256, 384]`):
            Dimension of each of the encoder blocks.
        num_attention_heads (`list[int]`, *optional*, defaults to `[4, 8, 12]`):
            Number of attention heads for each attention layer in each block of the Transformer encoder.
        depths (`list[int]`, *optional*, defaults to `[4, 4, 4]`):
            The number of layers in each encoder block.
        key_dim (`list[int]`, *optional*, defaults to `[16, 16, 16]`):
            The size of key in each of the encoder blocks.
        drop_path_rate (`int`, *optional*, defaults to 0):
            The dropout probability for stochastic depths, used in the blocks of the Transformer encoder.
        mlp_ratios (`list[int]`, *optional*, defaults to `[2, 2, 2]`):
            Ratio of the size of the hidden layer compared to the size of the input layer of the Mix FFNs in the
            encoder blocks.
        attention_ratios (`list[int]`, *optional*, defaults to `[2, 2, 2]`):
            Ratio of the size of the output dimension compared to input dimension of attention layers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

    Example:

    ```python
    >>> from transformers import LevitConfig, LevitModel

    >>> # Initializing a LeViT levit-128S style configuration
    >>> configuration = LevitConfig()

    >>> # Initializing a model (with random weights) from the levit-128S style configuration
    >>> model = LevitModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```Zlevitéà   r   é   é   é   )é€   é   i€  )é   é   é   )r   r   r   )r   r   r   r   )r   r   r   g{®Gáz”?c                    s¬   t ƒ jf i |¤Ž || _|| _|| _|| _|| _|| _|| _|	| _	|
| _
|| _|| _|| _|| _|| _d|
d |d |
d  dddgd|
d |d |
d  dddgg| _d S )NZ	Subsampler   r   r   r   )ÚsuperÚ__init__Ú
image_sizeÚnum_channelsÚkernel_sizeÚstrideÚpaddingÚhidden_sizesÚnum_attention_headsÚdepthsÚkey_dimÚdrop_path_rateÚ
patch_sizeÚattention_ratioÚ	mlp_ratioÚinitializer_rangeZdown_ops)Úselfr   r   r   r   r   r    r   r   r   r   r   r"   r!   r#   Úkwargs©Ú	__class__© úi/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/levit/configuration_levit.pyr   X   s$    þzLevitConfig.__init__)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Z
model_typer   Ú__classcell__r(   r(   r&   r)   r
      s"   7ñr
   c                   @   sJ   e Zd Ze d¡Zeeeee	ef f dœdd„ƒZ
eedœdd„ƒZdS )ÚLevitOnnxConfigz1.11)Úreturnc                 C   s   t ddddddœfgƒS )NZpixel_valuesÚbatchr   ÚheightÚwidth)r   r   r   r   r   ©r$   r(   r(   r)   Úinputsƒ   s    ÿÿzLevitOnnxConfig.inputsc                 C   s   dS )Ng-Cëâ6?r(   r4   r(   r(   r)   Úatol_for_validation‹   s    z#LevitOnnxConfig.atol_for_validationN)r*   r+   r,   r   ÚparseZtorch_onnx_minimum_versionÚpropertyr   ÚstrÚintr5   Úfloatr6   r(   r(   r(   r)   r/   €   s
   
 r/   N)r-   Úcollectionsr   Úcollections.abcr   Ú	packagingr   Zconfiguration_utilsr   Zonnxr   Úutilsr	   Z
get_loggerr*   Úloggerr
   r/   Ú__all__r(   r(   r(   r)   Ú<module>   s   
b