a
    hÃ                     @   s  d Z ddlZddlmZmZ ddlZddlm  mZ	 ddl
ZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZ eeZ G dd dej!Z"G dd dej!Z#G dd dej!Z$G dd dej!Z%G dd dej!Z&G dd dej!Z'G dd dej!Z(G dd dej!Z)G dd  d ej!Z*G d!d" d"ej!Z+G d#d$ d$ej!Z,eG d%d& d&eZ-eG d'd( d(e-Z.ed)d*G d+d, d,e-eZ/g d-Z0dS ).zPyTorch CPMAnt    N)OptionalUnion)nn)CrossEntropyLoss   )ACT2FN)CacheDynamicCache)GenerationMixin)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)auto_docstringlogging   )CpmAntConfigc                       s6   e Zd ZdZed fddZejdddZ  Z	S )CpmAntLayerNormz~
    We use Root Mean Square (RMS) Layer Normalization, please see https://huggingface.co/papers/1910.07467 for details."
    configc                    s2   t    |j| _|j| _tt|j| _	d S N)
super__init__epshidden_sizedim_normr   	Parametertorchemptyweightselfr   	__class__ f/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/cpmant/modeling_cpmant.pyr   +   s    
zCpmAntLayerNorm.__init__hidden_statesc                 C   s^   | d| jkrtd|j}|tjdjddd}|t	|| j
  || j }|S )f
        Args:
            hidden_states (`torch.Tensor` of shape `(batch, seq_len, dim_in)`)
        z'hidden_states.size(-1) != self.dim_norm   T)dimZkeepdim)sizer   AssertionErrordtypetor   Zfloat32powmeanZrsqrtr   r   )r    r&   Z	old_dtypeZvariancer#   r#   r$   forward2   s     zCpmAntLayerNorm.forward)
__name__
__module____qualname____doc__r   r   r   Tensorr1   __classcell__r#   r#   r!   r$   r   &   s   r   c                
       s\   e Zd Zd	ed fddZd
ejejejejee	 ee
 ee	 eej dddZ  ZS )CpmAntAttentionNr   c                    s   t    |j| _|j| _|j| _|| _tj	| j| j| j dd| _
tj	| j| j| j dd| _tj	| j| j| j dd| _tj	| j| j | jdd| _tjjdd| _|jd urtjj|jd| _nd | _d S )NFbiasr(   r*   )p)r   r   r   Z	dim_modelnum_attention_heads	num_headsdim_head	layer_idxr   Linear	project_q	project_k	project_vattention_outr   ZSoftmaxsoftmax	dropout_pDropoutdropoutr    r   r@   r!   r#   r$   r   @   s    

zCpmAntAttention.__init__F)hidden_q	hidden_kvattention_maskposition_biasoutput_attentionspast_key_values	use_cachecache_positionc	              	   C   s  | d}	| d}
| d}| |}| |}| |}||	|
| j| jdddd}||	|| j| jdddd}||	|| j| jdddd}|dur|||| j	d|i\}}| d}t
||ddt| j }|| }t
|||	d|
|t
d	kt
jtd
|j|jd}| |}t
|||	d|
|t
d	kt
jd|j|jd}|rr|}nd}| jdur| |}t
||}||	| j|
| jdddd}| |	|
| j| j }| |}||fS )a  
        Args:
            hidden_q (`torch.Tensor`):
                Input of transformer block(self-attention block). It can be the raw embedding of a batch of sequences.
            hidden_kv (`torch.Tensor` of shape `(batch, len_k, dim_model)`)):
                Tensor *key_value* and *query* of shape `(batch, len_k, dim_model)`
            attention_mask (`torch.Tensor` of shape `(batch, len_seq, len_seq)`):
                Avoid invalid areas to participate in the calculation of self-attention.
            position_bias (`torch.Tensor` of shape `(batch, len_seq, len_seq)`):
                Provide positional information to self-attention block.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers.
            past_key_values (`tuple[torch.Tensor, torch.Tensor]`, *optional*):
                Cached past key and value projection states.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
        r   r   r)   r   NrR   r(   Fz-inf)devicer-   )r+   rB   rC   rD   viewr>   r?   permuteupdater@   r   matmulZ	transposemathsqrtZmasked_filltensorZscalar_tensorfloatrT   r-   rF   rI   
contiguousrE   )r    rK   rL   rM   rN   rO   rP   rQ   rR   Z
batch_sizelen_qZlen_kquerykeyvalueZscoreattn_weightsr#   r#   r$   r1   T   sF    





   
 

 
zCpmAntAttention.forward)N)FNNN)r2   r3   r4   r   r   r   r6   Z
BoolTensorr   boolr   r1   r7   r#   r#   r!   r$   r8   ?   s       r8   c                	       s\   e Zd Zd	ed fddZd
ejejeej ee ee	 ee eej dddZ
  ZS )CpmAntSelfAttentionBlockNr   c                    sD   t    t|| _t||d| _|jr:tj	|j| _
nd | _
d S Nr@   )r   r   r   layernorm_before_attentionr8   self_attentionrG   r   r   rH   rI   rJ   r!   r#   r$   r      s    

z!CpmAntSelfAttentionBlock.__init__Fr&   rM   rN   rO   rP   rQ   rR   c           
   
   C   sJ   |  |}| ||||||||\}}	| jdur:| |}|| }||	fS )a  
        Args:
            hidden_states (`torch.Tensor` of shape `(batch, len_seq, dim_model)`):
                Input of transformer block(self-attention block). It can be the raw embedding of a batch of sequences.
            attention_mask (`torch.Tensor` of shape `(batch, len_seq, len_seq)`):
                Avoid invalid areas to participate in the calculation of self-attention.
            position_bias (`torch.Tensor` of shape `(batch, len_seq, len_seq)`):
                Provide positional information to self-attention block.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers.
            past_key_values (`Tuple(torch.FloatTensor)`, *optional*):
                Cached past key and value projection states.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
        N)rg   rh   rI   )
r    r&   rM   rN   rO   rP   rQ   rR   outputsrb   r#   r#   r$   r1      s    


z CpmAntSelfAttentionBlock.forward)N)NFNNNr2   r3   r4   r   r   r   r6   r   rc   r   r1   r7   r#   r#   r!   r$   rd      s        rd   c                       s2   e Zd Zed fddZejdddZ  ZS )CpmAntDenseGatedACTr   c                    sF   t    tj|j|jdd| _tj|j|jdd| _tj	 | _
d S NFr9   )r   r   r   rA   r   dim_ffw_0w_1r   ZGELUactr   r!   r#   r$   r      s    
zCpmAntDenseGatedACT.__init__r%   c                 C   s&   |  | |}| |}|| }|S )zTransform an input tensor from one feature space to another via a nonlinear operation

        Args:
            hidden_states (`torch.Tensor` of shape `(batch, seq_len, dim_in)`)
        )rq   ro   rp   )r    r&   Z
gate_scorer#   r#   r$   r1      s    
zCpmAntDenseGatedACT.forward	r2   r3   r4   r   r   r   r6   r1   r7   r#   r#   r!   r$   rl      s   rl   c                       s2   e Zd Zed fddZejdddZ  ZS )CpmAntFeedForwardr   c                    sP   t    t|| _|jd ur0tj|j| _nd | _tj	|j
|jdd| _d S rm   )r   r   rl   w_inrG   r   r   rH   rI   rA   rn   r   w_outr   r!   r#   r$   r      s    


zCpmAntFeedForward.__init__r%   c                 C   s,   |  |}| jdur| |}| |}|S )r'   N)rt   rI   ru   r    r&   r#   r#   r$   r1      s
    



zCpmAntFeedForward.forwardrr   r#   r#   r!   r$   rs      s   
rs   c                       s2   e Zd Zed fddZejdddZ  ZS )CpmAntFFNBlockr   c                    s@   t    t|| _t|| _|jr6tj	|j| _
nd | _
d S r   )r   r   r   layernorm_before_ffnrs   ffnrG   r   r   rH   rI   r   r!   r#   r$   r   
  s    


zCpmAntFFNBlock.__init__r%   c                 C   s4   |  |}| |}| jdur(| |}|| }|S )z
        Args:
            hidden_states (`torch.Tensor` of shape `(batch, len_seq, dim_model)`):
                Hidden states before feed forward layer.
        N)rx   ry   rI   )r    r&   Z
ln_outputsrj   r#   r#   r$   r1     s    	



zCpmAntFFNBlock.forwardrr   r#   r#   r!   r$   rw   	  s   rw   c                	       s\   e Zd Zd	ed fddZd
ejejeej ee ee	 ee eej dddZ
  ZS )CpmAntTransformerBlockNr   c                    s&   t    t||d| _t|| _d S re   )r   r   rd   self_attrw   ry   rJ   r!   r#   r$   r   %  s    
zCpmAntTransformerBlock.__init__Fri   c           	   	   C   s.   | j |||||||d\}}| |}||fS )a  
        Args:
            hidden_states (`torch.Tensor`):
                Input to the layer of shape `(batch, seq_len, dim_model)`
            attention_mask (`torch.Tensor`):
                Avoid invalid areas to participate in the calculation of shape `(batch, seq_len, seq_len)`
            position_bias (`torch.Tensor`):
                Provides position information to attention mechanism of shape `(num_heads, seq_len, seq_len)`
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers.
            past_key_values (`tuple[torch.Tensor, torch.Tensor])`, *optional*):
                Cached past key and value projection states
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
        )rM   rN   rO   rP   rQ   rR   )r{   ry   )	r    r&   rM   rN   rO   rP   rQ   rR   rb   r#   r#   r$   r1   *  s    


zCpmAntTransformerBlock.forward)N)NFNNNrk   r#   r#   r!   r$   rz   $  s   	     rz   c                
       s\   e Zd Zed fddZdejejejee ee ee	 ee eej dddZ
  ZS )	CpmAntEncoderr   c                    s@   t     j| _t fddt| jD | _t | _	d S )Nc                    s   g | ]}t  |d qS )rf   )rz   ).0ir   r#   r$   
<listcomp>V      z*CpmAntEncoder.__init__.<locals>.<listcomp>)
r   r   Znum_hidden_layersZ
num_layersr   Z
ModuleListrangelayersr   output_layernormr   r!   r   r$   r   S  s    
 zCpmAntEncoder.__init__N)r&   rM   rN   rO   output_hidden_statesrP   rQ   cache_postionc	              	   C   s   |rdnd}	|rdnd}
t | jD ]@\}}|r8|	|f7 }	|||||||d}|\}}|r"|
|f7 }
q"| |}|r||	|f7 }	||	|
fS )a%  
        Args:
            hidden_states (`torch.Tensor`):
                Input to the layer of shape `(batch, seq_len, dim_model)`
            attention_mask (`torch.Tensor`):
                Avoid invalid areas to participate in the calculation of shape `(batch, seq_len, seq_len)`
            position_bias (`torch.Tensor`):
                Provides position information to attention mechanism of shape `(num_heads, seq_len, seq_len)`
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers.
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers.
            past_key_values (`tuple[torch.Tensor, torch.Tensor])`, *optional*):
                Cached past key and value projection states
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
        r#   N)rO   rP   rQ   )	enumerater   r   )r    r&   rM   rN   rO   r   rP   rQ   r   all_hidden_statesZall_self_attnsr~   layerZlayer_outputsrb   r#   r#   r$   r1   Z  s(    


zCpmAntEncoder.forward)NNNNNrk   r#   r#   r!   r$   r|   R  s         r|   c                       s0   e Zd Z fddZejejdddZ  ZS )CpmAntIntermediatec                    sB   t    t|j|j| _t|jt	r6t
|j | _n|j| _d S r   )r   r   r   rA   r   intermediate_sizedense
isinstanceZ
hidden_actstrr   intermediate_act_fnr   r!   r#   r$   r     s
    
zCpmAntIntermediate.__init__)r&   returnc                 C   s   |  |}| |}|S r   )r   r   rv   r#   r#   r$   r1     s    

zCpmAntIntermediate.forwardr2   r3   r4   r   r   r6   r1   r7   r#   r#   r!   r$   r     s   r   c                       sP   e Zd Zed fddZejejejejdddZdd ZdddZ	  Z
S )CpmAntSegmentPositionEmbeddingr   c                    sR   t    |j| _|j| _|j| _|j| _	t
t|j|j |j |j| _d S r   )r   r   r=   r>   Zposition_bias_num_bucketsnum_bucketsZposition_bias_max_distancemax_distancesegment_typesnum_segmentsr   r   r   r   relative_attention_biasr   r!   r#   r$   r     s    
z'CpmAntSegmentPositionEmbedding.__init__)key_pos	query_poskey_segmentquery_segmentc              	   C   s  t   |d}|d}|d}|d|dkr`td|d d|d d||dks|||dkrtd| d|d d||dkrtd| d|d d||d|}|||d}||d|}|||d}| ||}|| j }| jt j|t j	|j
d	d d d f t j|t j	|j
d	d d d f  | j| jd
}	t ||k|	d d d d d f |}W d    n1 s0    Y  t|| j}
|
dddd }
|
S )Nr   r   z>key_pos.size(0) should be equal to query_pos.size(0), but got z and !z7keylen should be equal to key_segment.size(1), but got z;querylen should be equal to query_segment.size(1), but got r(   r-   rT   )r   r   r   r)   )r   Zno_gradr+   r,   rU   !_segment_relative_position_bucketr   _position_bucketarangeint32rT   r   whereFZ	embeddingr   rV   r]   )r    r   r   r   r   batchZkeylenZquerylenZrelative_position_bucketZabsolute_position_bucketZembedsr#   r#   r$   r1     sJ    



$z&CpmAntSegmentPositionEmbedding.forwardc                 C   s   || j  | S r   )r   )r    r   r   r#   r#   r$   r     s    z@CpmAntSegmentPositionEmbedding._segment_relative_position_bucket       c                 C   s   d}|d }|dk tj| }t|}|d }||k }|t| | t||  ||   tj }t|t||d }|t	|| tj|7 }|S )Nr   r)   r   )
r.   r   r   abslogr\   rY   minZ	full_liker   )r    Zrelative_positionr   r   Zrelative_bucketsZ	max_exactZis_smallZrelative_postion_if_larger#   r#   r$   r     s*    
z/CpmAntSegmentPositionEmbedding._position_bucket)r   r   )r2   r3   r4   r   r   r   r6   r1   r   r   r7   r#   r#   r!   r$   r     s   4r   c                       s4   e Zd Z fddZejejejdddZ  ZS )CpmAntOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S )N)r   )r   r   r   rA   r   r   r   	LayerNormZlayer_norm_epsrH   Zhidden_dropout_probrI   r   r!   r#   r$   r     s    
zCpmAntOutput.__init__)r&   input_tensorr   c                 C   s&   |  |}| |}| || }|S r   )r   rI   r   )r    r&   r   r#   r#   r$   r1     s    

zCpmAntOutput.forwardr   r#   r#   r!   r$   r     s   r   c                   @   s"   e Zd ZU eed< dZdd ZdS )CpmAntPreTrainedModelr   cpmantc                 C   s   t |tjr:|jjjd| jjd |jdur|jj	  nt |tj
rz|jjjd| jjd |jdur|jj|j 	  nbt |tjr|jj	  |jjd n:t |tr|jjd n t |tr|jjjd| jjd dS )zInitialize the weightsg        )r0   ZstdNg      ?)r   r   rA   r   dataZnormal_r   Zinit_stdr:   Zzero_	EmbeddingZpadding_idxr   Zfill_r   r   r   )r    moduler#   r#   r$   _init_weights  s    



z#CpmAntPreTrainedModel._init_weightsN)r2   r3   r4   r   __annotations__Zbase_model_prefixr   r#   r#   r#   r$   r     s   
r   c                       s   e Zd Zed fddZdd Zdd Zdd	 Zede	e
j e	e e	e e	eee
j   e	e e	e e	e
j eee
j ef dddZ  ZS )CpmAntModelr   c                    sl   t  | t|| _t|j|j| _t|j	|j
|j  |j| _t|| _|j| _|j	| _	|   d S r   )r   r   r|   encoderr   r   r   r   segment_embedding
vocab_sizeprompt_typesprompt_lengthinput_embeddingr   rN   	post_initr   r!   r#   r$   r   &  s    

zCpmAntModel.__init__c                 C   s   | j S r   r   r    r#   r#   r$   get_input_embeddings3  s    z CpmAntModel.get_input_embeddingsc                 K   s
   || _ d S r   r   )r    
embeddingskwargsr#   r#   r$   set_input_embeddings6  s    z CpmAntModel.set_input_embeddingsc                 C   s>  | d}| d}|j}tj||dtj||dddk}|d d d d d f |d d d d d f  |d||@ B }	|	|d d d d d f |d d d d d f k@ }	tjtt|| j	 d d d |dd d d f 
|d|d d d f k }
tjtj|| j	|d |
fdd}
|
||d|
|d|@ |	@ }	|	S )Nr   r   )rT   r(   r;   )r+   rT   r   r   rU   Zlogical_notr[   listr   r   repeatcatZonesrc   )r    	input_idsspancontextlengthr   ZseqlenrT   Zdirectional_mask_2drM   Zmask_1dr#   r#   r$   _prepare_attention_mask9  s    

$&08$ z#CpmAntModel._prepare_attention_maskN)r   rO   r   rP   rQ   return_dictrR   r   c              
   K   sr  |dur|n| j j}|dur |n| j j}|dur4|n| j j}|durH|n| j j}|jtjkrh|tj}|j|j	 }	}
t
|dkddj|	|
d}|dkdj|	|
d}tjtj| jd | j | jd | j |	|
d|dd|fdd}| \}}tjtj|| j|	|
d|fdd}tj||fd|	|
d}tj||	|
d|d}tj||fd|	|
d}|r~|du r~t| j d	}|rt|trtd
 t|}|dur| nd}| }| |}| |}|dkr|ddddddf }|| }| ||||}| ||||}|dd|dddf }|dddd|dddf }|dd|dddf }|  ||||||||\}}}|dkrB|dd| jdddf }|durd}|D ]0}||dddd| jd| jdf f7 }q|}|durBd}|D ](}||dd| jdddf f7 }q|}|sbtdd ||||fD S t!||||dS )ai  
        input_ids (`torch.Tensor` of shape `(batch_size, seq_len)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`CPMAntTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        Nr   r)   r   r(   r   r   r;   r   zPassing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `DynamicCache` instead, e.g. `past_key_values=DynamicCache.from_legacy_cache(past_key_values)`.r#   c                 s   s   | ]}|d ur|V  qd S r   r#   )r}   vr#   r#   r$   	<genexpr>  s   z&CpmAntModel.forward.<locals>.<genexpr>)last_hidden_staterP   r&   
attentions)"r   rO   r   use_return_dictrQ   r-   r   r   r.   rT   r   sumr   r   r   r   r   r+   Zzerosfullr	   r   tupleloggerZwarning_onceZfrom_legacy_cacheZget_seq_lengthr]   r   r   r   rN   r   r   )r    r   rO   r   rP   rQ   r   rR   r   r-   rT   segmentr   r   Z
seq_lengthr   positionr   Zpast_lengthr&   Zsegment_statesrM   rN   r   Zall_attentionsZnew_attentionsZ	attentionZnew_hidden_statesZhidden_stater#   r#   r$   r1   K  s    
	"



 


.
&
zCpmAntModel.forward)NNNNNNN)r2   r3   r4   r   r   r   r   r   r   r   r   r6   rc   r   r   r   r1   r7   r#   r#   r!   r$   r   $  s,          r   zy
    The CPMAnt Model with a language modeling head on top (linear layer with weights tied to the input embeddings).
    )Zcustom_introc                       s   e Zd ZdgZed fddZedeej	 ee
eej	ej	f   ee ee ee eej	 ee eej	 eej	 eeef d
ddZd	d
 Zdd Zdd Z  ZS )CpmAntForCausalLMzlm_head.weightr   c                    sD   t  | t|| _tj|j|j|j|j	  dd| _
|   d S rm   )r   r   r   r   r   rA   r   r   r   r   lm_headr   r   r!   r#   r$   r     s    
zCpmAntForCausalLM.__init__N)
r   rP   rQ   rO   r   labelsr   rM   rR   r   c
              	   K   s   |dur|n| j j}| |||||||	}|r4|jn|d }| |}d}|durvt }||d|d|d}|s|f|dd  }|dur|f| S |S t|||j	|j
|jdS )u<  
        input_ids (`torch.Tensor` of shape `(batch_size, seq_len)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`CPMAntTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        labels (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss.

        Example:

        Text Generation with CpmAntForCausalLM.
        ```python
        >>> from transformers import CPMAntTokenizer, CpmAntForCausalLM

        >>> texts = "今天天气不错，"
        >>> model = CpmAntForCausalLM.from_pretrained("openbmb/cpm-ant-10b")
        >>> tokenizer = CPMAntTokenizer.from_pretrained("openbmb/cpm-ant-10b")
        >>> input_ids = tokenizer(texts, return_tensors="pt")
        >>> outputs = model.generate(**input_ids)
        >>> output_texts = tokenizer.batch_decode(outputs)
        >>> print(output_texts)
        ['今天天气不错，阳光明媚，我和妈妈一起去超市买东西。\n在超市里，我看到了一个很好玩的玩具，它的名字叫“机器人”。它有一个圆圆的脑袋，两只圆圆的眼睛，还有一个圆圆的']
        ```
        Nr   r(   r   )losslogitsrP   r&   r   )r   r   r   r   r   r   rU   r+   r   rP   r&   r   )r    r   rP   rQ   rO   r   r   r   rM   rR   r   Zmodel_outputr&   r   r   Z	loss_funcoutputr#   r#   r$   r1     s4    )	
zCpmAntForCausalLM.forwardc                 C   s   | j jS r   r   r   r   r#   r#   r$   r     s    z&CpmAntForCausalLM.get_input_embeddingsc                 C   s   || j _d S r   r   )r    r   r#   r#   r$   r     s    z&CpmAntForCausalLM.set_input_embeddingsc                 C   s<   dd |D }|D ]$}|d | |d< |d | |d< q|S )Nc                 S   s    g | ]}|d urt |n|qS r   )r   )r}   Zeachr#   r#   r$   r   !  r   z4CpmAntForCausalLM._reorder_cache.<locals>.<listcomp>r   r   r#   )r    rP   Zbeam_idxZkey_value_layerr#   r#   r$   _reorder_cache   s
    z CpmAntForCausalLM._reorder_cache)	NNNNNNNNN)r2   r3   r4   Z_tied_weights_keysr   r   r   r   r   r6   r   r   rc   r   r   r1   r   r   r   r7   r#   r#   r!   r$   r     s6   
         
Hr   )r   r   r   )1r5   rY   typingr   r   r   Ztorch.nn.functionalr   Z
functionalr   Ztorch.utils.checkpointZtorch.nnr   Zactivationsr   Zcache_utilsr   r	   Z
generationr
   Zmodeling_outputsr   r   Zmodeling_utilsr   utilsr   r   Zconfiguration_cpmantr   Z
get_loggerr2   r   Moduler   r8   rd   rl   rs   rw   rz   r|   r   r   r   r   r   r   __all__r#   r#   r#   r$   <module>   sH   
e7.@] d