a
    h                     @   s  d Z ddlZddlmZ ddlmZmZ ddlZddl	Z	ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZ e e!Z"dZ#dd Z$G dd de
j%Z&G dd de
j%Z'e	j(e)e)e	j(dddZ*G dd de
j%Z+G dd de
j%Z,G dd de
j%Z-G dd de
j%Z.dEe	j(e)e)e/e/e	j(d"d#d$Z0G d%d& d&e
j%Z1G d'd( d(e
j%Z2eG d)d* d*eZ3G d+d, d,e
j%Z4eed-d.G d/d0 d0eZ5ed1d.G d2d3 d3e3Z6eG d4d5 d5e3Z7ed6d.G d7d8 d8e3Z8eG d9d: d:e3Z9ed;d.G d<d= d=e3Z:eG d>d? d?e3Z;eG d@dA dAe3Z<eG dBdC dCe3Z=g dDZ>dS )Fz!PyTorch Funnel Transformer model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                 C   s  zddl }ddl}ddl}W n ty:   td  Y n0 tj|}t	d|  |j
|}g }g }	|D ]@\}
}t	d|
 d|  |j
||
}||
 |	| qpddd	d
ddddddddddd}t||	D ]\}
}|
d}
tdd |
D rt	dd|
  q|
d dkr.q| }d}|
dd D ]}t|ts|d|rt|d| d }||jk rd}||j| kr||j| 8 }|d7 }q|j| | }n||j8 }|j| }n|dkrt|tr|j} qdnb||v rt||| }nHzt||}W n8 ty^   t dd|
 |j! d}Y  qdY n0 qB|st"|j!t"|j!kr|#|j!}|dkr|$|}t%&||_'q| S ) z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape k_headq_headv_head	post_projlinear_1linear_2	attentionffnweightbiasword_embeddings
embeddings)kqvoZlayer_1Zlayer_2Zrel_attnffkernelgammabetaZlookup_tableZword_embeddinginput/c                 s   s   | ]}|d v V  qdS ))Zadam_vZadam_mZAdamWeightDecayOptimizerZAdamWeightDecayOptimizer_1Zglobal_stepN ).0nr-   r-   f/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/funnel/modeling_funnel.py	<genexpr>\   s   z,load_tf_weights_in_funnel.<locals>.<genexpr>z	Skipping 	generatorFr   z	layer_\d+zlayer_(\d+)rTr(   )(renumpyZ
tensorflowImportErrorloggererrorospathabspathinfotrainZlist_variablesZload_variableappendzipsplitanyjoin
isinstanceFunnelPositionwiseFFN	fullmatchintsearchgroupsZnum_hidden_layersblock_sizesblockslayersFunnelRelMultiheadAttentionr_kernelgetattrAttributeErrorprintshapelenreshapeZ	transposetorchZ
from_numpydata)modelconfigZtf_checkpoint_pathr4   nptfZtf_pathZ	init_varsnamesZarraysnamerQ   arrayZ
_layer_mapZpointerZskippedZm_namelayer_indexZ	block_idxr-   r-   r0   load_tf_weights_in_funnel.   s    






r^   c                       sF   e Zd Zedd fddZdeej eej ejdddZ  Z	S )	FunnelEmbeddingsNrW   returnc                    sH   t    tj|j|j|jd| _tj|j	|j
d| _t|j| _d S )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizeZpad_token_idr!   	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfrW   	__class__r-   r0   rf      s    
zFunnelEmbeddings.__init__)	input_idsinputs_embedsra   c                 C   s*   |d u r|  |}| |}| |}|S N)r!   rm   rp   )rr   ru   rv   r"   r-   r-   r0   forward   s
    


zFunnelEmbeddings.forward)NN)
__name__
__module____qualname__r   rf   r   rT   Tensorrx   __classcell__r-   r-   rs   r0   r_      s    r_   c                       s  e Zd ZU dZdZeed< edd fddZd$e	j
ee	j
 ee	j
 ee	j
 dd	d
Ze	j
e	j
dddZee	je	jeee	j
 eee	j
  f dddZe	j
edddZd%e	j
eee	j
dddZee	j
ee	j
 ee	j
 f eeee ee f e	j
dddZd&ee	j
ee	j
 ee	j
 f eee	j
dddZee	j
 ee	j
ee	j
 f dd d!Zee	j
 ee	j
 dd"d#Z  ZS )'FunnelAttentionStructurez>
    Contains helpers for `FunnelRelMultiheadAttention `.
       cls_token_type_idNr`   c                    s6   t    || _t|j| _t|j| _d | _d S rw   )	re   rf   rW   r   rn   ro   sin_dropoutcos_dropoutpooling_multrq   rs   r-   r0   rf      s
    
z!FunnelAttentionStructure.__init__)rv   attention_masktoken_type_idsra   c                 C   sv   d| _ |d | _}| ||j|j}|dur:| |nd}| jjrft	j
||d |d gdnd}||||fS )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )r   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matrW   separate_clsr   
functionalpadZnew_ones)rr   rv   r   r   r   position_embedstoken_type_matcls_maskr-   r-   r0   init_attention_inputs   s    	"z.FunnelAttentionStructure.init_attention_inputs)r   ra   c                 C   s^   |dddddf |dddf k}|| j k}|dddddf |dddf B }||B S )z-Convert `token_type_ids` to `token_type_mat`.N)r   )rr   r   r   Zcls_idsZcls_matr-   r-   r0   r      s    &
&z.FunnelAttentionStructure.token_type_ids_to_mat)r   r   r   ra   c                 C   s  | j j}| j jdkrtjd|dtj|d|}tjd|d dtj|d|}dd||d    }|dddf |d  }t|}	| |	}
t	|}| 
|}tj|
|
gd	d
}tj||	gd	d
}tj||gd	d
}tj|	 |gd	d
}||||fS tjd|d dtj|d|}dd||d    }tj| d |d dtj|d|}|d }|dddf |d  }| t|}	| 
t	|}tj|	|gd	d
}tjd|tj|d|}|}g }td| j jD ]}|dkrd}n^| ||}d|d  }| j|||dd}|dddf | }||d|}t|d|}|}d| }| ||}|dddf | }||d|}t|d|}|||g q|S dS )a  
        Create and cache inputs related to relative position encoding. Those are very different depending on whether we
        are using the factorized or the relative shift attention:

        For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
        final formula.

        For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
        formula.

        Paper link: https://huggingface.co/papers/2006.03236
        
factorizedr         ?r   r   r   r   i'  Ndim)shift)rW   rk   attention_typerT   arangeZint64tosinr   cosr   catrangeZ
num_blocksstride_pool_posrelative_posexpandr   Zgatherr>   )rr   r   r   r   rk   Zpos_seqZfreq_seqZinv_freqZsinusoidZ	sin_embedZsin_embed_dZ	cos_embedZcos_embed_dphipsipiomegaZ
rel_pos_idZzero_offsetZ	pos_embedpos
pooled_posZposition_embeds_listblock_indexZposition_embeds_poolingstrideZrel_posZposition_embeds_no_poolingr-   r-   r0   r      sV     



 &
z,FunnelAttentionStructure.get_position_embeds)pos_idr   c                 C   sj   | j jrX|d|  d g}| j jr2|dd n
|dd }t||ddd gdS |ddd S dS )ze
        Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
        r   r   r   Nr   )rW   r   Z
new_tensortruncate_seqrT   r   )rr   r   r   Zcls_posZpooled_pos_idr-   r-   r0   r     s
     z(FunnelAttentionStructure.stride_pool_posr   )r   r   r   ra   c           	      C   sb   |du r|}|d |d  }|t | }|||  }|d |d  }tj||d | tj|jdS )zV
        Build the relative positional vector between `pos` and `pooled_pos`.
        Nr   r   r   r   )rR   rT   r   longr   )	rr   r   r   r   r   Z	ref_pointZ
num_removeZmax_distZmin_distr-   r-   r0   r   $  s    z%FunnelAttentionStructure.relative_pos)tensoraxisra   c                    s   |du rdS t  ttfr4 D ]}||}q|S t |ttfr^t| fdd|D S  |j;  jjrjjrt	dddn
t	ddd}t	dg  |g }jjrt	dg  t	ddg }t
j|| |g d}|| S )zT
        Perform pooling by stride slicing the tensor along the given axis.
        Nc                 3   s   | ]} | V  qd S rw   )stride_poolr.   xr   rr   r-   r0   r1   E      z7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>r   r   r   )r   )rC   listtupler   typendimrW   r   r   slicerT   r   )rr   r   r   axZ
axis_sliceZ	enc_sliceZ	cls_slicer-   r   r0   r   2  s     
&z$FunnelAttentionStructure.stride_poolmean)r   moder   ra   c                    s  du rdS t ttfr:t fddD S jjrjjr^ddddf n}tjddddf |gddj	}|dkrddddddf n$|dkrЈdddddddf df d	krt
jjd
dnL dkrt
jjd
dn, dkr:t
jj d
d ntd|dkrdddddddf S |dkr~dddf S S )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc                 3   s   | ]}j  d V  qdS ))r   r   N)pool_tensorr   r   rr   r   r   r-   r0   r1   \  r   z7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>r   r   r   r   r	   r   T)r   Z	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )rC   r   r   r   rW   r   r   rT   r   r   r   r   Z
avg_pool2dZ
max_pool2dNotImplementedError)rr   r   r   r   suffixr   r-   r   r0   r   S  s2      "



z$FunnelAttentionStructure.pool_tensor)attention_inputsra   c                 C   s   |\}}}}| j jrl| j jdkr@| |dd d|dd  }| |d}| |d}| j|| j jd}nf|  jd9  _| j jdkr| |d}| |ddg}| |ddg}| j|dd}| j|| j jd}||||f}||fS )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.r   Nr   r   r   r   r   )rW   pool_q_onlyr   r   r   Zpooling_typer   )rr   outputr   r   r   r   r   r-   r-   r0   pre_attention_poolingy  s      z.FunnelAttentionStructure.pre_attention_poolingc                 C   s   |\}}}}| j jrt|  jd9  _| j jdkrN|dd | |dd d }| |d}| |d}| j|dd}||||f}|S )zFPool the proper parts of `attention_inputs` after the attention layer.r   r   Nr   r   r   r   )rW   r   r   r   r   r   )rr   r   r   r   r   r   r-   r-   r0   post_attention_pooling  s     z/FunnelAttentionStructure.post_attention_pooling)NN)Nr   )r   r   )ry   rz   r{   __doc__r   rF   __annotations__r   rf   rT   r|   r   r   r   r   r   r   r   r   r   r   r   r   strr   r   r   r}   r-   r-   rs   r0   r~      s<   
  	
P" 'r~   )positional_attncontext_lenr   ra   c                 C   sn   | j \}}}}t| ||||g} | d d d d |d d d f } t| ||||| g} | dd |f } | S )N.)rQ   rT   rS   )r   r   r   
batch_sizen_headr   Zmax_rel_lenr-   r-   r0   _relative_shift_gather  s     r   c                	       sj   e Zd Zeedd fddZdddZdddZdej	ej	ej	e
ej	 ee
ej	d
f dddZ  ZS )rL   NrW   r   ra   c                    s*  t    || _|| _|j|j|j  }}}t|j	| _	t|j
| _
tj||| dd| _t||| | _t||| | _tt||g| _tt||g| _tt|||g| _tt||g| _ttd||g| _t|| || _tj||jd| _d|d  | _d S )NF)r    r   rc   r   g      ?)re   rf   rW   r   rk   r   d_headr   rn   ro   attention_dropoutLinearr   r   r   	ParameterrT   zerosr_w_biasr_r_biasrM   r_s_bias	seg_embedr   rj   rl   rm   scale)rr   rW   r   rk   r   r   rs   r-   r0   rf     s"    
z$FunnelRelMultiheadAttention.__init__c                 C   s   | j jdkr|\}}}}| j| j }	| j}
td||	 |
}||dddf  }||dddf  }td||td|| }nf|jd |krdnd}|| j |d  }| j| j }| j}
td||
}td|| |}t	|||}|dur||9 }|S )	z5Relative attention score for the positional encodingsr   zbinh,dnh->bindNzbind,jd->bnijr   r   ztd,dnh->tnhzbinh,tnh->bnit)
rW   r   r   r   rM   rT   einsumrQ   r   r   )rr   r   r   r   r   r   r   r   r   uZw_rZq_r_attentionZq_r_attention_1Zq_r_attention_2r   r   r3   r%   Zr_headr-   r-   r0   relative_positional_attention  s(    z9FunnelRelMultiheadAttention.relative_positional_attentionc                 C   s   |du rdS |j \}}}| j| j }td|| | j}|dddf ||j d ||g}tj|ddd\}	}
t||
|j |	|j }|dur||9 }|S )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisr   r   r   r   )	rQ   r   r   rT   r   r   r   r@   where)rr   r   r   r   r   r   r   r   Ztoken_type_biasZdiff_token_typeZsame_token_typetoken_type_attnr-   r-   r0   relative_token_type_attention  s    $z9FunnelRelMultiheadAttention.relative_token_type_attentionF.)querykeyvaluer   output_attentionsra   c                 C   sn  |\}}}}	|j \}
}}|j d }| jj| jj }}| ||
|||}| ||
|||}| ||
|||}|| j }| j	| j }t
d|| |}| ||||	}| |||	}|| | }|j}| }|d ur|td|d d d d f     }t
j|d|d}| |}t
d||}| ||
||| }| |}| || }|rh||fS |fS )Nr   zbind,bjnd->bnijr   )r   r   zbnij,bjnd->bind)rQ   rW   r   r   r   viewr   r   r   r   rT   r   r   r   r   floatINFZsoftmaxr   r   rS   ro   rm   )rr   r   r   r   r   r   r   r   r   r   r   r   _r   r   r   r   r   r   r   Zcontent_scorer   r   Z
attn_scorer   Z	attn_probZattn_vecZattn_outr   r-   r-   r0   rx     s0    



"

z#FunnelRelMultiheadAttention.forward)N)N)F)ry   rz   r{   r   rF   rf   r   r   rT   r|   r   boolrx   r}   r-   r-   rs   r0   rL     s   
*
 rL   c                       s8   e Zd Zedd fddZejejdddZ  ZS )rD   Nr`   c                    sl   t    t|j|j| _t|j | _	t
|j| _t|j|j| _t
|j| _t|j|j| _d S rw   )re   rf   r   r   rk   Zd_innerr   r
   
hidden_actactivation_functionrn   activation_dropoutr   ro   rp   rj   rl   rm   rq   rs   r-   r0   rf   >  s    
zFunnelPositionwiseFFN.__init__hiddenra   c                 C   s@   |  |}| |}| |}| |}| |}| || S rw   )r   r   r   r   rp   rm   )rr   r   hr-   r-   r0   rx   G  s    




zFunnelPositionwiseFFN.forward)	ry   rz   r{   r   rf   rT   r|   rx   r}   r-   r-   rs   r0   rD   =  s   	rD   c                       sD   e Zd Zeedd fddZd	ejejejee	dddZ
  ZS )
FunnelLayerNr   c                    s$   t    t||| _t|| _d S rw   )re   rf   rL   r   rD   r   )rr   rW   r   rs   r-   r0   rf   Q  s    
zFunnelLayer.__init__F)r   r   r   r   ra   c                 C   s8   | j |||||d}| |d }|r2||d fS |fS )Nr   r   r   )r   r   )rr   r   r   r   r   r   Zattnr   r-   r-   r0   rx   V  s    zFunnelLayer.forward)F)ry   rz   r{   r   rF   rf   rT   r|   r   r   rx   r}   r-   r-   rs   r0   r   P  s    r   c                
       sV   e Zd Zedd fddZd
ejeej eej eeee	e
ef ddd	Z  ZS )FunnelEncoderNr`   c                    s>   t     | _t | _t fddt jD | _	d S )Nc                    s.   g | ]&\ }t  fd dt|D qS )c                    s   g | ]}t  qS r-   r   r.   r   )r   rW   r-   r0   
<listcomp>j  r   z5FunnelEncoder.__init__.<locals>.<listcomp>.<listcomp>)r   
ModuleListr   )r.   
block_sizerW   )r   r0   r   i  s   z*FunnelEncoder.__init__.<locals>.<listcomp>)
re   rf   rW   r~   attention_structurer   r   	enumeraterI   rJ   rq   rs   r   r0   rf   d  s    


zFunnelEncoder.__init__FT)rv   r   r   r   output_hidden_statesreturn_dictra   c              
   C   sl  | |}| jj|||d}|}|r*|fnd }	|r6dnd }
t| jD ]\}}|d| jjr`dndk}|op|dk}|r| j||\}}t|D ]\}}t	| jj
| D ]}|dko|dko|}|r|}| jjr|n| }}n| } }}||||||d}|d }|r| j|}|r,|
|dd   }
|r|	|f }	qqqD|s^tdd ||	|
fD S t||	|
d	S )
Nr   r   r-   r   r   r   r   c                 s   s   | ]}|d ur|V  qd S rw   r-   r.   r%   r-   r-   r0   r1     r   z(FunnelEncoder.forward.<locals>.<genexpr>last_hidden_statehidden_states
attentions)Ztype_asr   r   r   rJ   r   rW   r   r   r   Zblock_repeatsr   r   r   r   )rr   rv   r   r   r   r   r   r   r   all_hidden_statesall_attentionsr   blockZpooling_flagZpooled_hiddenr]   layerZrepeat_indexZ
do_poolingr   r   r   layer_outputr-   r-   r0   rx   o  sD    

zFunnelEncoder.forward)NNFFTry   rz   r{   r   rf   rT   r|   r   r   r   r   r   rx   r}   r-   r-   rs   r0   r   c  s        
r   TF)r   r   
target_lenr   r   ra   c              	   C   s   |dkr| S |r8| ddddf }| ddddf } t j| |dd}|r|rntj|ddd|d ddf}|ddd|d f }t j||gdd}n|ddd|f }|S )z{
    Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
    r   N)Zrepeatsr   r   r   )rT   Zrepeat_interleaver   r   r   r   )r   r   r  r   r   clsr   r-   r-   r0   upsample  s    r  c                       sZ   e Zd Zedd fddZd
ejejeej eej eeee	e
ef ddd	Z  ZS )FunnelDecoderNr`   c                    s>   t     | _t | _t fddt jD | _	d S )Nc                    s   g | ]}t  d qS )r   r   r   r   r-   r0   r     r   z*FunnelDecoder.__init__.<locals>.<listcomp>)
re   rf   rW   r~   r   r   r   r   Znum_decoder_layersrK   rq   rs   r   r0   rf     s    

zFunnelDecoder.__init__FT)final_hiddenfirst_block_hiddenr   r   r   r   r   ra   c                 C   s   t |dt| jjd  |jd | jj| jjd}|| }	|rB|	fnd }
|rNdnd }| jj|	||d}| j	D ]@}||	|	|	||d}|d }	|r||dd   }|rj|
|	f }
qj|st
dd	 |	|
|fD S t|	|
|d
S )Nr   r   )r   r  r   r   r-   r   r   r   c                 s   s   | ]}|d ur|V  qd S rw   r-   r   r-   r-   r0   r1     r   z(FunnelDecoder.forward.<locals>.<genexpr>r  )r  rR   rW   rI   rQ   r   r   r   r   rK   r   r   )rr   r  r  r   r   r   r   r   Zupsampled_hiddenr   r  r  r   r  r	  r-   r-   r0   rx     s2    

zFunnelDecoder.forward)NNFFTr
  r-   r-   rs   r0   r    s    
     
r  c                       s<   e Zd ZdZedd fddZejejdddZ  Z	S )	FunnelDiscriminatorPredictionszEPrediction module for the discriminator, made up of two dense layers.Nr`   c                    s6   t    || _t|j|j| _t|jd| _d S Nr   )re   rf   rW   r   r   rk   densedense_predictionrq   rs   r-   r0   rf     s    
z'FunnelDiscriminatorPredictions.__init__)discriminator_hidden_statesra   c                 C   s.   |  |}t| jj |}| |d}|S )Nr   )r  r
   rW   r   r  squeeze)rr   r  r  logitsr-   r-   r0   rx     s    
z&FunnelDiscriminatorPredictions.forward)
ry   rz   r{   r   r   rf   rT   r|   rx   r}   r-   r-   rs   r0   r    s   r  c                   @   s&   e Zd ZU eed< eZdZdd ZdS )FunnelPreTrainedModelrW   funnelc                 C   sl  |j j}|ddkrt|dd d urp| jjd u rV|jj\}}t	dt
||  }n| jj}tjj|j|d t|dd d urtj|jd n|dkrtjj|j| jjd	 tjj|j| jjd	 tjj|j| jjd	 tjj|j| jjd	 tjj|j| jjd	 n\|d
krh| jjd u r(dn| jj}tjj|jj|d |jjd urh|jjj|jj   d S )Nr   r   r   r   )stdr    g        rL   )br_   )rt   ry   findrN   rW   Zinitializer_stdr   rQ   rX   sqrtr   r   initZnormal_Z	constant_r    Zuniform_r   Zinitializer_ranger   rM   r   r   r!   rb   rU   Zzero_)rr   module	classnameZfan_outZfan_inr  r-   r-   r0   _init_weights  s*    

z#FunnelPreTrainedModel._init_weightsN)	ry   rz   r{   r   r   r^   Zload_tf_weightsZbase_model_prefixr!  r-   r-   r-   r0   r    s   
r  c                       s:   e Zd Zeedd fddZejejdddZ  Z	S )FunnelClassificationHeadN)rW   n_labelsra   c                    s>   t    t|j|j| _t|j| _t|j|| _	d S rw   )
re   rf   r   r   rk   linear_hiddenrn   ro   rp   
linear_out)rr   rW   r#  rs   r-   r0   rf     s    
z!FunnelClassificationHead.__init__r   c                 C   s(   |  |}t|}| |}| |S rw   )r$  rT   tanhrp   r%  )rr   r   r-   r-   r0   rx     s    


z FunnelClassificationHead.forward)
ry   rz   r{   r   rF   rf   rT   r|   rx   r}   r-   r-   rs   r0   r"    s   r"  z2
    Output type of [`FunnelForPreTraining`].
    )Zcustom_introc                   @   sb   e Zd ZU dZdZeej ed< dZ	eej ed< dZ
eeej  ed< dZeeej  ed< dS )FunnelForPreTrainingOutputa1  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss of the ELECTRA-style objective.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
        Prediction scores of the head (scores for each token before SoftMax).
    Nlossr  r  r  )ry   rz   r{   r   r(  r   rT   ZFloatTensorr   r  r  r   r  r-   r-   r-   r0   r'  &  s
   
r'  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    c                       s   e Zd Zedd fddZejdddZejddd	d
Ze	de
ej e
ej e
ej e
ej e
ej e
ej e
e e
e e
e eeef d
ddZ  ZS )FunnelBaseModelNr`   c                    s,   t  | t|| _t|| _|   d S rw   )re   rf   r_   r"   r   encoder	post_initrq   rs   r-   r0   rf   A  s    

zFunnelBaseModel.__init__ra   c                 C   s   | j jS rw   r"   r!   rr   r-   r-   r0   get_input_embeddingsJ  s    z$FunnelBaseModel.get_input_embeddingsnew_embeddingsra   c                 C   s   || j _d S rw   r-  rr   r1  r-   r-   r0   set_input_embeddingsM  s    z$FunnelBaseModel.set_input_embeddings)
ru   r   r   position_ids	head_maskrv   r   r   r   ra   c
                 C   s  |d ur|n| j j}|d ur |n| j j}|	d ur4|	n| j j}	|d urV|d urVtdn@|d urt| || | }
n"|d ur| d d }
ntd|d ur|jn|j}|d u rtj	|
|d}|d u rtj
|
tj|d}| j||d}| j||||||	d}|S )NDYou cannot specify both input_ids and inputs_embeds at the same timer   5You have to specify either input_ids or inputs_embedsr   r   rv   r   r   r   r   r   )rW   r   r   use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rT   onesr   r   r"   r*  )rr   ru   r   r   r4  r5  rv   r   r   r   input_shaper   encoder_outputsr-   r-   r0   rx   P  s6    

	zFunnelBaseModel.forward)	NNNNNNNNNry   rz   r{   r   rf   r   rg   r/  r3  r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   r)  :  s2   	         
r)  c                       s   e Zd Zedd fddZejdddZejddd	d
Ze	de
ej e
ej e
ej e
ej e
e e
e e
e eeef dddZ  ZS )FunnelModelNr`   c                    s<   t  | || _t|| _t|| _t|| _| 	  d S rw   )
re   rf   rW   r_   r"   r   r*  r  decoderr+  rq   rs   r-   r0   rf     s    


zFunnelModel.__init__r,  c                 C   s   | j jS rw   r-  r.  r-   r-   r0   r/    s    z FunnelModel.get_input_embeddingsr0  c                 C   s   || j _d S rw   r-  r2  r-   r-   r0   r3    s    z FunnelModel.set_input_embeddings)ru   r   r   rv   r   r   r   ra   c              	   C   s  |d ur|n| j j}|d ur |n| j j}|d ur4|n| j j}|d urV|d urVtdn@|d urt| || | }n"|d ur| d d }ntd|d ur|jn|j}	|d u rtj	||	d}|d u rtj
|tj|	d}| j||d}| j||||d|d}
| j|
d	 |
d
 | j jd	  |||||d}|sd	}|d	 f}|rb|d
7 }||
d
 ||  f }|r|d
7 }||
d ||  f }|S t|d	 |r|
j|j nd |r|
j|j nd dS )Nr6  r   r7  r8  r   r9  Tr:  r   r   )r  r  r   r   r   r   r   r   r  )rW   r   r   r;  r<  r=  r   r   rT   r>  r   r   r"   r*  rC  rI   r   r  r  )rr   ru   r   r   rv   r   r   r   r?  r   r@  Zdecoder_outputsidxoutputsr-   r-   r0   rx     sh    

	

zFunnelModel.forward)NNNNNNNrA  r-   r-   rs   r0   rB    s*   
       
rB  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                       sz   e Zd Zedd fddZedeej eej eej eej eej ee	 ee	 ee	 e
eef d	ddZ  ZS )	FunnelForPreTrainingNr`   c                    s,   t  | t|| _t|| _|   d S rw   )re   rf   rB  r  r  discriminator_predictionsr+  rq   rs   r-   r0   rf     s    

zFunnelForPreTraining.__init__	ru   r   r   rv   labelsr   r   r   ra   c	              	   C   s   |dur|n| j j}| j|||||||d}	|	d }
| |
}d}|durt }|dur|d|
jd dk}|d|
jd | }|| }||| }n||d|
jd | }|s|f|	dd  }|dur|f| S |S t	|||	j
|	jdS )a"  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
            docstring) Indices should be in `[0, 1]`:

            - 0 indicates the token is an original token,
            - 1 indicates the token was replaced.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, FunnelForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
        >>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> logits = model(**inputs).logits
        ```Nr   r   rv   r   r   r   r   r   r   r(  r  r  r  )rW   r;  r  rG  r   r   r   rQ   r   r'  r  r  )rr   ru   r   r   rv   rI  r   r   r   r  Zdiscriminator_sequence_outputr  r(  loss_fctZactive_lossZactive_logitsZactive_labelsr   r-   r-   r0   rx     s<     	
zFunnelForPreTraining.forward)NNNNNNNN)ry   rz   r{   r   rf   r   r   rT   r|   r   r   r   r'  rx   r}   r-   r-   rs   r0   rF    s*           
rF  c                       s   e Zd ZdgZedd fddZejdddZej	dd	d
dZ
edeej eej eej eej eej ee ee ee eeef d	ddZ  ZS )FunnelForMaskedLMzlm_head.weightNr`   c                    s4   t  | t|| _t|j|j| _| 	  d S rw   )
re   rf   rB  r  r   r   rk   rh   lm_headr+  rq   rs   r-   r0   rf   :  s    
zFunnelForMaskedLM.__init__r,  c                 C   s   | j S rw   rN  r.  r-   r-   r0   get_output_embeddingsC  s    z'FunnelForMaskedLM.get_output_embeddingsr0  c                 C   s
   || _ d S rw   rO  r2  r-   r-   r0   set_output_embeddingsF  s    z'FunnelForMaskedLM.set_output_embeddingsrH  c	              	   C   s   |dur|n| j j}| j|||||||d}	|	d }
| |
}d}|durlt }||d| j j|d}|s|f|	dd  }|dur|f| S |S t|||	j|	j	dS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        NrJ  r   r   r   rK  )
rW   r;  r  rN  r   r   rh   r   r  r  )rr   ru   r   r   rv   rI  r   r   r   rE  r  Zprediction_logitsZmasked_lm_lossrL  r   r-   r-   r0   rx   I  s2    

zFunnelForMaskedLM.forward)NNNNNNNN)ry   rz   r{   Z_tied_weights_keysr   rf   r   r   rP  rg   rQ  r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   rM  6  s0   	        
rM  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                       sz   e Zd Zedd fddZedeej eej eej eej eej ee	 ee	 ee	 e
eef d	ddZ  ZS )	FunnelForSequenceClassificationNr`   c                    s>   t  | |j| _|| _t|| _t||j| _|   d S rw   )	re   rf   
num_labelsrW   r)  r  r"  
classifierr+  rq   rs   r-   r0   rf     s    
z(FunnelForSequenceClassification.__init__rH  c	              	   C   s~  |dur|n| j j}| j|||||||d}	|	d }
|
dddf }| |}d}|dur:| j jdu r| jdkr|d| j _n4| jdkr|jtjks|jtj	krd| j _nd| j _| j jdkrt
 }| jdkr|| | }n
|||}nN| j jdkrt }||d| j|d}n| j jdkr:t }|||}|sj|f|	dd  }|durf|f| S |S t|||	j|	jd	S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        NrJ  r   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr   rK  )rW   r;  r  rT  Zproblem_typerS  r   rT   r   rF   r   r  r   r   r   r   r  r  )rr   ru   r   r   rv   rI  r   r   r   rE  r  pooled_outputr  r(  rL  r   r-   r-   r0   rx     sR    




"


z'FunnelForSequenceClassification.forward)NNNNNNNN)ry   rz   r{   r   rf   r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   rR  {  s*   
        
rR  c                       sz   e Zd Zedd fddZedeej eej eej eej eej ee	 ee	 ee	 e
eef d	ddZ  ZS )	FunnelForMultipleChoiceNr`   c                    s.   t  | t|| _t|d| _|   d S r  )re   rf   r)  r  r"  rT  r+  rq   rs   r-   r0   rf     s    
z FunnelForMultipleChoice.__init__rH  c	              	   C   sX  |dur|n| j j}|dur&|jd n|jd }	|durJ|d|dnd}|durh|d|dnd}|dur|d|dnd}|dur|d|d|dnd}| j|||||||d}
|
d }|dddf }| |}|d|	}d}|durt }|||}|sD|f|
dd  }|dur@|f| S |S t|||
j	|
j
dS )aJ  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   rJ  r   rK  )rW   r;  rQ   r   r   r  rT  r   r   r  r  )rr   ru   r   r   rv   rI  r   r   r   Znum_choicesrE  r  rU  r  Zreshaped_logitsr(  rL  r   r-   r-   r0   rx     sF    



zFunnelForMultipleChoice.forward)NNNNNNNN)ry   rz   r{   r   rf   r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   rV    s*           
rV  c                       sz   e Zd Zedd fddZedeej eej eej eej eej ee	 ee	 ee	 e
eef d	ddZ  ZS )	FunnelForTokenClassificationNr`   c                    sJ   t  | |j| _t|| _t|j| _t	|j
|j| _|   d S rw   )re   rf   rS  rB  r  r   rn   ro   rp   r   ri   rT  r+  rq   rs   r-   r0   rf     s    
z%FunnelForTokenClassification.__init__rH  c	              	   C   s   |dur|n| j j}| j|||||||d}	|	d }
| |
}
| |
}d}|durtt }||d| j|d}|s|f|	dd  }|dur|f| S |S t|||	j	|	j
dS )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        NrJ  r   r   r   rK  )rW   r;  r  rp   rT  r   r   rS  r   r  r  )rr   ru   r   r   rv   rI  r   r   r   rE  r  r  r(  rL  r   r-   r-   r0   rx   &  s4    


z$FunnelForTokenClassification.forward)NNNNNNNN)ry   rz   r{   r   rf   r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   rX    s*           
rX  c                       s   e Zd Zedd fddZedeej eej eej eej eej eej ee	 ee	 ee	 e
eef d
ddZ  ZS )	FunnelForQuestionAnsweringNr`   c                    s<   t  | |j| _t|| _t|j|j| _| 	  d S rw   )
re   rf   rS  rB  r  r   r   ri   
qa_outputsr+  rq   rs   r-   r0   rf   Y  s
    
z#FunnelForQuestionAnswering.__init__)
ru   r   r   rv   start_positionsend_positionsr   r   r   ra   c
              	   C   sL  |	d ur|	n| j j}	| j|||||||	d}
|
d }| |}|jddd\}}|d }|d }d }|d ur|d urt| dkr|	d}t| dkr|d}|d}|
d|}|
d|}t|d}|||}|||}|| d }|	s6||f|
dd   }|d ur2|f| S |S t||||
j|
jdS )	NrJ  r   r   r   r   )Zignore_indexr   )r(  start_logits
end_logitsr  r  )rW   r;  r  rZ  r@   r  
contiguousrR   r   Zsquezeclampr   r   r  r  )rr   ru   r   r   rv   r[  r\  r   r   r   rE  r  r  r]  r^  Z
total_lossZignored_indexrL  Z
start_lossZend_lossr   r-   r-   r0   rx   c  sL    







z"FunnelForQuestionAnswering.forward)	NNNNNNNNN)ry   rz   r{   r   rf   r   r   rT   r|   r   r   r   r   rx   r}   r-   r-   rs   r0   rY  W  s.   
         
rY  )
r)  rM  rV  rF  rY  rR  rX  rB  r  r^   )TF)?r   r9   dataclassesr   typingr   r   r5   rX   rT   r   Ztorch.nnr   r   r   Zactivationsr
   Zmodeling_outputsr   r   r   r   r   r   Zmodeling_utilsr   utilsr   r   r   Zconfiguration_funnelr   Z
get_loggerry   r7   r   r^   Moduler_   r~   r|   rF   r   rL   rD   r   r   r   r  r  r  r  r"  r'  r)  rB  rF  rM  rR  rV  rX  rY  __all__r-   r-   r-   r0   <module>   s~    
Z   @ 1C]ODPG=I