a
    h
                     @   s  d Z ddlZddlmZmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ e e!Z"eG dd deZ#G dd dej$Z%G dd dej$Z&G dd dej$Z'G dd dej$Z(G dd dej$Z)G dd dej$Z*G dd dej$Z+G dd dej$Z,eG d d! d!e#Z-G d"d# d#e#Z.G d$d% d%ej$Z/ed&d'G d(d) d)e#Z0eG d*d+ d+e#Z1eG d,d- d-e#Z2G d.d/ d/ej$Z3eG d0d1 d1e#Z4d2d3 Z5g d4Z6dS )5zPyTorch MPNet model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MPNetConfigc                   @   s"   e Zd ZU eed< dZdd ZdS )MPNetPreTrainedModelconfigmpnetc                 C   s   t |tjr:|jjjd| jjd |jdur|jj	  n~t |tj
rz|jjjd| jjd |jdur|jj|j 	  n>t |tjr|jj	  |jjd nt |tr|jj	  dS )zInitialize the weightsg        )meanZstdNg      ?)
isinstancer   LinearweightdataZnormal_r   Zinitializer_rangebiasZzero_	Embeddingpadding_idx	LayerNormZfill_MPNetLMHead)selfmodule r(   d/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/models/mpnet/modeling_mpnet.py_init_weights1   s    


z"MPNetPreTrainedModel._init_weightsN)__name__
__module____qualname__r   __annotations__Zbase_model_prefixr*   r(   r(   r(   r)   r   ,   s   
r   c                       s.   e Zd Z fddZdddZdd Z  ZS )	MPNetEmbeddingsc                    s   t    d| _tj|j|j| jd| _tj|j|j| jd| _	tj
|j|jd| _
t|j| _| jdt|jddd d S )Nr   )r#   epsposition_ids)r   F)
persistent)super__init__r#   r   r"   
vocab_sizehidden_sizeword_embeddingsZmax_position_embeddingsposition_embeddingsr$   layer_norm_epsDropouthidden_dropout_probdropoutZregister_buffertorcharangeexpandr&   r   	__class__r(   r)   r6   E   s    
zMPNetEmbeddings.__init__Nc           	      K   s   |d u r(|d urt || j}n
| |}|d ur:| }n| d d }|d }|d u rp| jd d d |f }|d u r| |}| |}|| }| |}| |}|S )Nr3   r   )	"create_position_ids_from_input_idsr#   &create_position_ids_from_inputs_embedssizer2   r9   r:   r$   r>   )	r&   	input_idsr2   inputs_embedskwargsinput_shape
seq_lengthr:   
embeddingsr(   r(   r)   forwardS   s"    





zMPNetEmbeddings.forwardc                 C   sN   |  dd }|d }tj| jd || j d tj|jd}|d|S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr3   r   )dtypedevicer   )rG   r?   r@   r#   longrP   	unsqueezerA   )r&   rI   rK   Zsequence_lengthr2   r(   r(   r)   rF   m   s    	z6MPNetEmbeddings.create_position_ids_from_inputs_embeds)NNN)r+   r,   r-   r6   rN   rF   __classcell__r(   r(   rC   r)   r/   D   s   
r/   c                       s&   e Zd Z fddZdddZ  ZS )MPNetSelfAttentionc                    s   t    |j|j dkr>t|ds>td|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	
|j|j| _t	|j| _d S )Nr   Zembedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r5   r6   r8   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   r   qkvor<   Zattention_probs_dropout_probr>   rB   rC   r(   r)   r6      s    

zMPNetSelfAttention.__init__NFc                 K   sD  |j \}}}	| ||d| j| jdd}
| ||d| j| jdd}| ||d| j| jdd}t	|
|dd}|t
| j }|d ur||7 }|d ur|| }tjj|dd}| |}|d ur|| }t	||}|dddd }| d d | jf }|j| }| |}|r:||fn|f}|S )Nr3   r      dimr   r   )shaper\   viewrV   rZ   Z	transposer]   r^   r?   matmulmathsqrtr   Z
functionalZsoftmaxr>   permute
contiguousrG   r[   r_   )r&   hidden_statesattention_mask	head_maskposition_biasoutput_attentionsrJ   Z
batch_sizerL   _r\   r]   r^   Zattention_scoresZattention_probscZnew_c_shaper_   outputsr(   r(   r)   rN      sH    	





zMPNetSelfAttention.forward)NNNFr+   r,   r-   r6   rN   rS   r(   r(   rC   r)   rT      s       rT   c                       s.   e Zd Z fddZdd Zd	ddZ  ZS )
MPNetAttentionc                    sB   t    t|| _tj|j|jd| _t|j	| _
t | _d S Nr0   )r5   r6   rT   attnr   r$   r8   r;   r<   r=   r>   setpruned_headsrB   rC   r(   r)   r6      s
    

zMPNetAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| jj
|dd| j_
| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   rb   )lenr   rv   rV   rZ   rx   r   r\   r]   r^   r_   r[   union)r&   headsindexr(   r(   r)   prune_heads   s    zMPNetAttention.prune_headsNFc           
      K   sB   | j |||||d}| | |d | }|f|dd   }	|	S )N)ro   r   r   )rv   r$   r>   )
r&   rk   rl   rm   rn   ro   rJ   Zself_outputsattention_outputrr   r(   r(   r)   rN      s    	zMPNetAttention.forward)NNNF)r+   r,   r-   r6   r}   rN   rS   r(   r(   rC   r)   rt      s       rt   c                       s0   e Zd Z fddZejejdddZ  ZS )MPNetIntermediatec                    sB   t    t|j|j| _t|jt	r6t
|j | _n|j| _d S N)r5   r6   r   r   r8   intermediate_sizedenser   Z
hidden_actstrr	   intermediate_act_fnrB   rC   r(   r)   r6      s
    
zMPNetIntermediate.__init__rk   returnc                 C   s   |  |}| |}|S r   )r   r   )r&   rk   r(   r(   r)   rN     s    

zMPNetIntermediate.forwardr+   r,   r-   r6   r?   TensorrN   rS   r(   r(   rC   r)   r      s   r   c                       s4   e Zd Z fddZejejejdddZ  ZS )MPNetOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S ru   )r5   r6   r   r   r   r8   r   r$   r;   r<   r=   r>   rB   rC   r(   r)   r6     s    
zMPNetOutput.__init__)rk   input_tensorr   c                 C   s&   |  |}| |}| || }|S r   )r   r>   r$   )r&   rk   r   r(   r(   r)   rN     s    

zMPNetOutput.forwardr   r(   r(   rC   r)   r     s   r   c                       s&   e Zd Z fddZdddZ  ZS )
MPNetLayerc                    s,   t    t|| _t|| _t|| _d S r   )r5   r6   rt   	attentionr   intermediater   outputrB   rC   r(   r)   r6     s    


zMPNetLayer.__init__NFc                 K   sL   | j |||||d}|d }|dd  }	| |}
| |
|}|f|	 }	|	S )N)rn   ro   r   r   )r   r   r   )r&   rk   rl   rm   rn   ro   rJ   Zself_attention_outputsr~   rr   Zintermediate_outputZlayer_outputr(   r(   r)   rN      s    	

zMPNetLayer.forward)NNNFrs   r(   r(   rC   r)   r     s   	    r   c                       s\   e Zd Z fddZdejeej eej eeedddZdd	d
Z	e
dddZ  ZS )MPNetEncoderc                    sN   t     | _ j| _t fddt jD | _	t
 j| j| _d S )Nc                    s   g | ]}t  qS r(   )r   ).0rp   r   r(   r)   
<listcomp>>      z)MPNetEncoder.__init__.<locals>.<listcomp>)r5   r6   r   rV   Zn_headsr   Z
ModuleListrangenum_hidden_layerslayerr"   Zrelative_attention_num_bucketsrelative_attention_biasrB   rC   r   r)   r6   :  s
    
 zMPNetEncoder.__init__NF)rk   rl   rm   ro   output_hidden_statesreturn_dictc                 K   s   |  |}|rdnd }	|rdnd }
t| jD ]N\}}|rB|	|f }	||||| |fd|i|}|d }|r,|
|d f }
q,|r|	|f }	|stdd ||	|
fD S t||	|
dS )Nr(   ro   r   r   c                 s   s   | ]}|d ur|V  qd S r   r(   )r   r^   r(   r(   r)   	<genexpr>d  r   z'MPNetEncoder.forward.<locals>.<genexpr>)last_hidden_staterk   
attentions)compute_position_bias	enumerater   tupler   )r&   rk   rl   rm   ro   r   r   rJ   rn   Zall_hidden_statesZall_attentionsiZlayer_moduleZlayer_outputsr(   r(   r)   rN   A  s8    



zMPNetEncoder.forward    c                 C   s   | d| d| d  }}}|d urX|d d d d d f }|d d d d d f }n8tj|tjdd d d f }tj|tjdd d d f }|| }	| j|	|d}
|
|j}
| |
}|g d	d}|
|d||f }|S )Nr   r   )rO   )num_buckets)r`   r   r   r3   )rG   r?   r@   rQ   relative_position_buckettorP   r   ri   rR   rA   rj   )r&   xr2   r   ZbszqlenZklenZcontext_positionZmemory_positionrelative_positionZ	rp_bucketvaluesr(   r(   r)   r   k  s    "
z"MPNetEncoder.compute_position_bias   c                 C   s   d}|  }|d }||dk  tj| 7 }t|}|d }||k }|t| | t||  ||   tj }t|t||d }|t	|||7 }|S )Nr   r`   r   )
r   r?   rQ   abslogfloatrg   minZ	full_likewhere)r   r   Zmax_distanceretnZ	max_exactZis_smallZval_if_larger(   r(   r)   r   }  s    
&z%MPNetEncoder.relative_position_bucket)NNFFF)Nr   )r   r   )r+   r,   r-   r6   r?   r   r   boolrN   r   staticmethodr   rS   r(   r(   rC   r)   r   9  s"   
     *
r   c                       s0   e Zd Z fddZejejdddZ  ZS )MPNetPoolerc                    s*   t    t|j|j| _t | _d S r   )r5   r6   r   r   r8   r   ZTanh
activationrB   rC   r(   r)   r6     s    
zMPNetPooler.__init__r   c                 C   s(   |d d df }|  |}| |}|S Nr   )r   r   )r&   rk   Zfirst_token_tensorpooled_outputr(   r(   r)   rN     s    

zMPNetPooler.forwardr   r(   r(   rC   r)   r     s   r   c                       s   e Zd Zd fdd	Zdd Zdd Zdd	 Zedee	j
 ee	j ee	j
 ee	j ee	j ee ee ee eee	j ef d	ddZ  ZS )
MPNetModelTc                    sD   t  | || _t|| _t|| _|r2t|nd| _| 	  dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r5   r6   r   r/   rM   r   encoderr   pooler	post_init)r&   r   add_pooling_layerrC   r(   r)   r6     s    

zMPNetModel.__init__c                 C   s   | j jS r   rM   r9   r&   r(   r(   r)   get_input_embeddings  s    zMPNetModel.get_input_embeddingsc                 C   s   || j _d S r   r   )r&   valuer(   r(   r)   set_input_embeddings  s    zMPNetModel.set_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   r   r   r}   )r&   Zheads_to_pruner   r{   r(   r(   r)   _prune_heads  s    zMPNetModel._prune_headsN)	rH   rl   r2   rm   rI   ro   r   r   r   c	                 K   sR  |d ur|n| j j}|d ur |n| j j}|d ur4|n| j j}|d urV|d urVtdn@|d urt| || | }
n"|d ur| d d }
ntd|d ur|jn|j}|d u rtj	|
|d}| 
||
}| || j j}| j|||d}| j||||||d}|d }| jd ur | |nd }|s>||f|dd   S t|||j|jd	S )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer3   z5You have to specify either input_ids or inputs_embeds)rP   )rH   r2   rI   )rl   rm   ro   r   r   r   r   )r   Zpooler_outputrk   r   )r   ro   r   use_return_dictrX   Z%warn_if_padding_and_no_attention_maskrG   rP   r?   ZonesZget_extended_attention_maskZget_head_maskr   rM   r   r   r   rk   r   )r&   rH   rl   r2   rm   rI   ro   r   r   rJ   rK   rP   Zextended_attention_maskZembedding_outputZencoder_outputssequence_outputr   r(   r(   r)   rN     sH    

zMPNetModel.forward)T)NNNNNNNN)r+   r,   r-   r6   r   r   r   r   r   r?   
LongTensorFloatTensorr   r   r   r   r   rN   rS   r(   r(   rC   r)   r     s0           r   c                       s   e Zd ZdgZ fddZdd Zdd Zedee	j
 ee	j ee	j
 ee	j ee	j ee	j
 ee ee ee eee	j ef d	
d
dZ  ZS )MPNetForMaskedLMzlm_head.decoderc                    s0   t  | t|dd| _t|| _|   d S NF)r   )r5   r6   r   r   r%   lm_headr   rB   rC   r(   r)   r6     s    
zMPNetForMaskedLM.__init__c                 C   s   | j jS r   )r   decoderr   r(   r(   r)   get_output_embeddings  s    z&MPNetForMaskedLM.get_output_embeddingsc                 C   s   || j _|j| j _d S r   )r   r   r!   )r&   Znew_embeddingsr(   r(   r)   set_output_embeddings  s    z&MPNetForMaskedLM.set_output_embeddingsN
rH   rl   r2   rm   rI   labelsro   r   r   r   c
              
   C   s   |	dur|	n| j j}	| j||||||||	d}
|
d }| |}d}|durnt }||d| j j|d}|	s|f|
dd  }|dur|f| S |S t|||
j|
j	dS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nrl   r2   rm   rI   ro   r   r   r   r3   r`   losslogitsrk   r   )
r   r   r   r   r   re   r7   r   rk   r   )r&   rH   rl   r2   rm   rI   r   ro   r   r   rr   r   Zprediction_scoresZmasked_lm_lossloss_fctr   r(   r(   r)   rN     s4    
zMPNetForMaskedLM.forward)	NNNNNNNNN)r+   r,   r-   Z_tied_weights_keysr6   r   r   r   r   r?   r   r   r   r   r   r   r   rN   rS   r(   r(   rC   r)   r     s4   	         r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )r%   z5MPNet Head for masked and permuted language modeling.c                    sh   t    t|j|j| _tj|j|jd| _tj|j|j	dd| _
tt|j	| _| j| j
_d S )Nr0   F)r!   )r5   r6   r   r   r8   r   r$   r;   
layer_normr7   r   	Parameterr?   Zzerosr!   rB   rC   r(   r)   r6   F  s    
zMPNetLMHead.__init__c                 C   s   | j | j_ d S r   )r!   r   r   r(   r(   r)   _tie_weightsQ  s    zMPNetLMHead._tie_weightsc                 K   s*   |  |}t|}| |}| |}|S r   )r   r
   r   r   r&   featuresrJ   r   r(   r(   r)   rN   T  s
    


zMPNetLMHead.forward)r+   r,   r-   __doc__r6   r   rN   rS   r(   r(   rC   r)   r%   C  s   r%   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )Zcustom_introc                       s   e Zd Z fddZedeej eej eej eej eej eej ee	 ee	 ee	 e
eej ef d
ddZ  ZS )MPNetForSequenceClassificationc                    s8   t  | |j| _t|dd| _t|| _|   d S r   )r5   r6   
num_labelsr   r   MPNetClassificationHead
classifierr   rB   rC   r(   r)   r6   f  s
    
z'MPNetForSequenceClassification.__init__Nr   c
              
   C   sp  |	dur|	n| j j}	| j||||||||	d}
|
d }| |}d}|dur,| j jdu r| jdkrnd| j _n4| jdkr|jtjks|jtj	krd| j _nd| j _| j jdkrt
 }| jdkr|| | }n
|||}nN| j jdkrt }||d| j|d}n| j jdkr,t }|||}|	s\|f|
d	d  }|durX|f| S |S t|||
j|
jd
S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr3   r`   r   )r   r   r   r   Zproblem_typer   rO   r?   rQ   rY   r   squeezer   re   r   r   rk   r   r&   rH   rl   r2   rm   rI   r   ro   r   r   rr   r   r   r   r   r   r(   r(   r)   rN   p  sR    




"


z&MPNetForSequenceClassification.forward)	NNNNNNNNN)r+   r,   r-   r6   r   r   r?   r   r   r   r   r   r   r   rN   rS   r(   r(   rC   r)   r   _  s.   
         r   c                       s   e Zd Z fddZedeej eej eej eej eej eej ee	 ee	 ee	 e
eej ef d
ddZ  ZS )MPNetForMultipleChoicec                    s@   t  | t|| _t|j| _t|j	d| _
|   d S )Nr   )r5   r6   r   r   r   r<   r=   r>   r   r8   r   r   rB   rC   r(   r)   r6     s
    
zMPNetForMultipleChoice.__init__Nr   c
              
   C   sT  |	dur|	n| j j}	|dur&|jd n|jd }
|durJ|d|dnd}|durh|d|dnd}|dur|d|dnd}|dur|d|d|dnd}| j||||||||	d}|d }| |}| |}|d|
}d}|durt }|||}|	s@|f|dd  }|dur<|f| S |S t	|||j
|jdS )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r3   ra   )r2   rl   rm   rI   ro   r   r   r`   r   )r   r   rd   re   rG   r   r>   r   r   r   rk   r   )r&   rH   rl   r2   rm   rI   r   ro   r   r   Znum_choicesZflat_input_idsZflat_position_idsZflat_attention_maskZflat_inputs_embedsrr   r   r   Zreshaped_logitsr   r   r   r(   r(   r)   rN     sH    $




zMPNetForMultipleChoice.forward)	NNNNNNNNN)r+   r,   r-   r6   r   r   r?   r   r   r   r   r   r   r   rN   rS   r(   r(   rC   r)   r     s.   
         r   c                       s   e Zd Z fddZedeej eej eej eej eej eej ee	 ee	 ee	 e
eej ef d
ddZ  ZS )MPNetForTokenClassificationc                    sN   t  | |j| _t|dd| _t|j| _t	|j
|j| _|   d S r   )r5   r6   r   r   r   r   r<   r=   r>   r   r8   r   r   rB   rC   r(   r)   r6     s    z$MPNetForTokenClassification.__init__Nr   c
              
   C   s   |	dur|	n| j j}	| j||||||||	d}
|
d }| |}| |}d}|durvt }||d| j|d}|	s|f|
dd  }|dur|f| S |S t|||
j	|
j
dS )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr   r   r3   r`   r   )r   r   r   r>   r   r   re   r   r   rk   r   r   r(   r(   r)   rN     s6    

z#MPNetForTokenClassification.forward)	NNNNNNNNN)r+   r,   r-   r6   r   r   r?   r   r   r   r   r   r   r   rN   rS   r(   r(   rC   r)   r     s.            r   c                       s(   e Zd ZdZ fddZdd Z  ZS )r   z-Head for sentence-level classification tasks.c                    s@   t    t|j|j| _t|j| _t|j|j	| _
d S r   )r5   r6   r   r   r8   r   r<   r=   r>   r   out_projrB   rC   r(   r)   r6   W  s    
z MPNetClassificationHead.__init__c                 K   sL   |d d dd d f }|  |}| |}t|}|  |}| |}|S r   )r>   r   r?   tanhr   r   r(   r(   r)   rN   ]  s    




zMPNetClassificationHead.forward)r+   r,   r-   r   r6   rN   rS   r(   r(   rC   r)   r   T  s   r   c                       s   e Zd Z fddZedeej eej eej eej eej eej eej ee	 ee	 ee	 e
eej ef dddZ  ZS )MPNetForQuestionAnsweringc                    s@   t  | |j| _t|dd| _t|j|j| _| 	  d S r   )
r5   r6   r   r   r   r   r   r8   
qa_outputsr   rB   rC   r(   r)   r6   i  s
    z"MPNetForQuestionAnswering.__init__N)rH   rl   r2   rm   rI   start_positionsend_positionsro   r   r   r   c              
   C   sN  |
d ur|
n| j j}
| j|||||||	|
d}|d }| |}|jddd\}}|d }|d }d }|d ur|d urt| dkr|d}t| dkr|d}|d}|	d|}|	d|}t
|d}|||}|||}|| d }|
s8||f|dd   }|d ur4|f| S |S t||||j|jdS )	Nr   r   r   r3   rb   )Zignore_indexr`   )r   start_logits
end_logitsrk   r   )r   r   r   r   splitr   rj   ry   rG   clampr   r   rk   r   )r&   rH   rl   r2   rm   rI   r   r   ro   r   r   rr   r   r   r   r   Z
total_lossZignored_indexr   Z
start_lossZend_lossr   r(   r(   r)   rN   s  sN    






z!MPNetForQuestionAnswering.forward)
NNNNNNNNNN)r+   r,   r-   r6   r   r   r?   r   r   r   r   r   r   r   rN   rS   r(   r(   rC   r)   r   g  s2   
          r   c                 C   s2   |  | }tj|dd|| }| | S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    r   rb   )nerY   r?   ZcumsumZtype_asrQ   )rH   r#   maskZincremental_indicesr(   r(   r)   rE     s    rE   )r   r   r   r   r   r   r   r   )7r   rg   typingr   r   r?   r   Ztorch.nnr   r   r   Zactivationsr	   r
   Zmodeling_outputsr   r   r   r   r   r   r   Zmodeling_utilsr   Zpytorch_utilsr   r   utilsr   r   Zconfiguration_mpnetr   Z
get_loggerr+   loggerr   Moduler/   rT   rt   r   r   r   r   r   r   r   r%   r   r   r   r   r   rE   __all__r(   r(   r(   r)   <module>   sN   $	
;M/ ZYGP\AK