a
    h*-                     @   s   d dl mZ d dlmZ d dlZd dlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZ dd	lmZmZmZmZ eeZG d
d dejZeG dd deZeG dd deZeG dd deZdS )    )partial)OptionalN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                       s$   e Zd ZdZdZ fddZ  ZS )GradientCheckpointingLayera  Base class for layers with gradient checkpointing.

    This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
    (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
    enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

    Important:

        When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
        must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

        Example:

            ```python
            >>> # Correct - hidden_states passed as positional arg
            >>> out = self.layer(hidden_states, attention_mask=attention_mask)

            >>> # Incorrect - hidden_states passed as keyword arg
            >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
            ```
    Fc                    s  | j r| jrd}| jj}d| d}d|v rH|d rHd|d< |d7 }d}d|v rp|d d urpd |d< |d7 }d}d	|v r|d	 d urd |d	< |d
7 }d}d|v r|d d urd |d< |d7 }d}|r|dd }t| | jtt	 j
fi |g|R  S t	 j
|i |S )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,TZpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,Z
layer_pastz `layer_past=None`,,.)gradient_checkpointingZtraining	__class____name__rstriploggerwarning_onceZ_gradient_checkpointing_funcr   super__call__)selfargskwargsZdo_warnZ
layer_namemessager    X/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/modeling_layers.pyr   <   s2    
"z#GradientCheckpointingLayer.__call__)r   
__module____qualname____doc__r   r   __classcell__r"   r"   r!   r#   r   #   s   r   c                       sr   e Zd ZdZ fddZeedeej	 eej
 eej	 ee eej eej	 ee ee ed	ddZ  ZS )	 GenericForSequenceClassificationmodelc                    sJ   t  | |j| _t| | jt| tj|j	| jdd| _
|   d S )NF)Zbias)r   __init__
num_labelssetattrbase_model_prefixr
   from_confignnLinearhidden_sizescore	post_initr   configr!   r"   r#   r*   e   s
    z)GenericForSequenceClassification.__init__N)		input_idsattention_maskposition_idsr   inputs_embedslabelsr   r   returnc                 K   s4  t | | j|f|||||d|}	|	j}
| |
}|d urH|jd }n
|jd }| jjd u rn|dkrntd| jjd u rd}nb|d ur|| jjk|j	t
j}t
j|jd |j	t
jd}|| d}nd}t| jj d |t
j||j	d|f }d }|d ur| j|||| jd	}t|||	j|	j|	jd
S )Nr7   r8   r   r9   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)deviceZdtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)r>   )logitsr:   pooled_logitsr5   )lossr?   r   hidden_states
attentions)getattrr-   last_hidden_stater2   shaper5   Zpad_token_id
ValueErrortor>   torchZint32ZarangeZargmaxr   r   r   r   loss_functionr   r   rB   rC   )r   r6   r7   r8   r   r9   r:   r   r   Ztransformer_outputsrB   r?   Z
batch_sizeZlast_non_pad_tokenZnon_pad_maskZtoken_indicesr@   rA   r"   r"   r#   forwardo   sN    
	


z(GenericForSequenceClassification.forward)NNNNNNN)r   r$   r%   r-   r*   r   r   r   rI   
LongTensorTensorr   FloatTensorboolr   r   r   rK   r'   r"   r"   r!   r#   r(   a   s,   
       r(   c                       s   e Zd ZdZ fddZdd Zdd Zeede	e
j e	e
j e	e
j e	e e	e
j e	e
j e	e
j ee ed		d
dZ  ZS )GenericForQuestionAnsweringr)   c                    s<   t  | t| | jt| t|jd| _	| 
  d S )N   )r   r*   r,   r-   r
   r.   r/   r0   r1   
qa_outputsr3   r4   r!   r"   r#   r*      s    z$GenericForQuestionAnswering.__init__c                 C   s   t | | jjS NrD   r-   Zembed_tokens)r   r"   r"   r#   get_input_embeddings   s    z0GenericForQuestionAnswering.get_input_embeddingsc                 C   s   |t | | j_d S rS   rT   )r   valuer"   r"   r#   set_input_embeddings   s    z0GenericForQuestionAnswering.set_input_embeddingsN)	r6   r7   r8   r   r9   start_positionsend_positionsr   r;   c                 K   s   t | | j|f||||d|}	|	j}
| |
}|jddd\}}|d }|d }d }|d ur|d ur| j||||fi |}t||||	j	|	j
dS )N)r7   r8   r   r9   r   r=   )dim)rA   start_logits
end_logitsrB   rC   )rD   r-   rE   rR   splitZsqueeze
contiguousrJ   r   rB   rC   )r   r6   r7   r8   r   r9   rX   rY   r   outputssequence_outputr?   r[   r\   rA   r"   r"   r#   rK      s2    
	
z#GenericForQuestionAnswering.forward)NNNNNNN)r   r$   r%   r-   r*   rU   rW   r   r   r   rI   rL   rM   r   rN   r   r   r   rK   r'   r"   r"   r!   r#   rP      s0   	       rP   c                       sl   e Zd ZdZ fddZeedeej	 eej
 eej	 ee eej eej	 ee edddZ  ZS )	GenericForTokenClassificationr)   c                    s   t  | |j| _t| | jt| t|dd d ur@|j}nt|dd d urX|j	}nd}t
|| _t
|j|j| _|   d S )Nclassifier_dropouthidden_dropoutg?)r   r*   r+   r,   r-   r
   r.   rD   rb   rc   r/   ZDropoutdropoutr0   r1   r2   r3   )r   r5   rb   r!   r"   r#   r*      s    z&GenericForTokenClassification.__init__N)r6   r7   r8   r   r9   r:   r   r;   c                 K   sn   t | | j|f|||||d|}	|	j}
| |
}
| |
}d }|d urZ| ||| j}t|||	j|	j	dS )Nr<   )rA   r?   rB   rC   )
rD   r-   rE   rd   r2   rJ   r5   r	   rB   rC   )r   r6   r7   r8   r   r9   r:   r   r   r_   r`   r?   rA   r"   r"   r#   rK      s.    
	

z%GenericForTokenClassification.forward)NNNNNNN)r   r$   r%   r-   r*   r   r   r   rI   rL   rM   r   rN   rO   r	   rK   r'   r"   r"   r!   r#   ra      s*          ra   ) 	functoolsr   typingr   rI   Ztorch.nnr/   Zcache_utilsr   Zmodeling_outputsr   r   r   r	   Zmodels.autor
   Zprocessing_utilsr   utilsr   r   r   r   Z
get_loggerr   r   Moduler   objectr(   rP   ra   r"   r"   r"   r#   <module>   s    
>J<