from dataclasses import dataclass
from typing import Optional, Union

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_outputs import ImageClassifierOutputWithNoAttention
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, logging
from ..auto import AutoModelForImageTextToText
from .configuration_shieldgemma2 import ShieldGemma2Config


logger = logging.get_logger(__name__)


@dataclass
class ShieldGemma2ImageClassifierOutputWithNoAttention(ImageClassifierOutputWithNoAttention):
    """ShieldGemma2 classifies images as violative or not relative to a specific policy.

    Args:
        probabilities (`torch.Tensor` of shape `(batch_size, 2)`, *optional*):
            The `Yes`/`No` probabilities obtained by applying softmax to the selected logits.
    """

    probabilities: Optional[torch.Tensor] = None


@auto_docstring
class ShieldGemma2ForImageClassification(PreTrainedModel):
    config: ShieldGemma2Config
    _checkpoint_conversion_mapping = {
        "model.language_model.model": "model.model.language_model",
        "model.vision_tower": "model.model.vision_tower",
        "model.multi_modal_projector": "model.model.multi_modal_projector",
        "model.language_model.lm_head": "model.lm_head",
    }

    def __init__(self, config: ShieldGemma2Config):
        super().__init__(config=config)
        # Vocabulary ids of the "Yes"/"No" answer tokens; the defaults can be
        # overridden via attributes on the config.
        self.yes_token_index = getattr(config, "yes_token_index", 10_784)
        self.no_token_index = getattr(config, "no_token_index", 3_771)
        self.model = AutoModelForImageTextToText.from_config(config=config)

    def get_input_embeddings(self):
        return self.model.language_model.get_input_embeddings()

    def set_input_embeddings(self, value):
        self.model.language_model.set_input_embeddings(value)

    def get_output_embeddings(self):
        return self.model.language_model.get_output_embeddings()

    def set_output_embeddings(self, new_embeddings):
        self.model.language_model.set_output_embeddings(new_embeddings)

    def set_decoder(self, decoder):
        self.model.language_model.set_decoder(decoder)

    def get_decoder(self):
        return self.model.language_model.get_decoder()

    def tie_weights(self):
        return self.model.language_model.tie_weights()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        pixel_values: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Union[list[torch.FloatTensor], Cache]] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        cache_position: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        logits_to_keep: Union[int, torch.Tensor] = 0,
        **lm_kwargs,
    ) -> ShieldGemma2ImageClassifierOutputWithNoAttention:
        """
        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 contains the logits for the `Yes` token and the second position
                    contains the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 contains the probability of predicting the `Yes` token and the
                    second position contains the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 0]` (the `Yes` probability) to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
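
        Example:

        A minimal usage sketch; the checkpoint id and the processor call pattern below are assumptions based on the
        publicly released ShieldGemma 2 checkpoint rather than guarantees of this module.

        ```python
        >>> import torch
        >>> import requests
        >>> from PIL import Image
        >>> from transformers import AutoProcessor, ShieldGemma2ForImageClassification

        >>> model_id = "google/shieldgemma-2-4b-it"  # assumed checkpoint id
        >>> processor = AutoProcessor.from_pretrained(model_id)
        >>> model = ShieldGemma2ForImageClassification.from_pretrained(model_id)

        >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> # With the processor's default policies, the batch has one row per (image, policy) pair.
        >>> inputs = processor(images=[image], return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        >>> outputs.probabilities.shape  # (len(images) * len(policies), 2)
        ```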
        """
        outputs = self.model(
            input_ids=input_ids,
            pixel_values=pixel_values,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            token_type_ids=token_type_ids,
            cache_position=cache_position,
            inputs_embeds=inputs_embeds,
            labels=labels,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            logits_to_keep=logits_to_keep,
            **lm_kwargs,
        )

        # Keep only the final sequence position, restricted to the `Yes`/`No`
        # token ids, so each batch row reduces to a two-way classification.
        logits = outputs.logits
        selected_logits = logits[:, -1, [self.yes_token_index, self.no_token_index]]
        probabilities = torch.softmax(selected_logits, dim=-1)

        return ShieldGemma2ImageClassifierOutputWithNoAttention(
            logits=selected_logits,
            probabilities=probabilities,
        )


__all__ = ["ShieldGemma2ForImageClassification"]
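

# A minimal, self-contained sketch (not part of the upstream module) of the
# scoring rule `forward` applies: keep the final-position logits for only the
# `Yes`/`No` vocabulary ids and softmax over that pair. The batch shape,
# vocabulary size, and token ids below are illustrative placeholders.
if __name__ == "__main__":
    batch_size, seq_len, vocab_size = 2, 7, 262_144
    demo_logits = torch.randn(batch_size, seq_len, vocab_size)
    yes_token_index, no_token_index = 10_784, 3_771

    # Final position only, restricted to the two answer tokens: shape (batch, 2).
    selected = demo_logits[:, -1, [yes_token_index, no_token_index]]
    probabilities = torch.softmax(selected, dim=-1)  # rows: [P(Yes), P(No)]
    print(probabilities)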