"""PyTorch BitNet model."""

from typing import Callable, Optional

import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import CausalLMOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import logging
from ...utils.deprecation import deprecate_kwarg
from ..gemma.modeling_gemma import GemmaMLP
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaModel,
    LlamaRMSNorm,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from .configuration_bitnet import BitNetConfig


logger = logging.get_logger(__name__)


class BitNetRMSNorm(LlamaRMSNorm):
    pass


class BitNetMLP(GemmaMLP):
    def __init__(self, config: BitNetConfig):
        super().__init__(config)
        self.ffn_sub_norm = BitNetRMSNorm(config.intermediate_size, eps=config.rms_norm_eps)

    def forward(self, x):
        down_proj = self.down_proj(self.ffn_sub_norm(self.act_fn(self.gate_proj(x)) * self.up_proj(x)))
        return down_proj


class BitNetAttention(LlamaAttention):
    def __init__(self, config: BitNetConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        self.attn_sub_norm = BitNetRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58")
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_values: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_values is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.attn_sub_norm(attn_output)  # difference with Llama
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class BitNetDecoderLayer(LlamaDecoderLayer):
    pass


class BitNetModel(LlamaModel):
    pass


class BitNetForCausalLM(LlamaForCausalLM):
    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = None
    _pp_plan = None

    def forward(self, **super_kwargs) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```"""
        return super().forward(**super_kwargs)


__all__ = ["BitNetForCausalLM", "BitNetModel", "BitNetPreTrainedModel"]