from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import logging
from ..deepseek_v3.modeling_deepseek_v3 import (
    DeepseekV3DecoderLayer,
    DeepseekV3MLP,
    DeepseekV3MoE,
    DeepseekV3PreTrainedModel,
    DeepseekV3TopkRouter,
)
from ..qwen3.modeling_qwen3 import (
    Qwen3Attention,
    Qwen3ForCausalLM,
    Qwen3Model,
    Qwen3RMSNorm,
    Qwen3RotaryEmbedding,
    TransformersKwargs,
)
from .configuration_dots1 import Dots1Config


logger = logging.get_logger(__name__)


# Dots1 reuses the Qwen3 norm, rotary embedding, and attention implementations
# unchanged; subclassing only renames them for the generated modeling file.
class Dots1RMSNorm(Qwen3RMSNorm):
    pass


class Dots1RotaryEmbedding(Qwen3RotaryEmbedding):
    pass


class Dots1Attention(Qwen3Attention):
    pass


# The MLP, MoE block, and top-k router are taken from DeepseekV3, also unchanged.
class Dots1MLP(DeepseekV3MLP):
    pass


class Dots1MoE(DeepseekV3MoE):
    pass


class Dots1TopkRouter(DeepseekV3TopkRouter):
    pass


class Dots1DecoderLayer(DeepseekV3DecoderLayer):
    def __init__(self, config: Dots1Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Each layer records its attention type from the per-layer
        # `config.layer_types` list.
        self.attention_type = config.layer_types[layer_idx]


class Dots1PreTrainedModel(DeepseekV3PreTrainedModel):
    pass


class Dots1Model(Qwen3Model):
    pass


class Dots1ForCausalLM(Qwen3ForCausalLM):
    def forward(self, **super_kwargs: Unpack[TransformersKwargs]) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Dots1ForCausalLM

        >>> model = Dots1ForCausalLM.from_pretrained("rednote-hilab/dots1.llm1.inst")
        >>> tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots1.llm1.inst")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


__all__ = ["Dots1PreTrainedModel", "Dots1Model", "Dots1ForCausalLM"]
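
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the shipped module): `Dots1DecoderLayer`
# above reads its attention type from the per-layer `config.layer_types`
# list. Assuming `Dots1Config` populates a default `layer_types` when none
# is passed, the layer-to-type mapping can be inspected like so:
#
#     from transformers import Dots1Config
#
#     config = Dots1Config(num_hidden_layers=4)
#     for layer_idx, layer_type in enumerate(config.layer_types):
#         # each Dots1DecoderLayer stores its entry as `self.attention_type`
#         print(layer_idx, layer_type)
# ---------------------------------------------------------------------------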