"""
Integration with GGML / The file is copied and adapted from https://github.com/99991/pygguf
with extra methods being exposed
"""

from array import array

import numpy as np
from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
from tokenizers.models import BPE, Unigram

from .. import AddedToken
from ..convert_slow_tokenizer import GemmaConverter, GPT2Converter, LlamaConverter, Qwen2Converter, T5Converter
from ..utils import logging
from ..utils.logging import tqdm


logger = logging.get_logger(__name__)
model_typeZ_model_name_or_path)architecturenameZmax_position_embeddingsZnum_hidden_layersZintermediate_sizeZhidden_sizeZhead_dimZ
rope_thetaZnum_attention_headsZnum_key_value_headsZrms_norm_eps
vocab_size)
context_lengthblock_countfeed_forward_lengthembedding_lengthrope.dimension_countrope.freq_baseattention.head_countattention.head_count_kv attention.layer_norm_rms_epsilonr   Znum_expertsZnum_experts_per_tok)r   r   r   r   r   r   r   r   r   r   expert_countexpert_used_count)r   r   r   r   r   r   attention.key_lengthr   r   r   r   r    r!   bos_token_ideos_token_idunk_token_idpad_token_id)ggml.bos_token_idggml.eos_token_idggml.unknown_token_idggml.padding_token_idZn_layerZn_headZlayer_norm_epsilon)r   r   r   r   attention.layer_norm_epsilonZn_positionsZ
num_layersZd_ffZd_modelZd_kvZ	num_headsZrelative_attention_num_bucketsdecoder_start_token_id)r   r   r   r   r"   r   r   r+   z attention.relative_buckets_countr,   r   Zlayer_norm_eps)	r   r   r   r   r   r   r   r+   r   Zn_ctxZn_embdr   )r   r   r   r   r   r+   Znorm_epsilon)r   r   r   r   r   r   r+   Zconv_kernelZ
state_sizeZtime_step_rank)	r   r   r   r   r   zssm.conv_kernelzssm.state_sizezssm.time_step_rankzssm.inner_sizeZnorm_epsZsliding_window)r   r   r   r   r   r   r"   r   r   r   zattention.sliding_windowr   )ZgeneralllamaZmistralqwen2Zqwen2moeqwen3	qwen3_moefalcon	tokenizerphi3bloomt5stablelmgpt2
starcoder2mambanemotrongemma2Zgemma3decitokenizer_typetokensscores
token_typemergesadd_prefix_space)

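# A minimal, hypothetical sketch of how GGUF_CONFIG_MAPPING is meant to be applied
# (the helper below is illustrative only and not part of this module): GGUF metadata
# keys such as "llama.context_length" carry an architecture prefix, and the suffix is
# renamed to the matching transformers config attribute.
#
#     def _rename_gguf_keys(architecture, gguf_metadata):
#         mapping = GGUF_CONFIG_MAPPING.get(architecture, {})
#         renamed = {}
#         for key, value in gguf_metadata.items():
#             prefix, _, suffix = key.partition(".")
#             if prefix == architecture and suffix in mapping:
#                 renamed[mapping[suffix]] = value
#         return renamed
#
#     _rename_gguf_keys("llama", {"llama.context_length": 4096})
#     # -> {"max_position_embeddings": 4096}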
def _gguf_parse_value(_value, data_type):
    if not isinstance(data_type, list):
        data_type = [data_type]
    if len(data_type) == 1:
        data_type = data_type[0]
        array_data_type = None
    else:
        if data_type[0] != 9:
            raise ValueError("Received multiple types, therefore expected the first type to indicate an array.")
        data_type, array_data_type = data_type

    # type codes follow the GGUF value-type enum
    if data_type in [0, 1, 2, 3, 4, 5, 10, 11]:
        # (u)int8 / (u)int16 / (u)int32 / (u)int64
        _value = int(_value[0])
    elif data_type in [6, 12]:
        # float32 / float64
        _value = float(_value[0])
    elif data_type in [7]:
        # bool
        _value = bool(_value[0])
    elif data_type in [8]:
        # string, stored as a sequence of bytes
        _value = array("B", list(_value)).tobytes().decode()
    elif data_type in [9]:
        # array: parse the elements with the nested type
        _value = _gguf_parse_value(_value, array_data_type)
    return _value
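# A minimal usage sketch of `_gguf_parse_value` (the type codes follow the GGUF
# value-type enum; the example inputs below are illustrative, not taken from a
# real checkpoint):
#
#     _gguf_parse_value([32768], 4)                  # GGUF UINT32  -> 32768
#     _gguf_parse_value([1.0], 6)                    # GGUF FLOAT32 -> 1.0
#     _gguf_parse_value([108, 108, 97, 109, 97], 8)  # GGUF STRING  -> "llama"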
class GGUFTokenizerSkeleton:
    def __init__(self, dict_):
        for k, v in dict_.items():
            setattr(self, k, v)

        if not hasattr(self, "merges"):
            if not hasattr(self, "tokens") or not hasattr(self, "scores"):
                raise ValueError(
                    "tokens and scores need to be passed for a LLaMa tokenizer without merges to be instantiated."
                )
            tokens = self.tokens
            scores = self.scores
            vocab = {t: scores[i] for i, t in enumerate(tokens)}

            logger.warning("Merges were not in checkpoint, building merges on the fly.")
            merges = []
            for merge, piece_score in tqdm(vocab.items()):
                local = []
                for index in range(1, len(merge)):
                    piece_l, piece_r = merge[:index], merge[index:]
                    if piece_l in tokens and piece_r in tokens:
                        local.append((piece_l, piece_r, piece_score))
                local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]), reverse=True)
                merges.extend(local)
            merges = sorted(merges, key=lambda val: val[2], reverse=True)
            merges = [(val[0], val[1]) for val in merges]
            self.merges = merges
        else:
            self.merges = [tuple(merge.split(" ")) for merge in self.merges]
            if not hasattr(self, "scores"):
                self.scores = [None for _ in range(len(self.tokens))]

        if not hasattr(self, "added_tokens"):
            self.added_tokens = []

        if not hasattr(self, "unk_token_id"):
            self.unk_token_id = None

        # some GGUF checkpoints store the unknown token id under `unknown_token_id`
        if hasattr(self, "unknown_token_id") and self.unk_token_id is None:
            self.unk_token_id = self.unknown_token_id
class GGUFLlamaConverter(LlamaConverter):
    def __init__(self, tokenizer_dict):
        self.proto = GGUFTokenizerSkeleton(tokenizer_dict)
        self.original_tokenizer = self.proto
        self.additional_kwargs = {}
        self.is_llama_3_tokenizer = getattr(self.proto, "tokenizer_type", "llama") != "llama"

    def vocab(self, proto):
        return list(zip(proto.tokens, proto.scores))

    def merges(self, proto):
        return proto.merges

    def tokenizer(self, proto):
        vocab_scores = self.vocab(self.proto)
        merges = self.merges(self.proto)
        bpe_vocab = {word: i for i, (word, _score) in enumerate(vocab_scores)}

        unk_token = proto.tokens[proto.unk_token_id] if proto.unk_token_id is not None else None
        bos_token = proto.tokens[proto.bos_token_id] if getattr(proto, "bos_token_id", None) is not None else None
        eos_token = proto.tokens[proto.eos_token_id] if getattr(proto, "eos_token_id", None) is not None else None

        tokenizer = Tokenizer(BPE(bpe_vocab, merges, unk_token=unk_token, fuse_unk=True, byte_fallback=True))

        special_tokens = []

        if not hasattr(self.proto, "token_type"):
            if unk_token is not None:
                special_tokens.append(AddedToken(unk_token, normalized=False, special=True))
            if bos_token is not None:
                special_tokens.append(AddedToken(bos_token, normalized=False, special=True))
            if eos_token is not None:
                special_tokens.append(AddedToken(eos_token, normalized=False, special=True))
        else:
            # token type 3 marks special tokens in GGUF vocabularies
            special_tokens_idx = np.where(np.array(self.proto.token_type) == 3)[0]
            for idx in special_tokens_idx:
                special_tokens.append(AddedToken(self.proto.tokens[idx], normalized=False, special=True))

        if len(special_tokens) != 0:
            tokenizer.add_special_tokens(special_tokens)

        if len(self.proto.added_tokens) != 0:
            tokenizer.add_tokens(
                [AddedToken(added_token, normalized=False, special=False) for added_token in self.proto.added_tokens]
            )

        self.additional_kwargs["unk_token"] = unk_token
        self.additional_kwargs["eos_token"] = eos_token
        self.additional_kwargs["bos_token"] = bos_token

        if self.is_llama_3_tokenizer:
            self.additional_kwargs["add_prefix_space"] = None
            self.additional_kwargs["clean_up_tokenization_spaces"] = True
            self.additional_kwargs["legacy"] = False
            self.original_tokenizer.legacy = False

        return tokenizer

    def decoder(self, replacement, add_prefix_space):
        sequence = [
            decoders.ByteFallback(),
            decoders.Fuse(),
            decoders.Replace("▁", " "),
        ]

        if self.is_llama_3_tokenizer:
            sequence += [decoders.ByteLevel(add_prefix_space=False, trim_offsets=False, use_regex=True)]
        if add_prefix_space:
            sequence += [decoders.Strip(content=" ", left=1)]
        return decoders.Sequence(sequence)

    def converted(self) -> Tokenizer:
        tokenizer = self.tokenizer(self.proto)

        # assemble the pipeline: normalizer, pre-tokenizer, decoder, post-processor
        normalizer = self.normalizer(self.proto)
        if normalizer is not None:
            tokenizer.normalizer = normalizer

        replacement = "▁"
        add_prefix_space = True
        if hasattr(self.original_tokenizer, "add_prefix_space"):
            add_prefix_space = self.original_tokenizer.add_prefix_space

        pre_tokenizer = self.pre_tokenizer(replacement, add_prefix_space)
        if pre_tokenizer is not None:
            tokenizer.pre_tokenizer = pre_tokenizer

        tokenizer.decoder = self.decoder(replacement, add_prefix_space)
        post_processor = self.post_processor()
        if post_processor:
            tokenizer.post_processor = post_processor

        if self.is_llama_3_tokenizer:
            # llama 3 uses a byte-level BPE tokenizer rather than sentencepiece
            tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(
                add_prefix_space=False, trim_offsets=False, use_regex=True
            )
            tokenizer.normalizer = normalizers.Sequence([])

        return tokenizer
zGGUFQwen2Converter.__init__returnc              	      s^   dd t | jjD }| jj}t ||}|tddddtddddtddddg |S )	Nc                 S   s   i | ]\}}||qS r\   r\   r_   r`   r   r\   r\   r]   rb     rc   z0GGUFQwen2Converter.converted.<locals>.<dictcomp><|endoftext|>FTr   z<|im_start|>z
<|im_end|>)rv   r   r>   rA   superr   r   r   r}   re   rA   r2   	__class__r\   r]   r     s    zGGUFQwen2Converter.convertedr   r   r   r   r   r   __classcell__r\   r\   r   r]   r     s   r   c                   @   sB   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZedddZ	dS )GGUFPhi3Converterc                 C   s   t || _| j| _i | _d S ro   r^   r   r   r   r   r\   r\   r]   r     s    
zGGUFPhi3Converter.__init__c                 C   s   t t|j|jS ro   r   r   r\   r\   r]   re     s    zGGUFPhi3Converter.vocabc                 C   s   |j S ro   r   r   r\   r\   r]   rA     s    zGGUFPhi3Converter.mergesc                 C   sv  |  | j}| | j}dd t|D }tt||}|tddddddtddddtd	dddd
tddddd
tddddd
tddddd
tddddd
tddddd
tddddd
tddddd
tddddd
tddddd
g |jd ur|j	|j nd | j
d< |jd ur$|j	|j nd | j
d< |jd urF|j	|j nd | j
d< |jd urh|j	|j nd | j
d< |S )Nc                 S   s   i | ]\}\}}||qS r\   r\   r   r\   r\   r]   rb     rc   z/GGUFPhi3Converter.tokenizer.<locals>.<dictcomp></s>TF)rstriplstripr   r   r   r   z<|assistant|>)r   r   r   z<|placeholder1|>z<|placeholder2|>z<|placeholder3|>z<|placeholder4|>z
<|system|>z<|end|>z<|placeholder5|>z<|placeholder6|>z<|user|>r   r   r   Z	pad_token)re   r   rA   rv   r   r   r   r   r%   r>   r   r$   r#   r&   )r}   r   r   rA   r   r2   r\   r\   r]   r2     s8    zGGUFPhi3Converter.tokenizerc                 C   s<   t  t  t |dg}|r2|t jdddg7 }t |S )Nrk   rE   r   )r   r   r   r   r   r   r   r\   r\   r]   r   &  s    
zGGUFPhi3Converter.decoderr   c                 C   s:   |  | j}d}d}t| jdr(| jj}| |||_|S )Nr   TrB   )r2   r   ru   r   rB   r   )r}   r2   r   rB   r\   r\   r]   r   1  s    zGGUFPhi3Converter.convertedN)
r   r   r   r   re   rA   r2   r   r   r   r\   r\   r\   r]   r     s   'r   c                       s*   e Zd Zdd Zed fddZ  ZS )GGUFGPTConverterc                 C   s   t || _i | _d S ro   r   r   r\   r\   r]   r   ?  s    
zGGUFGPTConverter.__init__r   c                    s0   dd t | jjD }| jj}t ||}|S )Nc                 S   s   i | ]\}}||qS r\   r\   r   r\   r\   r]   rb   D  rc   z.GGUFGPTConverter.converted.<locals>.<dictcomp>)rv   r   r>   rA   r   r   r   r   r\   r]   r   C  s    zGGUFGPTConverter.convertedr   r\   r\   r   r]   r   >  s   r   c                   @   s:   e Zd Zdd Zdd Zdd Zdd Zed	d
dZdS )GGUFT5Converterc                 C   s>   dg|d< t || _dd t| jjD | _| j| _i | _d S )N
dummy textrA   c                 S   s   i | ]\}}||qS r\   r\   )r_   r   r~   r\   r\   r]   rb   P  rc   z,GGUFT5Converter.__init__.<locals>.<dictcomp>)r^   r   rv   r>   token2idr   r   r   r\   r\   r]   r   K  s
    

zGGUFT5Converter.__init__c                 C   s   t t|j|jS ro   r   r   r\   r\   r]   re   T  s    zGGUFT5Converter.vocabc                 C   sT   t | jddrPg }t | jddr2|tjddg7 }|tjdddg7 }t|S d S )Nr   TrB   r   )prependrk   )patternr   )r   r   r   ZPrependr   r   )r}   r   r   r\   r\   r]   r   W  s    
zGGUFT5Converter.normalizerc                 C   s$   t jddgg dd| jd fgdS )N$Ar   )r   r   z$Br   )singlepairr   )r   ZTemplateProcessingr   )r}   r\   r\   r]   r   `  s    zGGUFT5Converter.post_processorr   c                 C   s   |  | j}tt|| jjdd}| | j}|d ur<||_d}d}t| jdrX| jj}| 	||}|d urr||_	| 
|||_
|  }|r||_|S )NFZunk_idr   r   TrB   )re   r   r   r	   r%   r   ru   r   rB   r   r   r   )r}   r   r2   r   r   rB   r   r   r\   r\   r]   r   i  s.    	zGGUFT5Converter.convertedN)	r   r   r   r   re   r   r   r   r   r\   r\   r\   r]   r   J  s
   			r   c                   @   s:   e Zd Zdd Zdd Zdd Zdd Zed	d
dZdS )GGUFGemmaConverterc                 C   s&   dg|d< t || _| j| _i | _d S )Nr   rA   r   r   r\   r\   r]   r     s    

zGGUFGemmaConverter.__init__c                 C   s   t t|j|j}g }|D ]b\}}|dkr:|d|f qd|v rnt| dkrndt| }|||f q|||f q|S )Nz<0x09>	rk   r   r   )rR   r   r>   r?   rz   rS   strip)r}   r   Zoriginal_vocabZupdated_vocabtokenZscoreZunderscoresr\   r\   r]   re     s    zGGUFGemmaConverter.vocabc                 C   s   t ddS )Nrk   r   )r   r   r   r\   r\   r]   r     s    zGGUFGemmaConverter.normalizerc                 C   s<   t ddt  t  g}|r2|t jdddg7 }t |S )Nr   rk   rE   r   )r   r   r   r   r   r   r   r\   r\   r]   r     s    
zGGUFGemmaConverter.decoderr   c                 C   s   |  | j}tt|| jj| jd}| | j}|d ur>||_d}d}t| jdrZ| jj	}| 
|||_
| ||}|d ur||_|S )Nr   r   TrB   )re   r   r   r	   r%   Zhandle_byte_fallbackr   ru   r   rB   r   r   )r}   r   r2   r   r   rB   r   r\   r\   r]   r     s(    zGGUFGemmaConverter.convertedN)	r   r   r   r   re   r   r   r   r   r\   r\   r\   r]   r     s
   r   )r-   r.   Z	qwen2_moer/   r0   r3   r4   r1   r6   r7   r8   r5   r9   r:   r;   Zgemma3_textr<   Zdecilmr   c                 C   s"   | }t | |}| }||jfS )a6  
    Utilities to convert a tokenizer extracted from a GGUF file into a fast tokenizer instance.

    Args:
        architecture (`str`): The model architecture derived from the GGUF file.
        tokenizer_dict (`dict`):
            The tokenizer attributes extracted from the GGUF file, used to build the backend tokenizer
            for [`~tokenization_utils_base.PreTrainedTokenizerFast`].

    Return:
        A tuple of an instance of [`~tokenizers.Tokenizer`] to be used as the backend tokenizer of a
        [`~tokenization_utils_base.PreTrainedTokenizerFast`], and a dict of additional tokenizer kwargs.
    """
    tokenizer_class_name = architecture
    converter = GGUF_TO_FAST_CONVERTERS[tokenizer_class_name](tokenizer_dict)
    fast_tokenizer = converter.converted()
    return fast_tokenizer, converter.additional_kwargs