a
    h[                     @   sH   d dl mZ d dlmZ d dlmZ d dlmZmZ ee	dddZ
dS )	    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILE)encoding
output_dirc           	      C   s   t |}|jdd |d t }|t }t| }t| }z:ddlm} ddlm	} t
| trl|| } || j| W n ty   tdY n0 t|| j| jd }|| d	S )
a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktokenr   )get_encoding)dump_tiktoken_bpezY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.)Z
vocab_filepatternZadditional_special_tokensN)r   mkdirr   r   strabsoluter
   r   Ztiktoken.loadr   
isinstanceZ_mergeable_ranksImportError
ValueErrorr   Z_pat_strZ_special_tokensZ	convertedsave)	r   r   Z	save_fileZtokenizer_fileZsave_file_absoluteZoutput_file_absoluter   r   	tokenizer r   ^/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fast   s$    


r   N)pathlibr   typingr   Z#transformers.convert_slow_tokenizerr   Z$transformers.tokenization_utils_fastr   r   r   r   r   r   r   r   <module>   s   