import json
import logging
import mmap
import os
import shutil
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Generator, Iterable, Tuple, Union

from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError


logger = logging.getLogger(__name__)

# File extensions allowed inside a DDUF archive
DDUF_ALLOWED_ENTRIES = {".json", ".model", ".safetensors", ".txt"}

# Config files: each folder inside the archive must contain at least one of these
DDUF_FOLDER_REQUIRED_ENTRIES = {
    "config.json",
    "preprocessor_config.json",
    "scheduler_config.json",
    "tokenizer_config.json",
}
dZdeedddZdS )	DDUFEntrya  Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
    """

    filename: str
    length: int
    offset: int

    dduf_path: Path = field(repr=False)

    @contextmanager
    def as_mmap(self) -> Generator[bytes, None, None]:
        """Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        """
        with self.dduf_path.open("rb") as f:
            with mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) as mm:
                yield mm[self.offset : self.offset + self.length]

    def read_text(self, encoding: str = "utf-8") -> str:
        """Read the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        """
        with self.dduf_path.open("rb") as f:
            f.seek(self.offset)
            return f.read(self.length).decode(encoding=encoding)
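

# How an entry resolves to bytes (illustrative numbers taken from the
# `read_dduf_file` docstring below): an entry with offset=66 and length=587
# occupies bytes [66, 66 + 587) of the archive file itself. Because entries
# are uncompressed, `DDUFEntry.as_mmap` can slice that range out of a single
# memory-map of the archive without copying or decoding anything.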
def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
    """
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler.   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr)   _validate_dduf_structurekeyslen)r   entrieszfr6   er   indexr!   r!   r"   read_dduf_fileZ   s,    '(


def export_entries_as_dduf(
    dduf_path: Union[str, os.PathLike], entries: Iterable[Tuple[str, Union[str, Path, bytes]]]
) -> None:
    """Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build a generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
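
    Note:
        Entries are written sequentially. When a content is provided as a path
        (`str` or `Path`), it is streamed into the archive in chunks instead of
        being fully loaded in memory (see `_dump_content_in_archive` below).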
    zExporting DDUF file ''NwzCan't add duplicate entry: r3   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer4   zInvalid DDUF file structure.zDone writing DDUF file )r5   r6   setr7   r8   r.   r<   r   addr@   rA   _load_contentr(   JSONDecodeErrorr=   r   r:   _dump_content_in_archiverB   r   )r   rE   	filenamesrH   archiver   contentrG   r!   r!   r"   export_entries_as_dduf   s4    :
 &.
 rT   )r   folder_pathr   c                    s6   t   tttt f  d fdd}t| |  dS )a  
    Export a folder as a DDUF file.

    Uses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
    """
    folder_path = Path(folder_path)

    def _iterate_over_folder() -> Iterable[Tuple[str, Path]]:
        # Iterate over the folder, yielding (path_in_archive, path_on_disk)
        # tuples. Skips disallowed file types and files nested more than one
        # directory level deep.
        for path in Path(folder_path).glob("**/*"):
            if not path.is_file():
                continue
            if path.suffix not in DDUF_ALLOWED_ENTRIES:
                logger.debug(f"Skipping file '{path}' (file type not allowed)")
                continue
            path_in_archive = path.relative_to(folder_path)
            if len(path_in_archive.parts) >= 3:
                logger.debug(f"Skipping file '{path}' (nested directories not allowed)")
                continue
            yield path_in_archive.as_posix(), path

    export_entries_as_dduf(dduf_path, _iterate_over_folder())
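

# Example layout produced by `export_folder_as_dduf` (hypothetical folder, for
# illustration only): given the files
#   path/to/FLUX.1-dev/model_index.json
#   path/to/FLUX.1-dev/vae/config.json
#   path/to/FLUX.1-dev/vae/diffusion_pytorch_model.safetensors
# the archive entries become "model_index.json", "vae/config.json" and
# "vae/diffusion_pytorch_model.safetensors" (POSIX separators, at most one
# directory level, as enforced by `_validate_dduf_entry_name`).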


def _dump_content_in_archive(archive: zipfile.ZipFile, filename: str, content: Union[str, os.PathLike, bytes]) -> None:
    # Write a single entry into the opened archive, streaming from disk in 1 MB
    # chunks when the content is given as a path (avoids loading it in memory).
    with archive.open(filename, "w", force_zip64=True) as archive_fh:
        if isinstance(content, (str, Path)):
            content_path = Path(content)
            with content_path.open("rb") as content_fh:
                shutil.copyfileobj(content_fh, archive_fh, 1024 * 1024)
        elif isinstance(content, bytes):
            archive_fh.write(content)
        else:
            raise DDUFExportError(f"Invalid content type for {filename}. Must be str, Path or bytes.")


def _load_content(content: Union[str, Path, bytes]) -> bytes:
    """Load the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    """
    if isinstance(content, (str, Path)):
        return Path(content).read_bytes()
    elif isinstance(content, bytes):
        return content
    else:
        raise DDUFExportError(f"Invalid content type. Must be str, Path or bytes. Got {type(content)}.")


def _validate_dduf_entry_name(entry_name: str) -> str:
    # Validate and normalize an entry name: allowed extension, POSIX separators,
    # at most one directory level. Returns the normalized name.
    if "." + entry_name.split(".")[-1] not in DDUF_ALLOWED_ENTRIES:
        raise DDUFInvalidEntryNameError(f"File type not allowed: {entry_name}")
    if "\\" in entry_name:
        raise DDUFInvalidEntryNameError(f"Entry names must use UNIX separators ('/'). Got {entry_name}.")
    entry_name = entry_name.strip("/")
    if entry_name.count("/") > 1:
        raise DDUFInvalidEntryNameError(f"DDUF only supports 1 level of directory. Got {entry_name}.")
    return entry_name


def _validate_dduf_structure(index: Any, entry_names: Iterable[str]) -> None:
    """
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least one config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
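
    Example:
        A constructed illustration (not output from a real file): the entry
        names `["model_index.json", "unet/config.json"]` pass validation only
        if the parsed index contains a "unet" key, since "unet" is the sole
        folder and 'config.json' satisfies its required-config rule.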
    """
    if not isinstance(index, dict):
        raise DDUFCorruptedFileError(f"Invalid 'model_index.json' content. Must be a dictionary. Got {type(index)}.")

    dduf_folders = {entry.split("/")[0] for entry in entry_names if "/" in entry}
    for folder in dduf_folders:
        if folder not in index:
            raise DDUFCorruptedFileError(f"Missing required entry '{folder}' in 'model_index.json'.")
        if not any(f"{folder}/{required_entry}" in entry_names for required_entry in DDUF_FOLDER_REQUIRED_ENTRIES):
            raise DDUFCorruptedFileError(
                f"Missing required file in folder '{folder}'. Must contain at least one of {DDUF_FOLDER_REQUIRED_ENTRIES}."
            )


def _get_data_offset(zf: zipfile.ZipFile, info: zipfile.ZipInfo) -> int:
    """
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
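
    Note:
        The fixed part of a ZIP local file header is 30 bytes; the filename
        length and extra field length are stored as little-endian uint16 at
        byte offsets 26-27 and 28-29 of that header. The entry's data therefore
        starts at `header_offset + 30 + filename_len + extra_field_len`, which
        is exactly what this helper computes.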
    """
    if zf.fp is None:
        raise DDUFCorruptedFileError("ZipFile object must be opened in read mode.")

    # Step 1: locate the local file header for this entry
    header_offset = info.header_offset

    # Step 2: read the fixed-size part of the local file header (30 bytes)
    zf.fp.seek(header_offset)
    local_file_header = zf.fp.read(30)
    if len(local_file_header) < 30:
        raise DDUFCorruptedFileError("Incomplete local file header.")

    # Step 3: parse the variable-length field sizes (little-endian uint16)
    filename_len = int.from_bytes(local_file_header[26:28], "little")
    extra_field_len = int.from_bytes(local_file_header[28:30], "little")

    # The entry data starts right after the fixed header, filename and extra field
    data_offset = header_offset + 30 + filename_len + extra_field_len

    return data_offset