import json
import logging
import mmap
import os
import shutil
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Generator, Iterable, Tuple, Union

from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError


logger = logging.getLogger(__name__)

# File extensions allowed inside a DDUF archive
DDUF_ALLOWED_ENTRIES = {".json", ".model", ".safetensors", ".txt"}

# Config files: each folder inside the archive must contain at least one of these
DDUF_FOLDER_REQUIRED_ENTRIES = {
    "config.json",
    "preprocessor_config.json",
    "scheduler_config.json",
    "tokenizer_config.json",
}
dZdeedddZdS )	DDUFEntrya  Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
    """

    filename: str
    length: int
    offset: int

    dduf_path: Path = field(repr=False)

    @contextmanager
    def as_mmap(self) -> Generator[bytes, None, None]:
        """Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        """
        with self.dduf_path.open("rb") as f:
            with mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) as mm:
                yield mm[self.offset : self.offset + self.length]

    def read_text(self, encoding: str = "utf-8") -> str:
        """Read the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        """
        with self.dduf_path.open("rb") as f:
            f.seek(self.offset)
            return f.read(self.length).decode(encoding=encoding)
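

# How an entry resolves to bytes (illustrative numbers taken from the
# `read_dduf_file` docstring below): an entry with offset=66 and length=587
# occupies bytes [66, 66 + 587) of the archive file itself. Because entries
# are uncompressed, `DDUFEntry.as_mmap` can slice that range out of a single
# memory-map of the archive without copying or decoding anything.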
def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
    """
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler.   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr)   _validate_dduf_structurekeyslen)r   entrieszfr6   er   indexr!   r!   r"   read_dduf_fileZ   s,    '(


def export_entries_as_dduf(
    dduf_path: Union[str, os.PathLike], entries: Iterable[Tuple[str, Union[str, Path, bytes]]]
) -> None:
    """Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build a generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
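
    Note:
        Entries are written sequentially. When a content is provided as a path
        (`str` or `Path`), it is streamed into the archive in chunks instead of
        being fully loaded in memory (see `_dump_content_in_archive` below).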
    zExporting DDUF file ''NwzCan't add duplicate entry: r3   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer4   zInvalid DDUF file structure.zDone writing DDUF file )r5   r6   setr7   r8   r.   r<   r   addr@   rA   _load_contentr(   JSONDecodeErrorr=   r   r:   _dump_content_in_archiverB   r   )r   rE   	filenamesrH   archiver   contentrG   r!   r!   r"   export_entries_as_dduf   s4    :
 &.
 rT   )r   folder_pathr   c                    s6   t   tttt f  d fdd}t| |  dS )a  
    Export a folder as a DDUF file.

    Uses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
    """
    folder_path = Path(folder_path)

    def _iterate_over_folder() -> Iterable[Tuple[str, Path]]:
        # Iterate over the folder, yielding (path_in_archive, path_on_disk)
        # tuples. Skips disallowed file types and files nested more than one
        # directory level deep.
        for path in Path(folder_path).glob("**/*"):
            if not path.is_file():
                continue
            if path.suffix not in DDUF_ALLOWED_ENTRIES:
                logger.debug(f"Skipping file '{path}' (file type not allowed)")
                continue
            path_in_archive = path.relative_to(folder_path)
            if len(path_in_archive.parts) >= 3:
                logger.debug(f"Skipping file '{path}' (nested directories not allowed)")
                continue
            yield path_in_archive.as_posix(), path

    export_entries_as_dduf(dduf_path, _iterate_over_folder())
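

# Example layout produced by `export_folder_as_dduf` (hypothetical folder, for
# illustration only): given the files
#   path/to/FLUX.1-dev/model_index.json
#   path/to/FLUX.1-dev/vae/config.json
#   path/to/FLUX.1-dev/vae/diffusion_pytorch_model.safetensors
# the archive entries become "model_index.json", "vae/config.json" and
# "vae/diffusion_pytorch_model.safetensors" (POSIX separators, at most one
# directory level, as enforced by `_validate_dduf_entry_name`).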


def _dump_content_in_archive(archive: zipfile.ZipFile, filename: str, content: Union[str, os.PathLike, bytes]) -> None:
    # Write a single entry into the opened archive, streaming from disk in 1 MB
    # chunks when the content is given as a path (avoids loading it in memory).
    with archive.open(filename, "w", force_zip64=True) as archive_fh:
        if isinstance(content, (str, Path)):
            content_path = Path(content)
            with content_path.open("rb") as content_fh:
                shutil.copyfileobj(content_fh, archive_fh, 1024 * 1024)
        elif isinstance(content, bytes):
            archive_fh.write(content)
        else:
            raise DDUFExportError(f"Invalid content type for {filename}. Must be str, Path or bytes.")


def _load_content(content: Union[str, Path, bytes]) -> bytes:
    """Load the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    """
    if isinstance(content, (str, Path)):
        return Path(content).read_bytes()
    elif isinstance(content, bytes):
        return content
    else:
        raise DDUFExportError(f"Invalid content type. Must be str, Path or bytes. Got {type(content)}.")


def _validate_dduf_entry_name(entry_name: str) -> str:
    # Validate and normalize an entry name: allowed extension, POSIX separators,
    # at most one directory level. Returns the normalized name.
    if "." + entry_name.split(".")[-1] not in DDUF_ALLOWED_ENTRIES:
        raise DDUFInvalidEntryNameError(f"File type not allowed: {entry_name}")
    if "\\" in entry_name:
        raise DDUFInvalidEntryNameError(f"Entry names must use UNIX separators ('/'). Got {entry_name}.")
    entry_name = entry_name.strip("/")
    if entry_name.count("/") > 1:
        raise DDUFInvalidEntryNameError(f"DDUF only supports 1 level of directory. Got {entry_name}.")
    return entry_name


def _validate_dduf_structure(index: Any, entry_names: Iterable[str]) -> None:
    """
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least one config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
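
    Example:
        A constructed illustration (not output from a real file): the entry
        names `["model_index.json", "unet/config.json"]` pass validation only
        if the parsed index contains a "unet" key, since "unet" is the sole
        folder and 'config.json' satisfies its required-config rule.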
    """
    if not isinstance(index, dict):
        raise DDUFCorruptedFileError(f"Invalid 'model_index.json' content. Must be a dictionary. Got {type(index)}.")

    dduf_folders = {entry.split("/")[0] for entry in entry_names if "/" in entry}
    for folder in dduf_folders:
        if folder not in index:
            raise DDUFCorruptedFileError(f"Missing required entry '{folder}' in 'model_index.json'.")
        if not any(f"{folder}/{required_entry}" in entry_names for required_entry in DDUF_FOLDER_REQUIRED_ENTRIES):
            raise DDUFCorruptedFileError(
                f"Missing required file in folder '{folder}'. Must contain at least one of {DDUF_FOLDER_REQUIRED_ENTRIES}."
            )


def _get_data_offset(zf: zipfile.ZipFile, info: zipfile.ZipInfo) -> int:
    """
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
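
    Note:
        The fixed part of a ZIP local file header is 30 bytes; the filename
        length and extra field length are stored as little-endian uint16 at
        byte offsets 26-27 and 28-29 of that header. The entry's data therefore
        starts at `header_offset + 30 + filename_len + extra_field_len`, which
        is exactly what this helper computes.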
    """
    if zf.fp is None:
        raise DDUFCorruptedFileError("ZipFile object must be opened in read mode.")

    # Step 1: locate the local file header for this entry
    header_offset = info.header_offset

    # Step 2: read the fixed-size part of the local file header (30 bytes)
    zf.fp.seek(header_offset)
    local_file_header = zf.fp.read(30)
    if len(local_file_header) < 30:
        raise DDUFCorruptedFileError("Incomplete local file header.")

    # Step 3: parse the variable-length field sizes (little-endian uint16)
    filename_len = int.from_bytes(local_file_header[26:28], "little")
    extra_field_len = int.from_bytes(local_file_header[28:30], "little")

    # The entry data starts right after the fixed header, filename and extra field
    data_offset = header_offset + 30 + filename_len + extra_field_len

    return data_offset