import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional

import torch

from ...utils.logging import logging
from ...utils.metrics import traced


logger = logging.getLogger("ContinuousBatchingLogger")
logger.setLevel(logging.INFO)


def get_device_and_memory_breakdown() -> tuple[torch.device, int, int, int]:
    """Return the active device and its (total, reserved, allocated) memory counters in bytes."""
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        total_memory = torch.cuda.get_device_properties(device).total_memory
        reserved_memory = torch.cuda.memory_reserved(device)
        allocated_memory = torch.cuda.memory_allocated(device)
    elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
        device = torch.device("mps")
        # MPS only exposes coarse counters, so the breakdown is approximate
        total_memory = torch.mps.driver_allocated_memory()
        reserved_memory = total_memory - torch.mps.recommended_max_memory()
        allocated_memory = 0
    else:
        device = torch.device("cpu")
        total_memory = None
        reserved_memory = 0
        allocated_memory = 0
    return device, total_memory, reserved_memory, allocated_memory


class RequestStatus(Enum):
    """Status of a generation request through its lifecycle."""

    PENDING = "pending"
    PREFILLING = "prefilling"
    PREFILLING_SPLIT = "prefilling_split"
    SPLIT_PENDING_REMAINDER = "split_pending_remainder"
    DECODING = "decoding"
    FINISHED = "finished"
    FAILED = "failed"


@dataclass
class GenerationOutput:
    """Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        next_token (Optional[int]): The next token to be generated.
    """

    request_id: str
    prompt_ids: list[int] = field(default_factory=list)
    generated_tokens: list[int] = field(default_factory=list)
    logprobs: list[float] = field(default_factory=list)
    error: Optional[str] = None
    status: RequestStatus = RequestStatus.PENDING
    created_time: float = field(default_factory=time.time)
    next_token: Optional[int] = None


@dataclass
class RequestState:
    """Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        full_prompt_ids (list[int] | None): The token IDs of the full prompt.
        prompt_ids (list[int] | None): The token IDs currently being processed.
        remaining_prompt_ids (list[int]): The token IDs remaining to be processed (for split requests).
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (list[int]): The identifiers of the blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED.
        max_new_tokens (int): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
        next_token (Optional[str]): The next token to be generated.
    """

    request_id: str
    full_prompt_ids: Optional[list[int]] = None
    prompt_ids: Optional[list[int]] = None
    remaining_prompt_ids: list[int] = field(default_factory=list)
    static_outputs: list[int] = field(default_factory=list)
    allocated_blocks: list[int] = field(default_factory=list)
    position_offset: int = 0
    _status: RequestStatus = RequestStatus.PENDING
    max_new_tokens: int = 20
    eos_token_id: int = -1
    created_time: float = field(default_factory=time.time)
    error: Optional[str] = None
    next_token: Optional[str] = None
    lifespan: tuple[float, float] = (-1, -1)

    @property
    def status(self) -> RequestStatus:
        return self._status

    @status.setter
    def status(self, value: RequestStatus):
        # Record when the request leaves PENDING and when it finishes, so the lifespan can be logged.
        if self._status == RequestStatus.PENDING:
            self.lifespan = (time.time(), -1)
        elif value == RequestStatus.FINISHED:
            self.lifespan = (self.lifespan[0], time.time())
            self.log_end_of_request()
        self._status = value

    def log_end_of_request(self):
        prefill_len = len(self.full_prompt_ids)
        decode_len = self.generated_len()
        start_time = self.lifespan[0] - self.created_time
        end_time = self.lifespan[1] - self.created_time
        logger.info(
            f"Request {self.request_id} finished: prefill_len = {prefill_len} decode_len = {decode_len} "
            f"start_time = {start_time} end_time = {end_time}"
        )

    def current_len(self) -> int:
        """Get the current length of the sequence (prompt + generated tokens)."""
        return self.position_offset

    def generated_len(self) -> int:
        """Get the number of tokens generated so far."""
        return len(self.static_outputs)

    @traced
    def update_with_token(self, token_id: int) -> bool:
        """Update the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        """
        # Only decoding requests can be updated
        if self.status != RequestStatus.DECODING:
            return False

        is_eos = token_id == self.eos_token_id and self.eos_token_id != -1
        is_max_len = self.generated_len() >= self.max_new_tokens

        # The EOS token is only kept in the output when generation stops because of the length limit
        if not is_eos or is_max_len:
            self.static_outputs.extend([token_id])

        if is_eos or is_max_len:
            self.status = RequestStatus.FINISHED
            return True
        return False

    def __repr__(self):
        msg = [
            f"request_id={self.request_id}",
            f"status={self._status}",
            f"out_tokens={self.generated_len()}",
            f"query_length={len(self.prompt_ids)}",
            f"remaining_tokens={len(self.remaining_prompt_ids)}",
            f"kv_length={self.position_offset}",
            f"full_prompt_length={len(self.full_prompt_ids)}",
            f"allocated_blocks={self.allocated_blocks}",
            f"generated_tokens={self.static_outputs}",
        ]
        return "RequestState(\n\t" + ",\n\t".join(msg) + "\n)"

    def to_generation_output(self):
        """Convert the request state to a GenerationOutput object."""
        return GenerationOutput(
            request_id=self.request_id,
            prompt_ids=self.full_prompt_ids,
            status=self.status,
            generated_tokens=self.static_outputs,
            logprobs=[],
            error=self.error,
            next_token=self.next_token,
        )
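

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): shows how a
# RequestState is expected to move through its lifecycle and be turned into a
# GenerationOutput. The request ID, token IDs, and limits below are made-up
# example values, and the function is never called by the library itself.
def _example_request_lifecycle() -> GenerationOutput:
    state = RequestState(
        request_id="req-0",
        full_prompt_ids=[101, 102, 103],
        prompt_ids=[101, 102, 103],
        max_new_tokens=4,
        eos_token_id=2,
    )
    state.status = RequestStatus.PREFILLING  # leaving PENDING starts the lifespan clock
    state.position_offset = len(state.full_prompt_ids)  # prompt tokens are now in the KV cache
    state.status = RequestStatus.DECODING
    for token_id in (17, 23, 2):  # 2 is the EOS token in this sketch
        if state.update_with_token(token_id):
            break  # request finished (EOS reached or max_new_tokens hit)
        state.position_offset += 1
    return state.to_generation_output()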