a
    ht=                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Z
G dd deZz(d dlmZ d dlmZmZmZ dZW n ey   d	ZY n0 dd
dZddd	ddeeeeee	eeegef f f   dddZeeZe G dd dZdS )    N)Enum)AnyCallableOptionalUnionc                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
RequestStatusz5Status of a generation request through its lifecycle.pendingZ
prefillingZprefilling_splitZsplit_pending_remainderZdecodingfinishedfailedN)__name__
__module____qualname____doc__ZPENDING
PREFILLINGPREFILLING_SPLITZSPLIT_PENDING_REMAINDERDECODINGFINISHEDZFAILED r   r   V/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/utils/metrics.pyr   
   s   r   )metrics)Status
StatusCode
get_tracerTFc                    s   t sdd S  fdd}|S )a  
    Decorator that attaches a tracer to a class.

    This decorator should be applied to classes that need OpenTelemetry tracing.
    It adds a tracer attribute to the class instance that can be used by the traced decorator.

    Args:
        tracer_name_template: Optional template string for the tracer name.
            If provided, it should contain {module} which will be replaced with the class's full module path
            and {class_name} for the class name.
            If None, a default naming scheme will be used where:
              - If the module already starts with "transformers.", it will use that directly
              - Otherwise, it will prepend "transformers." to the module name

    Returns:
        Class decorator function
    c                 S   s   | S )Nr   )clsr   r   r   <lambda>2       zattach_tracer.<locals>.<lambda>c                    s*    j t fdd}| _  S )Nc                    sr   | g|R i |  j } j}d u rV|drD| d| }qdd| d| }nj||d}t|| _d S )Ntransformers..)module
class_name)r   r   
startswithformatr   tracer)selfargskwargsmodule_namer   Ztracer_name)r   original_inittracer_name_templater   r   init_with_tracer7   s    
z:attach_tracer.<locals>.decorator.<locals>.init_with_tracer)__init__	functoolswraps)r   r)   r(   )r   r'   r   	decorator4   s
    z attach_tracer.<locals>.decorator)_has_opentelemetry)r(   r.   r   r-   r   attach_tracer   s    r0   )	span_name
standaloneadditional_attributes)r3   c                   s$    fdd}| du r|S || S )a  
    Decorator to trace function calls with OpenTelemetry.

    Can be used as @traced or @traced(span_name="custom_name")

    Args:
        func: The function to trace
        span_name: Optional custom name for the span (defaults to function name)
        standalone: If True, creates a parentless span
        additional_attributes: Optional list of additional attributes to set on the span.
          Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
          where:
            - instance_attribute_name: Name of the attribute to get from the class instance
            - span_attribute_key: Key to use when setting the attribute on the span
            - value_or_transform_function: Either a raw value to use directly, or a function to transform
              the attribute value before setting it on the span

    Returns:
        Decorated function with tracing
    c                    s0   t s S dd l}|  fdd}|S )Nr   c                     s\  | r t dr jd ur | d nd }|d u}|rBt |drB|j}ntdj dj }pbj}rn|jn|j}||}|dj |dj |d| | rt	| D ]T\}}	t
|	ttttfs|	d u r|d	| t|	 q|d	| tt|	 q|rv| D ]Z\}
}t
|ttttfs@|d u rX|d
|
 t| n|d
|
 tt| q r|r҈ D ]J}|\}}}t ||rt||}t|r||}n|}||| qz"| i |}|W W  d    S  ty6 } z(|ttj ||  W Y d }~n
d }~0 0 W d    n1 sN0    Y  d S )N__self__r   r"   r   r   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrr4   r"   r   r   r   Z
start_spanZstart_as_current_spanZset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	ExceptionZ
set_statusr   r   ERRORZrecord_exception)r$   r%   instanceZ	is_methodr"   nameZspan_fnspaniargkeyvalueZattr_configZinstance_attribute_nameZspan_attribute_keyZvalue_or_transform_functionZattribute_valueZtransformed_valueresulte)r3   funcr1   r2   r   r   wrapperp   sJ    $





z*traced.<locals>.decorator.<locals>.wrapper)r/   r+   r,   )rK   r+   rL   r3   r1   r2   )rK   r   r.   j   s    0ztraced.<locals>.decoratorNr   )rK   r1   r2   r3   r.   r   rM   r   tracedN   s    9rN   c                   @   s   e Zd ZdZedddZdd Zeee	ddd	d
Z
eeddddZeddddZeeeddddZeee	ddddZdS )ContinuousBatchProcessorMetricsz0Metrics collection for ContinuousBatchProcessor.)max_batch_tokensc                 C   s   || _ |   dS )zInitialize metrics for continuous batch processor.

        Args:
            max_batch_tokens: Maximum number of tokens in a batch
        N)rP   _setup_metrics)r#   rP   r   r   r   r*      s    z(ContinuousBatchProcessorMetrics.__init__c                 C   s  t std dS td| _g d}| jjddd|d| _| jjd	d
dd| _	| jjdddd| _
g d}| jjddd|d| _| jjdddd| _| jjdddd| _| jjdddd| _g d}| jjddd|d| _| jjddd d| _| jjd!d"d d| _dS )#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  Zttft_millisecondsz#Time to first token in millisecondsms)rC   descriptionunitZ#explicit_bucket_boundaries_advisoryZactive_requests_countz3Number of active requests currently being processedrequests)rC   r_   r`   Zwaiting_requests_countz*Number of requests waiting to be processed)rT   rV      rY   rZ   r[   r\   r]   i N  i0u  i`  Zrequest_latency_millisecondsz9End-to-end latency for completed requests in millisecondsZdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioZprefill_tokens_processedz"Number of prefill tokens processedtokensZdecode_tokens_processedz!Number of decode tokens processed)   rR         (   rT   <   F   P   Z   _   b   rV   Zbatch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentZkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbytesZkv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)r/   loggerinfor   Z	get_meterZmeterZcreate_histogramttft_histogramZcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugeZcreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r#   Zttft_bucketsZlatency_bucketsZbatch_fill_bucketsr   r   r   rQ      sx    
z.ContinuousBatchProcessorMetrics._setup_metricsN)created_time
request_idreturnc              
   C   s|   t sdS t | d }z*| j| td| d|dd W n4 tyv } ztd|  W Y d}~n
d}~0 0 dS )zRecord Time to First Token (TTFT).

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        N     @@zRecorded TTFT for request : .2fr^   zFailed to record TTFT metric: )r/   timers   recordrq   debugr@   warning)r#   r}   r~   Zttft_msrJ   r   r   r   record_ttft_metric
  s    z2ContinuousBatchProcessorMetrics.record_ttft_metric)requests_in_batchr   c           	      C   s(  t r|sdS d}d}|D ]:}|jtjkr2|d7 }q|jtjtjfv r|t|j7 }q|| }z|dkrr| j	| |dkr| j
	| |dkr|| }| j| || j d }| j| td| d| d|dd	| d
| j d W n6 ty" } ztd|  W Y d}~n
d}~0 0 dS )zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

        Args:
            requests_in_batch: List of request states in the current batch
        Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )r/   statusr   r   r   r   lenZ
prompt_idsrx   addry   rw   setrP   rz   r   rq   r   r@   r   )	r#   r   Zdecode_tokensZprefill_tokensstateZtotal_batch_tokensrc   Zfill_percentagerJ   r   r   r   record_batch_metrics  s>    
z4ContinuousBatchProcessorMetrics.record_batch_metrics)r   c                 C   s  t sdS z|jt|j }t|j}|jtjtjfv r:dnd}|| |j	 |j
 |j d | }|t|j |j	 |j
 |j d | }| j| | j| td|d dd| d|j d	||j d
 dd	 W n6 ty } ztd|  W Y d}~n
d}~0 0 dS )a&  Record memory usage of the PagedAttentionCache without GPU synchronization.

        This calculates the theoretical memory usage based on cache configuration
        and the number of blocks currently in use.

        Args:
            cache: The PagedAttentionCache object to measure
        N      zKV Cache memory: i   r   zMB, Used blocks: r   z (rV   z.1fz%)z*Failed to record KV cache memory metrics: )r/   Z
num_blocksr   Z_free_blocksZ	key_cacheZdtypetorchZfloat16Zbfloat16
block_sizeZnum_key_value_headsZhead_dimr|   r   r{   rq   r   r@   r   )r#   cacheZnum_used_blocksZ
num_layersZbytes_per_parameterZmemory_bytesZfree_memory_bytesrJ   r   r   r   record_kv_cache_memory_metricsF  s^    


z>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metrics)active_requestswaiting_requestsr   c              
   C   sv   t sdS z4| j| | j| td| d| d W n4 typ } ztd|  W Y d}~n
d}~0 0 dS )zRecord metrics about active and waiting requests.

        Args:
            active_requests: Number of active requests
            waiting_requests: Number of waiting requests
        NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )r/   rt   r   ru   rq   r   r@   r   )r#   r   r   rJ   r   r   r   record_queue_metrics|  s    z4ContinuousBatchProcessorMetrics.record_queue_metricsc              
   C   s|   t sdS t | d }z*| j| td| d|dd W n4 tyv } ztd|  W Y d}~n
d}~0 0 dS )zRecord metrics about a completed request.

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        Nr   z Recorded request completion for r   r   r^   z,Failed to record request completion metric: )r/   r   rv   r   rq   r   r@   r   )r#   r}   r~   Z
latency_msrJ   r   r   r   record_request_completion  s    z9ContinuousBatchProcessorMetrics.record_request_completion)r   r   r   r   r9   r*   rQ   rN   r:   r8   r   listr   r   r   r   r   r   r   r   rO      s   
Q(5rO   )N)N)r+   loggingr   enumr   typingr   r   r   r   r   r   Zopentelemetryr   Zopentelemetry.tracer   r   r   r/   ImportErrorr0   r   tupler8   rN   	getLoggerr   rq   rO   r   r   r   r   <module>   s0   

0 &Z
