a
    h45                     @   s   d dl mZmZ d dlmZ ddlmZmZ ddlm	Z	 ddl
mZmZ G dd	 d	eZe G d
d deZe G dd deZeedZdS )    )ABCabstractmethod)deque   )attach_tracertraced   )PagedAttentionCache)RequestStateRequestStatusc                   @   s   e Zd ZdZdeedddZeedddZ	ee
ee d	d
dZeedddZedeedddZeeee
 dddZdS )	Schedulerz
    Abstract base class for scheduling requests in the continuous batch processor.
    It is expected that cache allocation and scheduling logic will be implemented in subclasses.
    F)cacheretain_cache_on_finishc                 C   s$   i | _ i | _t | _|| _|| _d S N)active_requestswaiting_requestsr   waiting_requests_orderr   r   )selfr   r    r   q/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/generation/continuous_batching/scheduler.py__init__   s
    zScheduler.__init__statec                 C   s   dS z"Add a request to the waiting list.Nr   )r   r   r   r   r   add_waiting_request$   s    zScheduler.add_waiting_requesttoken_budgetreturnc                 C   s   d S r   r   )r   r   r   r   r   schedule_batch)   s    zScheduler.schedule_batch)r   c                 C   s   t | jpt | jS )z2Check if there are requests ready to be processed.)lenr   r   )r   r   r   r   has_pending_requests-   s    zScheduler.has_pending_requestsT
request_idevict_from_cachec                 C   s   dS )z:Finish processing a request and free its allocated blocks.Nr   r   r"   r#   r   r   r   finish_request2   s    zScheduler.finish_request)r"   r   c                 C   s   || j v r| j | jS g S r   )r   Zstatic_outputs)r   r"   r   r   r   !get_active_request_static_outputs7   s    
z+Scheduler.get_active_request_static_outputsN)F)T)__name__
__module____qualname____doc__r	   boolr   r   r
   r   intlistr   r   r    strr%   r&   r   r   r   r   r      s   r   c                       s   e Zd Zdeeed fddZeee	dddZ
ed	d
ee	ee dddZeedddZee	ee dddZedeedddZ  ZS )FIFOSchedulerF        )r   r   safety_marginc                    s   t  || || _d S r   )superr   r1   )r   r   r   r1   	__class__r   r   r   @   s    zFIFOScheduler.__init__r   len_next_tokensc                 C   st   |  }t|j| jj | }||k s4t|jdkrp|| d | jj d }| j||j}|sddS |j| dS Nr   r   FTcurrent_lenr   allocated_blocksr   
block_sizeZallocate_blocksr"   extendr   r   r6   r9   Z	occupancyZblocks_neededZ	allocatedr   r   r   _allocate_blocks_if_neededD   s    z(FIFOScheduler._allocate_blocks_if_neededprepare_requestZ	span_namer   r   "request_ids_to_remove_from_waitingc                 C   s   |j tjkr|jn|j}t||k rv|j tjkrR|| j|j< tj	|_ |
|j q|j tjkrtj	|_ |j|_g |_n^|j tjkr|| j|j< tj|_ |
|j n|j tjkrtj|_ ||d |_|d| |_dS z6Prepare a request for processing in the current batch.Nstatusr   SPLIT_PENDING_REMAINDERZremaining_prompt_ids
prompt_idsr   ZPENDINGr   r"   Z
PREFILLINGaddZPREFILLING_SPLITr   r   r   rB   Zrequest_tokensr   r   r   _prepare_request_for_processingR   s&    z-FIFOScheduler._prepare_request_for_processingr   c                 C   sd   | j rF|j| jv rF| j|j}|jt|jd |_|j|_|j|_|| j	|j< | j
|j dS r   r   r"   r   poprG   r   Zfull_prompt_idsr:   Zposition_offsetr   r   appendr   r   	old_stater   r   r   r   o   s    z!FIFOScheduler.add_waiting_requestr   c                    sj  g }g }g j  D ]0}|jtjkr0|| |jtjkr|| qjD ]}|j|  qN|| }t	  j
jj }|D ]}j }||k }	|	rr|jtjkr qL||  t|j}
|t|jstjjdkr qLqttdfdd}|| ||
8 }ttd fdd}|| |dkr qLqt fddjD _S )	Nr   r   c                    s     |  d S r   rM   r   scheduled_requestsr   r   _add_to_scheduled_requests   s    z@FIFOScheduler.schedule_batch.<locals>._add_to_scheduled_requestsc                    s&   | j }|jv r"j|=  | d S r   r"   r   rH   r   req_idrB   r   r   r   _remove_from_waiting_requests   s    
zCFIFOScheduler.schedule_batch.<locals>._remove_from_waiting_requestsc                    s   g | ]}| vr|qS r   r   .0rV   rB   r   r   
<listcomp>       z0FIFOScheduler.schedule_batch.<locals>.<listcomp>)r   valuesrE   r   DECODINGrM   rF   r   r   setr1   r   Z
num_blocksZget_num_free_blocksrJ   r   rG   r>   _free_blocksr   r
   r   )r   r   priority_statessecond_priority_statesr   rV   
candidatesZsafety_marginsZnum_free_blocksZoutside_safety_marginrequest_lenrS   rX   r   rB   rR   r   r   r   z   sN    




zFIFOScheduler.schedule_batchTr!   c                 C   s&   |r"| j | || jv r"| j|= d S r   r   Zfree_blocksr   r$   r   r   r   r%      s    
zFIFOScheduler.finish_request)Fr0   )T)r'   r(   r)   r	   r+   floatr   r   r
   r,   r>   r`   r.   rJ   r   r-   r   r%   __classcell__r   r   r3   r   r/   >   s   

=r/   c                   @   s   e Zd ZeeedddZeddeeee dddZ	eed	d
dZ
eeee dddZedeedddZdS )PrefillFirstSchedulerr5   c                 C   st   |  }t|j| jj | }||k s4t|jdkrp|| d | jj d }| j||j}|sddS |j| dS r7   r8   r=   r   r   r   r>      s    z0PrefillFirstScheduler._allocate_blocks_if_neededr?   r@   rA   c                 C   s   |j tjkr|jn|j}t||k rv|j tjkrR|| j|j< tj	|_ |
|j q|j tjkrtj	|_ |j|_g |_n^|j tjkr|| j|j< tj|_ |
|j n|j tjkrtj|_ ||d |_|d| |_dS rC   rD   rI   r   r   r   rJ      s&    z5PrefillFirstScheduler._prepare_request_for_processingr   c                 C   sd   | j rF|j| jv rF| j|j}|jt|jd |_|j|_|j|_|| j	|j< | j
|j dS r   rK   rN   r   r   r   r      s    z)PrefillFirstScheduler.add_waiting_requestr   c           
         s2  g }g }g j  D ]2}|jtjkr2|| q|jtjkr|| qjD ]}|j|  qP|| }t	  |D ]}
||  t|j}|t|jstjjdkrx qqxttdfdd}|| ||8 }ttd fdd}	|	| |dkrx qqxt fddjD _S )	Nr   r   c                    s     |  d S r   rP   r   rQ   r   r   rS     s    zHPrefillFirstScheduler.schedule_batch.<locals>._add_to_scheduled_requestsc                    s&   | j }|jv r"j|=  | d S r   rT   rU   rW   r   r   rX     s    
zKPrefillFirstScheduler.schedule_batch.<locals>._remove_from_waiting_requestsc                    s   g | ]}| vr|qS r   r   rY   r[   r   r   r\   *  r]   z8PrefillFirstScheduler.schedule_batch.<locals>.<listcomp>)r   r^   rE   r   rF   rM   r_   r   r   r`   rJ   r   rG   r>   r   ra   r   r
   r   )
r   r   rb   rc   r   rV   rd   re   rS   rX   r   rf   r   r      sD    


z$PrefillFirstScheduler.schedule_batchTr!   c                 C   s&   |r"| j | || jv r"| j|= d S r   rg   r$   r   r   r   r%   /  s    
z$PrefillFirstScheduler.finish_requestN)T)r'   r(   r)   r   r
   r,   r>   r`   r.   rJ   r   r-   r   r+   r%   r   r   r   r   rj      s   

6rj   )ZfifoZprefill_firstN)abcr   r   collectionsr   Zutils.metricsr   r   r   r	   classesr
   r   r   r/   rj   ZSCHEDULER_MAPPINGr   r   r   r   <module>   s   ' w