a
    h	1                     @   s   d dl Z d dlmZ d dlZddlmZ ddlmZm	Z	 ddl
mZmZmZ e	eZG dd	 d	eZeed
dG dd deZdS )    N)Union   )TruncationStrategy)add_end_docstringslogging   )ArgumentHandlerChunkPipelinebuild_pipeline_init_argsc                   @   s    e Zd ZdZdd Zdd ZdS )%ZeroShotClassificationArgumentHandlerz
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    c                 C   s"   t |trdd |dD }|S )Nc                 S   s   g | ]}|  r|  qS  )strip.0labelr   r   k/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/pipelines/zero_shot_classification.py
<listcomp>       zGZeroShotClassificationArgumentHandler._parse_labels.<locals>.<listcomp>,)
isinstancestrsplit)selflabelsr   r   r   _parse_labels   s    
z3ZeroShotClassificationArgumentHandler._parse_labelsc                    s   t |dkst |dkr td |d  krBtd  dt|trR|g}g }|D ]| fdd|D  qZ||fS )Nr   z>You must include at least one label and at least one sequence.z"The provided hypothesis_template "z" was not able to be formatted with the target labels. Make sure the passed template includes formatting syntax such as {} where the label should go.c                    s   g | ]}  |gqS r   )formatr   hypothesis_templatesequencer   r   r   '   r   zBZeroShotClassificationArgumentHandler.__call__.<locals>.<listcomp>)len
ValueErrorr   r   r   extend)r   	sequencesr   r   sequence_pairsr   r   r   __call__   s    

z.ZeroShotClassificationArgumentHandler.__call__N)__name__
__module____qualname____doc__r   r$   r   r   r   r   r      s   r   T)Zhas_tokenizerc                       s   e Zd ZdZdZdZdZdZe f fdd	Z	e
dd Zddejfdd	Zd
d Zeeee f d fddZdddZdd ZdddZ  ZS )ZeroShotClassificationPipelinea  
    NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
    language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
    hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
    **much** more flexible.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
    label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
    config's :attr:*~transformers.PretrainedConfig.label2id*.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="facebook/bart-large-mnli")
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["english", "german"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-classification"`.

    The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
    of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    FTc                    s0   || _ t j|i | | jdkr,td d S )NzFailed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.)_args_parsersuper__init__entailment_idloggerwarning)r   Zargs_parserargskwargs	__class__r   r   r-   Z   s    
z'ZeroShotClassificationPipeline.__init__c                 C   s2   | j jj D ]\}}| dr|  S qdS )NZentailr*   )modelconfigZlabel2iditemslower
startswith)r   r   indr   r   r   r.   c   s    
z,ZeroShotClassificationPipeline.entailment_idc           	   
   K   s   | j }| jjdu r(td | jj| j_z| j|||||d}W nL ty } z4dt|v rt| j||||tj	d}n|W Y d}~n
d}~0 0 |S )ze
        Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        NzfTokenizer was not supporting padding necessary for zero-shot, attempting to use  `pad_token=eos_token`)add_special_tokensreturn_tensorspadding
truncationz	too short)
	framework	tokenizerZ	pad_tokenr/   errorZ	eos_token	Exceptionr   r   ZDO_NOT_TRUNCATE)	r   r#   r=   r;   r>   r2   r<   inputser   r   r   _parse_and_tokenizej   s2    
z2ZeroShotClassificationPipeline._parse_and_tokenizec                 K   sz   | dd ur$|d |d< td i }d|v rD| j|d |d< d|v rX|d |d< i }d|v rp|d |d< |i |fS )NZmulti_classmulti_labelzThe `multi_class` argument has been deprecated and renamed to `multi_label`. `multi_class` will be removed in a future version of Transformers.candidate_labelsr   )getr/   r0   r+   r   )r   r2   Zpreprocess_paramsZpostprocess_paramsr   r   r   _sanitize_parameters   s    z3ZeroShotClassificationPipeline._sanitize_parameters)r"   c                    sR   t |dkrn0t |dkr0d|vr0|d |d< ntd| t j|fi |S )a  
        Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
        information.

        Args:
            sequences (`str` or `list[str]`):
                The sequence(s) to classify, will be truncated if the model input is too large.
            candidate_labels (`str` or `list[str]`):
                The set of possible class labels to classify each sequence into. Can be a single label, a string of
                comma-separated labels, or a list of labels.
            hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
                The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
                similar syntax for the candidate label to be inserted into the template. For example, the default
                template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
                model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
                works well in many cases, but it may be worthwhile to experiment with different templates depending on
                the task setting.
            multi_label (`bool`, *optional*, defaults to `False`):
                Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
                the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
                independent and probabilities are normalized for each candidate by doing a softmax of the entailment
                score vs. the contradiction score.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **sequence** (`str`) -- The sequence for which this is the output.
            - **labels** (`list[str]`) -- The labels sorted by order of likelihood.
            - **scores** (`list[float]`) -- The probabilities for each of the labels.
        r   r   rG   z%Unable to understand extra arguments )r   r    r,   r$   )r   r"   r1   r2   r3   r   r   r$      s    $z'ZeroShotClassificationPipeline.__call__NThis example is {}.c           
      c   s`   |  |||\}}tt||D ]:\}\}}| |g}	||d |t|d kd|	V  q d S )Nr   r   candidate_labelr   is_last)r+   	enumerateziprE   r   )
r   rC   rG   r   r#   r"   irL   Zsequence_pairZmodel_inputr   r   r   
preprocess   s    z)ZeroShotClassificationPipeline.preprocessc                    s    d } d } fdd| j jD }| jdkr8| jjn| jj}dt|jv rXd|d< | jf i |}|| d d	|}|S )
NrL   r   c                    s   i | ]}| | qS r   r   )r   krC   r   r   
<dictcomp>   r   z;ZeroShotClassificationPipeline._forward.<locals>.<dictcomp>ptZ	use_cacheFrM   rK   )	r@   Zmodel_input_namesr?   r5   forwardcallinspect	signature
parameters)r   rC   rL   r   Zmodel_inputsZmodel_forwardoutputsmodel_outputsr   rS   r   _forward   s    z'ZeroShotClassificationPipeline._forwardc                    sB  dd |D  dd |D }| j dkr<tdd |D }ntdd |D }|jd }t }|| }|||df}|st d	kr| j}	|	dkrdnd}
|d
|
|	gf }t|t|jddd }|d }n,|d
| jf }t|t|jddd }t	t
|d  }|d  fdd|D |d|f  dS )Nc                 S   s   g | ]}|d  qS )rL   r   r   r[   r   r   r   r      r   z>ZeroShotClassificationPipeline.postprocess.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r   r   r^   r   r   r   r      r   rU   c                 S   s   g | ]}|d     qS logits)floatnumpyr   outputr   r   r   r      r   c                 S   s   g | ]}|d    qS r_   )rb   rc   r   r   r   r      r   r   r*   r   .T)Zkeepdims).r   c                    s   g | ]} | qS r   r   )r   rP   rG   r   r   r     r   )r   r   scores)r?   npZconcatenateshaper   Zreshaper.   expsumlistreversedZargsorttolist)r   r\   rF   r"   r`   NnZnum_sequencesZreshaped_outputsr.   Zcontradiction_idZentail_contr_logitsrf   Zentail_logitsZtop_indsr   re   r   postprocess   s,    


z*ZeroShotClassificationPipeline.postprocess)NrJ   )F)r%   r&   r'   r(   Z_load_processorZ_load_image_processorZ_load_feature_extractorZ_load_tokenizerr   r-   propertyr.   r   Z
ONLY_FIRSTrE   rI   r   r   rk   r$   rQ   r]   rp   __classcell__r   r   r3   r   r)   ,   s    '	

*-
r)   )rX   typingr   rb   rg   Ztokenization_utilsr   utilsr   r   baser   r	   r
   Z
get_loggerr%   r/   r   r)   r   r   r   r   <module>   s   
