import collections
import types

import numpy as np

from ..generation import GenerationConfig
from ..utils import (
    add_end_docstrings,
    is_tf_available,
    is_torch_available,
    requires_backends,
)
from .base import ArgumentHandler, Dataset, Pipeline, PipelineException, build_pipeline_init_args


if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
    )

if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
        TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
    )


class TableQuestionAnsweringArgumentHandler(ArgumentHandler):
    """
    Handles arguments for the TableQuestionAnsweringPipeline
    """

    def __call__(self, table=None, query=None, **kwargs):
        # Normalizes the inputs into a list of {"table": pd.DataFrame, "query": list[str]} dicts.
        requires_backends(self, "pandas")
        import pandas as pd

        if table is None:
            raise ValueError("Keyword argument `table` cannot be None.")
        elif query is None:
            if isinstance(table, dict) and table.get("query") is not None and table.get("table") is not None:
                tqa_pipeline_inputs = [table]
            elif isinstance(table, list) and len(table) > 0:
                if not all(isinstance(d, dict) for d in table):
                    raise ValueError(
                        f"Keyword argument `table` should be a list of dict, but is {[type(d) for d in table]}"
                    )
                if table[0].get("query") is not None and table[0].get("table") is not None:
                    tqa_pipeline_inputs = table
                else:
                    raise ValueError(
                        "If keyword argument `table` is a list of dictionaries, each dictionary should have a "
                        f"`table` and `query` key, but the first dictionary has keys {table[0].keys()}."
                    )
            elif (Dataset is not None and isinstance(table, Dataset)) or isinstance(table, types.GeneratorType):
                return table
            else:
                raise ValueError(
                    "Invalid input. Keyword argument `table` should be either of type `dict` or `list`, but is "
                    f"{type(table)})"
                )
        else:
            tqa_pipeline_inputs = [{"table": table, "query": query}]

        for tqa_pipeline_input in tqa_pipeline_inputs:
            if not isinstance(tqa_pipeline_input["table"], pd.DataFrame):
                if tqa_pipeline_input["table"] is None:
                    raise ValueError("Table cannot be None.")
                tqa_pipeline_input["table"] = pd.DataFrame(tqa_pipeline_input["table"])

        return tqa_pipeline_inputs


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TableQuestionAnsweringPipeline(Pipeline):
    """
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
    PyTorch.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    ztable,queryTF   )Zmax_new_tokensc                    s   t  j|i | || _| jdkr6t }|t nt }|t	 | 
| t| jjdd opt| jjdd | _t| jjdrdnd | _d S )Ntfaggregation_labelsZnum_aggregation_labelstapas)super__init___args_parser	frameworkr   copyupdater   r   r   Zcheck_model_typegetattrmodelconfig	aggregatehasattrr    )r*   Zargs_parserargsr+   mapping	__class__r   r   r8      s    



z'TableQuestionAnsweringPipeline.__init__c                 K   s   | j f i |S r   )r>   )r*   inputsr   r   r   batch_inference   s    z.TableQuestionAnsweringPipeline.batch_inferencec                    sn  | j dkrg }g }d}|d jd }|d | j}|d | j}|d | j}d}	t|D ].}
|durR|	dddf }t|  }||
 }	t|jd D ]}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkrt
|||f ||< qt|tj| j|	dddf< ||
 }||
 }||
 }	| j|d|d|	dd
}|j}| jr||j || tjj|d}|j|tj|jj }tt t| 	 D ]\}}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkr ||f | q fdd D }qbtt|d}| js|fS |tt|dfS g }g }d}|d jd }|d }|d }|d  }d}	t|D ],}
|dur|	dddf }tj|tj d}||
 }	t|jd D ]}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkrN|dkrN|dkrNt
|||f ||< qN||	dddf< ||
 }||
 }||
 }	| jtj!|ddtj!|ddtj!|	ddd
}|j}| jrH||j || t"j#$t"%|t"jt"%|t"j }tt tt"| 	 D ]\}}|	dddf 	 | }|	dddf 	 | d }|	ddd	f 	 | d }|dkr|dkr|dkr ||f | q fdd D }qt"&t|d}| jsV|fS |t"&t|dfS dS )z
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        ptN	input_idsr   attention_masktoken_type_ids   r   r   )rI   rJ   rK   )logitsc                    s$   i | ]}|t  |  d kqS g      ?nparraymeanr   keyZcoords_to_probsr   r   
<dictcomp>   r   zGTableQuestionAnsweringPipeline.sequential_inference.<locals>.<dictcomp>)Zdtype)Zaxisc                    s$   i | ]}|t  |  d kqS rN   rO   rS   rU   r   r   rV     r   )'r:   shapetoZdevicerangerP   Z
zeros_likecpunumpytolistinttorchZ
from_numpyr    longr>   Z	unsqueezerM   r@   appendZlogits_aggregationdistributionsZ	BernoulliZprobsZfloat32collectionsdefaultdictr$   	enumerateZsqueezecattupleZint32Zexpand_dimsr4   mathZsigmoidcastconcat)r*   rF   Z
all_logitsZall_aggregationsZprev_answersZ
batch_sizerI   rJ   rK   Ztoken_type_ids_exampleindexZprev_labels_exampleZmodel_labelsiZ
segment_idZcol_idZrow_idZinput_ids_exampleZattention_mask_exampleoutputsrM   Zdist_per_tokenZprobabilitiespcolrowZlogits_batchr   rU   r   sequential_inference   s    
&

"


z3TableQuestionAnsweringPipeline.sequential_inferencec                    s<   | j |i |}t j|fi |}t|dkr8|d S |S )a  
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `list[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence is provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`list[tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`list[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        """
        pipeline_inputs = self._args_parser(*args, **kwargs)

        results = super().__call__(pipeline_inputs, **kwargs)
        if len(results) == 1:
            return results[0]
        return results

    def _sanitize_parameters(self, sequential=None, padding=None, truncation=None, **kwargs):
        preprocess_params = {}
        if padding is not None:
            preprocess_params["padding"] = padding
        if truncation is not None:
            preprocess_params["truncation"] = truncation

        forward_params = {}
        if sequential is not None:
            forward_params["sequential"] = sequential

        if getattr(self, "assistant_model", None) is not None:
            forward_params["assistant_model"] = self.assistant_model
        if getattr(self, "assistant_tokenizer", None) is not None:
            forward_params["tokenizer"] = self.tokenizer
            forward_params["assistant_tokenizer"] = self.assistant_tokenizer

        return preprocess_params, forward_params, {}

    def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=None):
        if truncation is None:
            if self.type == "tapas":
                truncation = "drop_rows_to_fit"
            else:
                truncation = "do_not_truncate"

        table, query = pipeline_input["table"], pipeline_input["query"]
        if table.empty:
            raise ValueError("table is empty")
        if query is None or query == "":
            raise ValueError("query is empty")
        inputs = self.tokenizer(table, query, return_tensors=self.framework, truncation=truncation, padding=padding)
        inputs["table"] = table
        return inputs

    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
        table = model_inputs.pop("table")

        if self.type == "tapas":
            if sequential:
                outputs = self.sequential_inference(**model_inputs)
            else:
                outputs = self.batch_inference(**model_inputs)
        else:
            # A user-defined `generation_config` passed to the pipeline call takes precedence.
            if "generation_config" not in generate_kwargs:
                generate_kwargs["generation_config"] = self.generation_config

            outputs = self.model.generate(**model_inputs, **generate_kwargs)
        model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
        return model_outputs

    def postprocess(self, model_outputs):
        inputs = model_outputs["model_inputs"]
        table = model_outputs["table"]
        outputs = model_outputs["outputs"]
        if self.type == "tapas":
            if self.aggregate:
                logits, logits_agg = outputs[:2]
                predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits, logits_agg)
                answer_coordinates_batch, agg_predictions = predictions
                aggregators = {i: self.model.config.aggregation_labels[pred] for i, pred in enumerate(agg_predictions)}

                no_agg_label_index = self.model.config.no_aggregation_label_index
                aggregators_prefix = {
                    i: aggregators[i] + " > " for i, pred in enumerate(agg_predictions) if pred != no_agg_label_index
                }
            else:
                logits = outputs[0]
                predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits)
                answer_coordinates_batch = predictions[0]
                aggregators = {}
                aggregators_prefix = {}

            answers = []
            for index, coordinates in enumerate(answer_coordinates_batch):
                cells = [table.iat[coordinate] for coordinate in coordinates]
                aggregator = aggregators.get(index, "")
                aggregator_prefix = aggregators_prefix.get(index, "")
                answer = {
                    "answer": aggregator_prefix + ", ".join(cells),
                    "coordinates": coordinates,
                    "cells": cells,
                }
                if aggregator:
                    answer["aggregator"] = aggregator

                answers.append(answer)
            if len(answers) == 0:
                raise PipelineException("Table question answering", self.model.config.name_or_path, "Empty answer")
        else:
            answers = [{"answer": answer} for answer in self.tokenizer.batch_decode(outputs, skip_special_tokens=True)]

        return answers if len(answers) > 1 else answers[0]