a
    ht                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 e	
eZdd Zdd	 Zd
d Zdd Zdd Zdd Zd,ddZdd Zdd Zdd Zdd Zdd Zd-dd Zd.d"d#Zd$d% Zd&d' Zd(d) Zd*d+ ZdS )/a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dt j}t |d| S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregex r   c/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/data/metrics/squad_metrics.pyremove_articles'   s    z)normalize_answer.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinsplitr
   r   r   r   white_space_fix+   s    z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s   | ]}| vr|V  qd S Nr   ).0chexcluder   r   	<genexpr>0       z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r   r   r   r   remove_punc.   s    
z%normalize_answer.<locals>.remove_puncc                 S   s   |   S r   )lowerr   r   r   r   r   2   s    znormalize_answer.<locals>.lowerr   )sr   r   r   r   r   r   r   normalize_answer$   s
    r!   c                 C   s   | sg S t |  S r   )r!   r   )r    r   r   r   
get_tokens8   s    r"   c                 C   s   t t| t|kS r   )intr!   )a_golda_predr   r   r   compute_exact>   s    r&   c           	      C   s   t | }t |}t|t|@ }t| }t|dksHt|dkrTt||kS |dkr`dS d| t| }d| t| }d| | ||  }|S )Nr         ?   )r"   collectionsCountersumvalueslenr#   )	r$   r%   Z	gold_toksZ	pred_tokscommonZnum_same	precisionZrecallf1r   r   r   
compute_f1B   s    r1   c                    s   i }i }| D ]x}|j }dd |jD }|s0dg}||vrHtd|  q||  t fdd|D ||< t fdd|D ||< q||fS )zV
    Computes the exact and f1 scores from the examples and the model predictions
    c                 S   s    g | ]}t |d  r|d  qS r   )r!   )r   Zanswerr   r   r   
<listcomp>[   r   z"get_raw_scores.<locals>.<listcomp>r   zMissing prediction for c                 3   s   | ]}t | V  qd S r   )r&   r   aZ
predictionr   r   r   f   r   z!get_raw_scores.<locals>.<genexpr>c                 3   s   | ]}t | V  qd S r   )r1   r3   r5   r   r   r   g   r   )qas_idanswersprintmax)examplespredsexact_scores	f1_scoresexampler6   Zgold_answersr   r5   r   get_raw_scoresR   s    r?   c                 C   sF   i }|   D ]4\}}|| |k}|r8t||  ||< q|||< q|S r   )itemsfloat)scoresna_probsqid_to_has_ansZna_prob_threshZ
new_scoresqidr    Zpred_nar   r   r   apply_no_ans_thresholdl   s    
rF   c              	      s   |sHt  }tddt   | fddt  | fd|fgS t |}tddt fdd|D  | fddtfdd|D  | fd|fgS d S )Nexact      Y@r0   totalc                 3   s   | ]} | V  qd S r   r   r   k)r<   r   r   r      r   z!make_eval_dict.<locals>.<genexpr>c                 3   s   | ]} | V  qd S r   r   rJ   )r=   r   r   r      r   )r-   r)   OrderedDictr+   r,   )r<   r=   qid_listrI   r   )r<   r=   r   make_eval_dictw   s      rN   c                 C   s$   |D ]}|| | | d| < qd S )N_r   )	main_evalZnew_evalprefixrK   r   r   r   
merge_eval   s    rR   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|vrPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	\}}|D ],}
|
 sq|d
7 }|
|vrq|||
 7 }qd| t| |d| | fS )Nc                 3   s   | ]} | sd V  qdS    Nr   rJ   rD   r   r   r      r   z&find_best_thresh_v2.<locals>.<genexpr>        c                    s    |  S r   r   rK   rC   r   r   <lambda>   r   z%find_best_thresh_v2.<locals>.<lambda>keyr   )r   r   rT   rH   r'   r+   sorted	enumerater-   )r;   rB   rC   rD   
num_no_ans	cur_score
best_scorebest_threshrM   irE   diffZhas_ans_scoreZhas_ans_cntr   rC   rD   r   find_best_thresh_v2   s4    

rg   c                 C   s\   t ||||\}}}t ||||\}	}
}|| d< || d< |	| d< |
| d< || d< || d< d S )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)rg   )rP   r;   	exact_rawf1_rawrC   rD   rh   exact_threshrl   rj   	f1_threshrm   r   r   r   find_all_best_thresh_v2   s    rr   c                    s   t fddD }|}|}d}t  fddd}t|D ]R\}	}
|
|vrPq>|
 rb||
 }n| |
 rpd}nd}||7 }||kr>|} |
 }q>d	| t| |fS )
Nc                 3   s   | ]} | sd V  qdS rS   r   rJ   rU   r   r   r      r   z#find_best_thresh.<locals>.<genexpr>rV   c                    s    |  S r   r   rW   rX   r   r   rY      r   z"find_best_thresh.<locals>.<lambda>rZ   r\   r   rH   r]   )r;   rB   rC   rD   r`   ra   rb   rc   rM   rO   rE   re   r   rf   r   find_best_thresh   s$    

rs   c           
      C   sH   t ||||\}}t ||||\}}	|| d< || d< || d< |	| d< d S )Nrh   ri   rj   rk   )rs   )
rP   r;   rn   ro   rC   rD   rh   rp   rj   rq   r   r   r   find_all_best_thresh   s    rt   r'   c                 C   s   dd | D }dd |  D }dd |  D }|d u rFt|d}t| |\}}t||||}	t||||}
t|	|
}|rt|	|
|d}t||d |rt|	|
|d}t||d	 |rt|||||| |S )
Nc                 S   s   i | ]}|j t|jqS r   )r6   boolr7   )r   r>   r   r   r   
<dictcomp>   r   z"squad_evaluate.<locals>.<dictcomp>c                 S   s   g | ]\}}|r|qS r   r   r   r6   Z
has_answerr   r   r   r2      r   z"squad_evaluate.<locals>.<listcomp>c                 S   s   g | ]\}}|s|qS r   r   rw   r   r   r   r2      r   rV   )rM   ZHasAnsZNoAns)r@   dictfromkeysr?   rF   rN   rR   rt   )r:   r;   Zno_answer_probsZno_answer_probability_thresholdZqas_id_to_has_answerZhas_answer_qidsZno_answer_qidsrG   r0   Zexact_thresholdZf1_thresholdZ
evaluationZhas_ans_evalZno_ans_evalr   r   r   squad_evaluate   s(    
rz   Fc                 C   sf  dd }t |d}d||}|| }|dkrT|rPtd|  d| d |S |t|  d	 }||\}	}
||\}}t|	t|kr|rtd
|	 d| d |S i }| D ]\}}|||< qd}||v r|| }||
v r|
| }|du r|rtd |S d}||v r4|| }||
v r4|
| }|du rR|rNtd |S |||d	  }|S )z;Project the tokenized prediction back to the original text.c                 S   sP   g }t  }t| D ](\}}|dkr&q||t|< || qd|}||fS )Nr   r   )r)   rL   r_   r-   appendr   )r
   Zns_charsZns_to_s_maprd   cZns_textr   r   r   _strip_spaces  s    
z%get_final_text.<locals>._strip_spaces)do_lower_caser   r\   zUnable to find text: 'z' in ''rT   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor-   r@   )Z	pred_text	orig_textr~   verbose_loggingr}   	tokenizertok_textZstart_positionZend_positionZorig_ns_textZorig_ns_to_s_mapZtok_ns_textZtok_ns_to_s_mapZtok_s_to_ns_maprd   Z	tok_indexZorig_start_positionZns_start_positionZorig_end_positionZns_end_positionZoutput_textr   r   r   get_final_text   sL    








r   c                 C   sN   t t| dd dd}g }tt|D ]"}||kr6 qJ||| d  q&|S )z"Get the n-best logits from a list.c                 S   s   | d S )NrT   r   xr   r   r   rY   ^  r   z#_get_best_indexes.<locals>.<lambda>Tr[   reverser   )r^   r_   ranger-   r{   )Zlogitsn_best_sizeZindex_and_scoreZbest_indexesrd   r   r   r   _get_best_indexes\  s    r   c                 C   s|   | sg S d}| D ]}|du s$||kr|}qg }d}| D ]$}t || }|| ||7 }q6g }|D ]}|||  qd|S )z,Compute softmax probability over raw logits.NrV   )mathexpr{   )rB   Z	max_scoreZscoreZ
exp_scoresZ	total_sumr   probsr   r   r   _compute_softmaxh  s     

r   c           8      C   s  |rt d|  |r(t d|  |r@|
r@t d|  tt}|D ]}||j | qNi }|D ]}|||j< qltdg d}t	 }t	 }t	 }t
| D ]\}}|| }g }d}d}d}d}t
|D ]*\}}||j }t|j|}t|j|}|
rF|jd |jd  } | |k rF| }|}|jd }|jd }|D ]}!|D ]}"|!t|jkrjqR|"t|jkr~qR|!|jvrqR|"|jvrqR|j|!dsqR|"|!k rqR|"|! d	 }#|#|krڐqR||||!|"|j|! |j|" d qRqJq|
r&|||dd||d t|d
d dd}tdg d}$i }%g }&|D ]}'t|&|krl q:||'j }|'jdkr|j|'j|'jd	  }(|j|'j })|j|'j }*|j|)|*d	  }+||(},|, },d|, },d|+}-t|,|-||	}.|.|%v rqTd|%|.< nd}.d|%|.< |&|$|.|'j|'jd qT|
rd|%vr^|&|$d||d t|&d	kr|&d|$dddd |&s|&|$dddd t|&d	k rt dg }/d}0|&D ]*}1|/|1j|1j  |0s|1j!r|1}0qt"|/}2g }3t
|&D ]F\}4}1t	 }5|1j!|5d< |2|4 |5d< |1j|5d< |1j|5d< |3|5 qt|3d	k r\t d|
sv|3d d ||j#< n<||0j |0j }6|6||j#< |6|krd||j#< n|0j!||j#< |3||j#< q|rt$|d(}7|7%t&j'|ddd  W d   n1 s0    Y  |rRt$|d(}7|7%t&j'|ddd  W d   n1 sH0    Y  |r|
rt$|d(}7|7%t&j'|ddd  W d   n1 s0    Y  |S )zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPrediction)feature_indexstart_index	end_indexstart_logit	end_logit@B r   FrT   c                 S   s   | j | j S r   )r   r   r   r   r   r   rY     r   z,compute_predictions_logits.<locals>.<lambda>Tr   NbestPrediction)r
   r   r   r   r   emptyrV   No valid predictionsNr
   probabilityr   r   w   indent
)(r   r   r)   defaultdictlistexample_indexr{   	unique_id
namedtuplerL   r_   r   start_logits
end_logitsr-   tokenstoken_to_orig_maptoken_is_max_contextgetr^   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr
   r   r6   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr~   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativeZnull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   r>   featuresprelim_predictions
score_nullZmin_null_feature_indexZnull_start_logitZnull_end_logitr   Zstart_indexesZend_indexesZfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonrd   output
score_diffwriterr   r   r   compute_predictions_logits  s   






		

	








888r   c           8      C   sb  t dg d}t dg d}td|  t t}|D ]}||j | q>i }|D ]}|||j< q\t 	 }t 	 }t 	 }t
| D ]\}}|| }g }d}t
|D ]\}}||j }|j}t||}t|D ]}t|	D ]}|j| }|j| } ||	 | }!|j|! }"|j|! }#| |jd kr.q|#|jd kr@q|j| dsRq|#| k r^q|#|  d }$|$|krvq|||| |#||"d qqqt|d	d
 dd}i }%g }&|D ]}'t|&|kr q||'j }|j|'j|'jd  }(|j|'j })|j|'j }*|j|)|*d  }+||(},|, },d|,  },d|+}-t!|drR|j"}.n|j#}.t$|,|-|.|}/|/|%v rtqd|%|/< |&||/|'j%|'j&d q|&s|&|dddd g }0d}1|&D ]"}2|0|2j%|2j&  |1s|2}1qt'|0}3g }4t
|&D ]F\}}2t 	 }5|2j(|5d< |3| |5d< |2j%|5d< |2j&|5d< |4|5 qt|4dk rTt)d|1du rft)d|}6|6||j*< |1j(||j*< |4||j*< qt+|d(}7|7,t-j.|ddd  W d   n1 s0    Y  t+|d(}7|7,t-j.|ddd  W d   n1 s
0    Y  |
r^t+|d(}7|7,t-j.|ddd  W d   n1 sT0    Y  |S )z
    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
    null if needed.

    Requires utils_squad_evaluate.py
    r   )r   r   r   start_log_probend_log_probr   )r
   r   r   r   r   rT   Fc                 S   s   | j | j S r   )r   r   r   r   r   r   rY     r   z/compute_predictions_log_probs.<locals>.<lambda>Tr   r   r~   r   g    .Nr
   r   r   r   r   r   r   r   r   )/r)   r   r   r   r   r   r   r{   r   rL   r_   Z
cls_logitsminr   r   Zstart_top_indexr   Zend_top_indexZparagraph_lenr   r   r^   r-   r   r   r   r   r   r   r   r   r   r   hasattrr~   Zdo_lowercase_and_remove_accentr   r   r   r   r
   r   r6   r   r   r   r   )8r   r   r   r   r   r   r   r   Zstart_n_topZ	end_n_topr   r   r   r   r   r   r   r   r   r   r   r   r   r>   r   r   r   r   Zcur_null_scorerd   jr   r   Zj_indexr   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   compute_predictions_log_probsN  s    




















888r   )N)Nr'   )F)__doc__r)   r   r   r   r   Zmodels.bertr   utilsr   Z
get_logger__name__r   r!   r"   r&   r1   r?   rF   rN   rR   rg   rr   rs   rt   rz   r   r   r   r   r   r   r   r   r   <module>   s6   	

"


^ P