a
    hrV                     @   s   d dl Z d dlmZ d dlm  mZ ddlmZmZm	Z	 ddl
mZmZmZmZmZ e rhd dlmZ e rzd dlmZ e jjdd	 ZG d
d dejZG dd dejZdddZdS )    N   )is_scipy_availableis_vision_availablerequires_backends   )box_iou	dice_lossgeneralized_box_iounested_tensor_from_tensor_listsigmoid_focal_losslinear_sum_assignment)center_to_corners_formatc                 C   s   dd t | |D S )Nc                 S   s   g | ]\}}||d qS ))logits
pred_boxes ).0abr   r   Z/var/www/html/assistant/venv/lib/python3.9/site-packages/transformers/loss/loss_rt_detr.py
<listcomp>+       z!_set_aux_loss.<locals>.<listcomp>)zip)outputs_classoutputs_coordr   r   r   _set_aux_loss&   s    r   c                       s0   e Zd ZdZ fddZe dd Z  ZS )RTDetrHungarianMatchera  This class computes an assignment between the targets and the predictions of the network

    For efficiency reasons, the targets don't include the no_object. Because of this, in general, there are more
    predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).

    Args:
        config: RTDetrConfig
    c                    sz   t    t| dg |j| _|j| _|j| _|j	| _	|j
| _|j| _| j| j  krj| j  krjdkrvn ntdd S )NZscipyr   z#All costs of the Matcher can't be 0)super__init__r   Zmatcher_class_cost
class_costZmatcher_bbox_cost	bbox_costZmatcher_giou_cost	giou_costuse_focal_lossZmatcher_alphaalphaZmatcher_gammagamma
ValueError)selfconfig	__class__r   r   r   9   s    
(zRTDetrHungarianMatcher.__init__c                 C   s~  |d j dd \}}|d dd}tdd |D }td	d |D }| jrt|d dd}|dd|f }d| j || j  d| d
 	   }	| jd| | j  |d
 	   }
|
|	 }n(|d dd
d}|dd|f  }tj||dd}tt|t| }| j| | j|  | j|  }|||d }dd |D }dd t||dD }dd |D S )a  Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        r   Nr   r   r   r   c                 S   s   g | ]}|d  qS class_labelsr   r   vr   r   r   r   b   r   z2RTDetrHungarianMatcher.forward.<locals>.<listcomp>c                 S   s   g | ]}|d  qS boxesr   r,   r   r   r   r   c   r   g:0yE>)pc                 S   s   g | ]}t |d  qS r.   lenr,   r   r   r   r   y   r   c                 S   s   g | ]\}}t || qS r   r   )r   icr   r   r   r   z   r   c                 S   s0   g | ](\}}t j|t jd t j|t jd fqS )dtype)torch	as_tensorint64)r   r4   jr   r   r   r   |   r   )shapeflattenr8   catr"   Fsigmoidr#   r$   logZsoftmaxZcdistr	   r   r    r   r!   viewcpu	enumeratesplit)r&   outputstargetsZ
batch_sizeZnum_queriesZout_bboxZ
target_idsZtarget_bboxZout_probZneg_cost_classZpos_cost_classr   r    r!   Zcost_matrixsizesindicesr   r   r   forwardH   s&    &"
zRTDetrHungarianMatcher.forward)	__name__
__module____qualname____doc__r   r8   no_gradrJ   __classcell__r   r   r(   r   r   .   s   
r   c                       s   e Zd ZdZ fddZdddZdddZe d	d
 Z	dd Z
dd ZdddZdd Zdd Zd ddZdd Zedd Zdd Z  ZS )!
RTDetrLossah  
    This class computes the losses for RTDetr. The process happens in two steps: 1) we compute hungarian assignment
    between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth /
    prediction (supervise class and box).

    Args:
        matcher (`DetrHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        weight_dict (`Dict`):
            Dictionary relating each loss with its weights. These losses are configured in RTDetrConf as
            `weight_loss_vfl`, `weight_loss_bbox`, `weight_loss_giou`
        losses (`list[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
        alpha (`float`):
            Parameter alpha used to compute the focal loss.
        gamma (`float`):
            Parameter gamma used to compute the focal loss.
        eos_coef (`float`):
            Relative classification weight applied to the no-object category.
        num_classes (`int`):
            Number of object categories, omitting the special no-object category.
    c                    s|   t    t|| _|j| _|j|j|jd| _	ddg| _
|j| _t|jd }| j|d< | d| |j| _|j| _d S )N)loss_vfl	loss_bbox	loss_giouvflr/   r   r0   empty_weight)r   r   r   matcherZ
num_labelsnum_classesZweight_loss_vflZweight_loss_bboxZweight_loss_giouweight_dictlossesZeos_coefficientZeos_coefr8   ZonesZregister_bufferZfocal_loss_alphar#   Zfocal_loss_gammar$   )r&   r'   rV   r(   r   r   r      s    



zRTDetrLoss.__init__Tc                 C   st  d|vrt dd|vr t d| |}|d | }tjdd t||D dd}tt| t|\}	}
t|	}	|d }td	d t||D }tj	|j
d d
 | jtj|jd}|||< tj|| jd ddd df }tj||jd}|	|j||< |d| }t| }| j|| j d|  | }tj|||dd}|d |j
d  | }d|iS )Nr   #No predicted boxes found in outputsr   z$No predicted logits found in outputsc                 S   s    g | ]\}\}}|d  | qS r.   r   r   _target_r4   r   r   r   r      r   z.RTDetrLoss.loss_labels_vfl.<locals>.<listcomp>r   dimc                 S   s    g | ]\}\}}|d  | qS r*   r   r\   r   r   r   r      r   r   r7   devicer   rX   .r0   r6   none)weight	reductionrR   )KeyError_get_source_permutation_idxr8   r>   r   r   r   detachdiagfullr<   rX   r:   rb   r?   one_hotZ
zeros_liker7   toZ	unsqueezer@   r#   powr$    binary_cross_entropy_with_logitsmeansum)r&   rF   rG   rI   	num_boxesrA   idx	src_boxestarget_boxesZiousr^   
src_logitstarget_classes_originaltarget_classestargetZtarget_score_originalZtarget_scoreZ
pred_scorere   lossr   r   r   loss_labels_vfl   s0    

 zRTDetrLoss.loss_labels_vflc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jdd | jtj|j	d}	||	|< t
|dd|	| j}
d	|
i}|S )
zClassification loss (NLL)
        targets dicts must contain the key "class_labels" containing a tensor of dim [nb_target_boxes]
        r   z#No logits were found in the outputsc                 S   s    g | ]\}\}}|d  | qS r*   r   r\   r   r   r   r      r   z*RTDetrLoss.loss_labels.<locals>.<listcomp>Nr   ra   r   loss_ce)rg   rh   r8   r>   r   rk   r<   rX   r:   rb   r?   Zcross_entropy	transposeZclass_weight)r&   rF   rG   rI   rr   rA   rv   rs   rw   rx   r|   rZ   r   r   r   loss_labels   s    
zRTDetrLoss.loss_labelsc                 C   sf   |d }|j }tjdd |D |d}|d|jd d kd}tj|	 |	 }	d|	i}
|
S )z
        Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. This is not
        really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
        r   c                 S   s   g | ]}t |d  qS r*   r2   r,   r   r   r   r      r   z/RTDetrLoss.loss_cardinality.<locals>.<listcomp>)rb   r0   r   Zcardinality_error)
rb   r8   r9   Zargmaxr<   rq   nn
functionall1_lossfloat)r&   rF   rG   rI   rr   r   rb   Ztarget_lengthsZ	card_predZcard_errrZ   r   r   r   loss_cardinality   s    zRTDetrLoss.loss_cardinalityc                 C   s   d|vrt d| |}|d | }tjdd t||D dd}i }tj||dd}	|	 | |d	< d
tt	t
|t
| }
|
 | |d< |S )a;  
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss. Targets dicts must
        contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes are expected in
        format (center_x, center_y, w, h), normalized by the image size.
        r   r[   c                 S   s    g | ]\}\}}|d  | qS r.   r   )r   tr^   r4   r   r   r   r      r   z)RTDetrLoss.loss_boxes.<locals>.<listcomp>r   r_   rd   rf   rS   r   rT   )rg   rh   r8   r>   r   r?   r   rq   rj   r	   r   )r&   rF   rG   rI   rr   rs   rt   ru   rZ   rS   rT   r   r   r   
loss_boxes   s    
zRTDetrLoss.loss_boxesc                 C   s   d|vrt d| |}| |}|d }|| }dd |D }t| \}	}
|	|}	|	| }	tjj|dddf |	j	dd ddd	}|ddd
f 
d}|	
d}	|	|j	}	t||	|t||	|d}|S )z
        Compute the losses related to the masks: the focal loss and the dice loss. Targets dicts must contain the key
        "masks" containing a tensor of dim [nb_target_boxes, h, w].
        Z
pred_masksz#No predicted masks found in outputsc                 S   s   g | ]}|d  qS )masksr   r   r   r   r   r   r     r   z)RTDetrLoss.loss_masks.<locals>.<listcomp>NZbilinearF)sizemodeZalign_cornersr   r   )Z	loss_maskZ	loss_dice)rg   rh   _get_target_permutation_idxr
   Z	decomposerm   r   r   Zinterpolater<   r=   rB   r   r   )r&   rF   rG   rI   rr   
source_idx
target_idxZsource_masksr   Ztarget_masksZvalidrZ   r   r   r   
loss_masks  s(    





zRTDetrLoss.loss_masksc                 C   s   |d }|  |}tdd t||D }tj|jd d | jtj|jd}	||	|< t	j
|	| jd ddd d	f }
t	j||
d
 dd}|d |jd  | }d|iS )Nr   c                 S   s    g | ]\}\}}|d  | qS r*   r   r\   r   r   r   r   $  r   z.RTDetrLoss.loss_labels_bce.<locals>.<listcomp>r   ra   r   rc   .r0   g      ?rd   r   Zloss_bce)rh   r8   r>   r   rk   r<   rX   r:   rb   r?   rl   ro   rp   rq   r&   rF   rG   rI   rr   rA   rv   rs   rw   rx   ry   rz   r   r   r   loss_labels_bce!  s    
 zRTDetrLoss.loss_labels_bcec                 C   s4   t dd t|D }t dd |D }||fS )Nc                 S   s    g | ]\}\}}t ||qS r   r8   Z	full_like)r   r4   sourcer^   r   r   r   r   1  r   z:RTDetrLoss._get_source_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   r   r^   r   r   r   r   2  r   r8   r>   rD   )r&   rI   	batch_idxr   r   r   r   rh   /  s    z&RTDetrLoss._get_source_permutation_idxc                 C   s4   t dd t|D }t dd |D }||fS )Nc                 S   s    g | ]\}\}}t ||qS r   r   )r   r4   r^   ry   r   r   r   r   7  r   z:RTDetrLoss._get_target_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   r^   ry   r   r   r   r   8  r   r   )r&   rI   r   r   r   r   r   r   5  s    z&RTDetrLoss._get_target_permutation_idxc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jd d | jtj|j	d}	||	|< t
j|	| jd dd	d d
f }
t||
| j| j}|d |jd  | }d|iS )Nr   zNo logits found in outputsc                 S   s    g | ]\}\}}|d  | qS r*   r   r\   r   r   r   r   B  r   z0RTDetrLoss.loss_labels_focal.<locals>.<listcomp>r   ra   r   rc   .r0   Z
loss_focal)rg   rh   r8   r>   r   rk   r<   rX   r:   rb   r?   rl   r   r#   r$   rp   rq   r   r   r   r   loss_labels_focal;  s    
 zRTDetrLoss.loss_labels_focalc                 C   sL   | j | j| j| j| j| j| jd}||vr:td| d|| ||||S )N)labelsZcardinalityr/   r   ZbceZfocalrU   zLoss z not supported)r~   r   r   r   r   r   r{   r%   )r&   rz   rF   rG   rI   rr   Zloss_mapr   r   r   get_lossM  s    	zRTDetrLoss.get_lossc           
   	   C   s   | d | d  }}dd |D }|d d j }g }t|D ]\}}|dkrtj|tj|d}	|	|}	t|| t|	ks~J ||| |	f q:|tjdtj|dtjdtj|df q:|S )Ndn_positive_idxdn_num_groupc                 S   s   g | ]}t |d  qS r*   r2   r   r   r   r   r   ^  r   z6RTDetrLoss.get_cdn_matched_indices.<locals>.<listcomp>r   r+   ra   )	rb   rD   r8   Zaranger:   Ztiler3   appendZzeros)
Zdn_metarG   r   r   Znum_gtsrb   Zdn_match_indicesr4   Znum_gtZgt_idxr   r   r   get_cdn_matched_indices[  s     
z"RTDetrLoss.get_cdn_matched_indicesc           
   	      s  dd |  D }||}tdd |D }tj|gtjtt| j	d}tj
|dd }i }jD ]4}|||||fddD | qnd	|v r,t|d	 D ]p\ }||}jD ]T}|d
krqԈ|||||fddD  fdd  D | qqd|v rd|vrHtd|d |}||d d  }t|d D ]x\ }jD ]f}|d
krqi }	j|||||fi |	fddD  fdd  D | qqt|S )a  
        This performs the loss computation.

        Args:
             outputs (`dict`, *optional*):
                Dictionary of tensors, see the output specification of the model for the format.
             targets (`list[dict]`, *optional*):
                List of dicts, such that `len(targets) == batch_size`. The expected keys in each dict depends on the
                losses applied, see each loss' doc.
        c                 S   s   i | ]\}}d |vr||qS )auxiliary_outputsr   r   kr-   r   r   r   
<dictcomp>}  r   z&RTDetrLoss.forward.<locals>.<dictcomp>c                 s   s   | ]}t |d  V  qdS )r+   Nr2   r   r   r   r   	<genexpr>  r   z%RTDetrLoss.forward.<locals>.<genexpr>ra   r   )minc                    s*   i | ]"}|j v r| | j |  qS r   rY   r   r   l_dictr&   r   r   r     r   r   r   c                    s*   i | ]"}|j v r| | j |  qS r   r   r   r   r   r   r     r   c                    s    i | ]\}}|d    |qS )Z_aux_r   r   r4   r   r   r     r   dn_auxiliary_outputsdenoising_meta_valuesz}The output must have the 'denoising_meta_values` key. Please, ensure that 'outputs' includes a 'denoising_meta_values' entry.r   c                    s*   i | ]"}|j v r| | j |  qS r   r   r   r   r   r   r     r   c                    s    i | ]\}}|d    |qS )Z_dn_r   r   r   r   r   r     r   )itemsrW   rq   r8   r9   r   nextitervaluesrb   clampitemrZ   r   updaterD   r%   r   )
r&   rF   rG   Zoutputs_without_auxrI   rr   rZ   rz   r   kwargsr   )r4   r   r&   r   rJ   r  sJ    "






zRTDetrLoss.forward)T)T)T)T)rK   rL   rM   rN   r   r{   r~   r8   rO   r   r   r   r   rh   r   r   r   staticmethodr   rJ   rP   r   r   r(   r   rQ      s    





rQ   c
                 K   s  t |}|| i }| |d< ||d< |jr|	d urdtj||	d dd\}}tj||	d dd\}}t|d d d df dd|d d d df dd}||d	< |d	 t|g|g |	d urt|dd|dd|d
< |	|d< |||}t|	 }|||fS )Nr   r   Zdn_num_splitr   r_   r0   r   r   r   r   r   )
rQ   rm   Zauxiliary_lossr8   rE   r   r}   extendrq   r   )r   r   rb   r   r'   r   r   Zenc_topk_logitsZenc_topk_bboxesr   r   	criterionZoutputs_lossZdn_out_coordZdn_out_classr   Z	loss_dictrz   r   r   r   RTDetrForObjectDetectionLoss  s(    
:
r   )NNNNN)r8   Ztorch.nnr   Ztorch.nn.functionalr   r?   utilsr   r   r   Zloss_for_object_detectionr   r   r	   r
   r   Zscipy.optimizer   Ztransformers.image_transformsr   ZjitZunusedr   Moduler   rQ   r   r   r   r   r   <module>   s(   	
Q  <     