a
    h                    @  s2  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlZd dlZd dl Zd dl!m"  m#Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZB ddlCmDZD ddlEmFZFmGZGmHZHmIZI ddlJmKZKmLZL ddlHmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZ ddl"m[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZf ddlgmhZhmiZi erNddljmkZk edZledZme=ddgZneoepZqi Zrdesd< i Ztdesd < e=ejujv  ZwejhjxZxejhjyZyejhjzZze=ejujv  Z{e=ejujv ej|j}gZ~e=ejujv  Zi Zd!esd"< ejhjZd#d$ Zd%d&d'd(Zd)d*d+d,d-Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zeexjexjexjexjexjexjexjexjexjexjexjexjexjexjg ejejejejejejejejejejejejejd6Zd7d8d9d:Zd;d< Zd=d> Zd?d@dAdBZdCdD ZdEdF ZdGdHdIdJdIdKdLdMdNZdOdP ZdJd@dQdRZdSe1jdSerfdJdTdUdVdWZdXdY ZddZd[Zdd\d]Zdd^d_Zdd`dadbdcddZeej|j}dd@dedf Z}eezjdd@d`dadbdgdhZdSdid`dadbdjdkZeexjjdd@d`dadbdldmZdSdSdnd`dodpdqdrZeezjdd@dd`dodpdsdtZddue1jdSdddSdfdvdwZdxdy Ze  ddzd{ZeexjdSdd|d}d~ ZeexjdSdd|dd Zeexjexjexjexjezjgdd ZeexdreexjɃeǃ eexjdd@dddZeexjdd@dddZeexjgdddZeexj̓dd Zeexj΃dd Zeexjσdd ZeexjЃdd Zeexjj҃dd ZeexjӃdd Zeexjdd@dd Zeezjdd@dd Zeexjdd@dd Zeexj׃dd Zeexjdd@eexjdd@eexjdd@dd Zeexjdd@dd Zeexjdd@dddZeexjdd@dddZeexjdd@dddZeexjdd@dddZdddZeejdd@d`d`d`d7d7d7dad`dddZeejdd@ddd`d`d`d7d7d7dadd`d	ddZeejjdd@d`dd7d7d7dad`dddZeejjdd@ddd`dd7d7d7dadd`dddZeejjdd@d`d`d`d7d7dad`dddZeejjdd@ddd`d`d`d7d7dadd`dddZeexjdddZeexjdd@dd7d7d7dddĄZeexjdd@dd7d7d7dddƄZeexjdd@dd7d7d7dddȄZeexjdd@ddʄ Zeexjdd@ddd̄Zeexjdd@ddd΄Zeexjdd@dddЄZeexjdd@dd҄ Zeexjdd@ddԄ Zeexjdd@ddք Zddd؄ZeexjdddۄZddd݄Zejdd߄ ZdddddZdddddZdddddZdddZdd Zeejhjj dd@dd Z eexjdd@dd Zeexjdd@dd Zeexjjdd@dd Zeexjdd Zeddd Zdd Z	eexj
j҃Zeexj
jZeexjj҃ZeexjjZeexj eexj
dd Z
eexjdd ZeeGjdd@d d ZeeGjdd@dodddZeeGjdd@dd ZeeGjdd@dd ZeeGjdd@d d	d
d`dd7dddZeeGjdd@d d	d7d7d
d`d7dddZd`ddddZd`ddddZeexjj dd@dSdSdddd`d`dIdIddd`dddZeexj!e1j"d@dSdSdd`d`dIdId d!d"Z!d#d$ Z#d%d& Z$d'd( Z%d)d* Z&d+d, Z'd-d. Z(d/d0 Z)d1d2 Z*eexj+ eexj, eexj- eexj.dSd3 eexj/jdSd3 eexj0 eexj1dSd3 eexj2dSd3 ej34 r(eexj5dSd3 eexj6 eexj7 eexj8 eexj9j҃ eexj:j҃ eexj; eexj<j= eexj>j҃ eexj?j҃ eexj@ eexjAdSd3 eexjBe# eexje) eexjCe# eexjDe$ eexjEe$ eexjFe$ eexjG eexjH eexjH eexjI eexjJ eexjK eexjLe# eexjM eexjN eexjO eexjP eexjQ eexjR eexjS eexjT eexjUe$ eexjV eexjWe# eexjX eexjY eexjYjZ eexj[ eexj\ eexj] eexj^ eexj_ eexj` eexja eexjb eexjc eexjd eexje eexjf eexjg eexjh eexji eexjj eexjk eexjl eexjm eexjn eexjo eexjp eexjq eexjr eexjs eexjtju eexjv eexjw eexjx eexjy eexjz eexj{ eexj| eexj}j҃ eexj~jdSd3 eexje# eejjj eejjj eejjj eexj eexj eexje$ eexj eexj eexj eexj eexj eexjj eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe* eexjjҐe* eexjjҐe* eexjjҐe* eexj eexjdd@dҐd4d5Zeexjdd6d7d8Zd9d: Zeexd;rPeexje eezjd<d= Zeexjdd@d7d7d>d?d@Zeexjdd@dӐdAdBZdCdD ZeejexjgddddSdEdFdGZeejdԐdHdIZeejdJdK ZeexjdLdM ZeexjdNdO ZeexjddddPdQdRZdSdT Zeexjdd@dUdV ZdWdX ZeejexjgdddddddYdZd[Zd\d] Zd^d_ ZeexjeeZeedZeed Zd`da ZeexjdddddbdcddZeexjdddddbdedfZeexjdddddbdgdhZeezjj҃didj Zeejexjgdkdl Zeexjdd@d֐dmdnZeexjdd@dאdodpZdqdr ZdؐdsdtZdudv ZdِdwdxZeexjdd@dydz Zeexjdd@d{d| Zeexjdd@dڐd}d~ZeexjŃdېddZŐdd ZƐdd Zeexjdd@dܐddZeeGjdd@dݐddZɐdސddZeexjjdSdZeexjjdSdZeexjdd@dd Zeexjdd@dd Zedd Zeexjdd@dߐddZeexjdd@d7dddZdduddd7ddIdddZeexjdd@ddd7ddddZeexjdd@d7dddZeexjdd@d7dddZeexjdd@d7dddZeexjdd@dudd7dIdddZאddd7dIdddZeexjj҃dddddZeexjj҃dddddZeexjj҃ddddddZeexjj҃ddddddZeexjj҃dddddddZeexjj҃dddddddZܐdd Zeezjj҃dd Zd`ddddÜdĐdńZeexjdd@ddƐdǄZdȐdɐdʜdːd̄ZdȐdȐd͜dΐdτZdȐdȐdȐdМdѐd҄ZddԐdՄZdd֜dאd؄Zdِdڄ Zddۜdܐd݄Zdސd߄ Zeezjdd@dddZd`dddd`dddZeezjdd@dd Zdd Zeexjdd@dddZeexjdd@dddZeexjjdSdZeexjdd@dd ZdddZdd Zdd Zdd ZeexjjdSdZeexjdd ZeexjjdSdZeexjdd Zdd Zeexjd d Zeexjdd Zdd Zeexjj҃dddZeexjjdSdZeexjjdSdZeexjdd@dd	d
Zeexjdd@dddZdd ZeexjjdSdZeexjdd@dddZeexj jdSdZeexj dd@dddZ dd Zdd ZdddddZdd Zeexjddd8ddZdd  Zd!d" Zd#d$ Z	d%d& Z
eexjezjgdddSd'd(d)ZeexjdddSd'd*d+Zd,d- Zed.d/ ZeexjjdSdZeexjjdSdZeexjjdSdZeexjdud0d1d2 Zdd3d4Zeexjd5d6 Zeexjdd@dd7d8Zed9d: Zed;d< Zeexjdud0dd=d>Zeexjgdud0d?d@ ZdAdBdCdDdEZeezjgdud0dFdG Z eexj!exjj gdue1j"d|dHdI Zeexj#ezj#gdud0dJdK Z#eexj$ezj$gddd8dLdMZ%eexj&j҃Z'eexj(j҃Z)eexj*j҃Z+eexj,j҃Z-eexj.j҃Z/eexj&ddNdOZ&eexj(ddPdQZ(eexj*dRdS Z*eexj,dd@ddTdUZ,eexj.dd@ddVdWZ.eexj0ddd8dXdYZ0eexj1ddZd[Z2eexj3dd@d d\d]Z4eexj5dd@dd^d_Z6eezj7ed` eexj8edaZ9eexj:edbZ;eexj<edcejddZ=eexj>edeejddZ?eexj@dudfdgZ@eexjYjZdSdZAeexjYjZdd@dddSdhdidjZBeexjYjdd@ddkdlZYddmdnZCdodpdqdrZDeCexjEZEeDexjFZFeCexjGZGeCexjHZHeexjIZIeDexjJZJeDexjKZKeexjLZLeexjMdudsZMeDexjN eDexjO eexjPZPeexjQZQeexjRZReexjSdtduZSeexjTZTeexjUZUeexjVZVeCexjW eCexjXZXeexjYe1j"d@eX eCexjZ eCexj[ eCexj\ eDexjq eexj]dduejdvZ]eexj^dduejdvZ^eexj_dduejdvZ_eexj`dduejdvZ`eexjaZaeexjbZbeexjcea eexjdeb eexjeZeeexjPZPeCexjfZfeexjg eexjhdwduZheexjσ eexjiejdd eexjjee eexjkejdd eexjlejdd eexjmejdd eexjnejddZneexjoejdd eexjpejdd eCexjq eCexjr eCexjs eCexjt eCexju eCexjv eCexjw eCexjx eCexjy eCexjz eCexj{ eCexj| eCexj} eCexj~ eCexj eCexj ddxlmZmZ dydz ZeD ]ZeexeD ],\ZZZeeeeed{ )qDeezeD ],\ZZZeeeeed{ )q)q0eexjje@dudsZeexjje@dudsZeexjj e@duds eexjjeZeexjj e eexjjeZeexjjeM eexjjeM eexjjҐee eexjjҐeP eexjje eexjje eexjje eexjjeZeexjj e eexjjeZeexjeK eexjeE eexjjea eexjjea eexjjeb eexjjeb eexjjea eexjjea eexjjeb eexjjeb eexjef eexjeh eexje d|d} Zeexjjexjje eexjjexjje eexjjexjje eexjjexjje eexjjexjje eexjjexjje d~d Zeexje@ eexjeQ eexjeR eexjeS eexjeT eexjeU eexjeV eexje eexjj e eexjje eexje] eexje^ eexje_ eexje` eexjeM eexjeI eexjeJ eexjeQ eexjeR eexjeT eexjeU eexjeV eexjexj eexjexj eexjexj eexjexj eexjexj eexjdddZeexjjÃdd ZeexjĐjÃdd ZeexjŃdd Ze8ơ D ]"\ZǐZee9eǃeȃ /qNeejɃdd Zeexjdd Zeejhjːj̃dd Zeejhjΐjσdd ZeejhjxjАjуdd ZeejhjӐd80r>eejhjӐjj҃dd ZeejhjxjՃdd6ddZd dl֐mאZ ee׃ ee*dd Zeejhjِjdd@dd Zeejhjِjdd@dd Zeejhjِjdd@dddddZeej|jdd@ddddddZee(dd@dddddZeejhjzjj҃dd Zeejhjِjdd@dd ZddlmZ e  eeGjdd@dd ZddlEmZ eDe ddlEmZ e  e  ddlEmZ e  ddlEmZ e  ejddpddZdS (      )annotationsN)defaultdict)IterableSequence)AnyCallablecastOptionalTYPE_CHECKINGTypeVarUnion)	ParamSpec)patch)counters)associative_scan_op)triton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)free_unbacked_symbols)
OrderedSet)CeilDivFloorDivIdentityModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)	DtypeView
ExpandViewIndexingConstantIRNode	is_tritonOnlineSoftmaxReductionops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_device
is_dynamicis_gpuis_pointwise_useis_view,needs_fallback_due_to_atomic_add_limitationspad_listlike#register_op_dtype_propagation_rules#register_op_requires_libdevice_fp64sympy_productuse_scatter_fallback)opsV)ReductionType_T_Pztorchvision::roi_alignzaten::index_addz8dict[Union[Callable[..., Any], str], Callable[..., Any]]	loweringsz9dict[torch._ops.OpOverload, Optional[Callable[..., Any]]]_maybe_layout_constraintsz2dict[torch._ops.OpOverload, torch._ops.OpOverload]inplaceable_foreach_opsc                  C  s<   t jjjD ],} | jD ] }|jdkr,|jtv s  dS qq
dS )NZcall_functionTF)rI   graphcurrent_nodeusersoptargetforeach_ops)nodeuser rX   T/var/www/html/assistant/venv/lib/python3.9/site-packages/torch/_inductor/lowering.pycur_node_has_non_foreach_usersx   s
    
rZ   z%Iterable[Union[tuple[Any, Any], Any]])	arg_pairsc                 C  s   t t}d}t| D ]\}}t|ts0d}|f}t|  p>tj}d }|D ]}t|trH|j	
 } qfqH|d usvJ d|r|\}|||f ||f q|S )NFTz.foreach op should have at least one tensor arg)r   list	enumerate
isinstancer   r>   r(   Z#combo_kernel_foreach_dynamic_shapesr9   data
get_deviceappend)r[   outZunpack_argsiargsuse_foreachdevicetrX   rX   rY   group_foreach_args   s$    


rh   zCallable[..., Any]zOptional[Callable[..., Any]])fnreturnc                 C  s&   t | tjjsdS | tv r"t|  S dS )zHGet layout constraints. Returns None if there are no layout constraints.N)r^   torch_ops
OpOverloadrN   ri   rX   rX   rY   maybe_layout_constraints   s
    ro   c                 C  sZ   | t jjjkrtS | t jjjkr$tS | t jjjkr6tS | t jjj	krHd S t
d|  d S )NzUnknown layout constraint tag: )rk   Z_CTagZneeds_exact_stridesconstrain_to_fake_tensorsZneeds_contiguous_stridesrequire_contiguous_stridesZneeds_fixed_stride_orderconstrain_to_fx_stridesZflexible_layoutAssertionError)tagrX   rX   rY   tag_to_layout_constraint   s    rv   c                 C  s   | st d| d S )Nzinductor does not support NotImplementedError)condmsgrX   rX   rY   
assert_nyi   s    r{   c                   sX   t  ttttfr dd  D S t  t  tjj	rTt
 fdd  D  d S )Nc                 S  s   g | ]}t |qS rX   )add_needs_realized_inputs.0xrX   rX   rY   
<listcomp>       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3  s   | ]}t  |V  qd S N)getattr)r~   overloadrn   rX   rY   	<genexpr>   s   z,add_needs_realized_inputs.<locals>.<genexpr>)r^   r\   settupler    needs_realized_inputsaddrk   rl   OpOverloadPacketupdate	overloadsrn   rX   rn   rY   r|      s    
r|   c                 C  s8   t | tjjr,|  D ]}|tt| |< qn|t| < d S r   )r^   rk   rl   r   r   rN   r   )ri   
constraintr   rX   rX   rY   add_layout_constraint   s    r   )r   r'   r%                     	   
         intdtypec                 C  s2   t | ts| S | tv s&J d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)r^   r   DTYPE_ID_LOOKUPr   rX   rX   rY   decode_dtype   s
    
r   c                 C  sF   t | tr"t|  p t|  S t | tjr8| jdu S t | tS d S )NT)	r^   r9   r   	get_dtyper   sympyExpr
is_integerr   r   rX   rX   rY   is_integer_type   s
    

r   c                 C  s$   t | trt|  S t | tS d S r   )r^   r9   r   r   boolr   rX   rX   rY   is_boolean_type  s    
r   r   type_promotion_kindc                   s0   dd   fdd|D }t |d| i\}}|S )Nc                 S  s<   t | ttjfr| S t|  }tjdg| |  dS d S )Nr'   r   )	r^   r   r   Basiclenget_sizerk   zerosr   )inpdimrX   rX   rY   construct_input  s    z+get_promoted_dtype.<locals>.construct_inputc                   s   g | ]} |qS rX   rX   )r~   argr   rX   rY   r     r   z&get_promoted_dtype.<locals>.<listcomp>r   )r   )r   rd   Zinps_r   rX   r   rY   get_promoted_dtype  s    r   c                 C  sh   t | ttfs| g} nt| } t| D ]<}t |tjjr&| D ] }t||}|tvr@| 	| q@q&| S r   )
r^   r\   r   rk   rl   r   r   r   rM   ra   )aten_fnri   r   Zother_fnrX   rX   rY   get_overloads  s    
r   c                 C  s6   t | tjjr|| jv S t | tjjr2||  v S dS )NF)r^   rk   rl   r   Z_qualified_op_namerm   name)rS   	namespacerX   rX   rY   in_namespace)  s
    
r   z	list[Any]zdict[str, Any]r   z)Optional[ELEMENTWISE_TYPE_PROMOTION_KIND]z tuple[list[Any], dict[str, Any]])rd   kwargs	broadcastr   convert_input_to_boolrj   c                   s  dd t  D }dd  D }|s4|s4 fS |s<|r|rHtjn4dd  D }|dd  D  t|d|i|r |d  n
|d   fd	d
fdd D  fdd D |rtt	t
 fdd|D fdd|D  }t	|d  }	t||d t| D ]\}
}| |
< q.t||t|d  D ]\}}||< qXtt D ],}
t |
 tjrxt |
 |	 |
< qxD ],}t| tjrt| |	|< q fS )Nc                 S  s   g | ]\}}t |tr|qS rX   r^   r9   r~   rc   r   rX   rX   rY   r   8  r   z"transform_args.<locals>.<listcomp>c                 S  s   g | ]\}}t |tr|qS rX   r   r~   kvrX   rX   rY   r   9  r   c                 S  s*   g | ]"}t |ttjfs"t|d r|qS r   )r^   r   r   r   hasattrr~   arX   rX   rY   r   C  s   c                 s  s   | ]}t |d r|V  qdS )r   N)r   r   rX   rX   rY   r   I  r   z!transform_args.<locals>.<genexpr>r   r   c                   s:   t | trt| S t | tjr2tj| j dS | S d S )Nvaluer   rf   )r^   r9   to_dtyper*   Constantr   )r   )rf   r   rX   rY   promoteT  s
    

ztransform_args.<locals>.promotec                   s   g | ]} |qS rX   rX   r   r   rX   rY   r   \  r   c                   s   i | ]\}}| |qS rX   rX   r   r   rX   rY   
<dictcomp>]  r   z"transform_args.<locals>.<dictcomp>c                 3  s   | ]} | V  qd S r   rX   r~   rc   rd   rX   rY   r   c  r   c                 3  s   | ]} | V  qd S r   rX   r~   r   r   rX   rY   r   d  r   )r]   itemsrk   r   extendvaluesr   r`   broadcast_tensorsr\   	itertoolschainr   zipr   ranger^   r*   r   r/   create)rd   r   r   r   r   Zargs_indicesZkwargs_indicesZpromoting_argsZbroadcastedsizerc   r   r   rX   )rd   rf   r   r   r   rY   transform_args1  sT    r   c                   s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s*   t | dksJ  | i |}t| |S )Nr%   )r   r:   )rd   r   rb   	decomp_fnrX   rY   wrapped  s    z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rU   r   rM   dictfromkeys)r   r   r   Zaten_fnsrX   r   rY   _register_foreach_loweringy  s    
r   c                   s<   t  fdd}t  |t | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s   t | } t|}d}t| dkrBt| d t tfrBd}t | d } tdd  D srtdd | D rrJ dt| |\} }|r| g} | i |}t	| |S )	NFr'   r   Tc                 s  s    | ]}|t v pt|d V  qdS )Z_c10d_functionalN)	fallbacksr   )r~   ri   rX   rX   rY   r     s   z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>c                 s  s   | ]}|d kV  qdS )rb   NrX   r}   rX   rX   rY   r     r   zout= ops aren't yet supported)
r\   r   r   r^   r   allanykeysr   r:   )rd   r   unpackedrb   r   r   r   r   r   rX   rY   r     s(    
z#_register_lowering.<locals>.wrapped)r   r   r   r   r   r   )r   r   r   r   r   lowering_dictr   rX   r   rY   _register_lowering  s
    r   Fz.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   rj   c                 C  s   t jt| ||||dS )z+
    Shim to support decorator syntax.
    )r   r   r   r   )r   partialr   )r   r   r   r   r   rX   rX   rY   register_lowering  s    r   c                 C  s   g }t jt| t|tjjdD ]\}}tjjj	j
t|dddrP|| q tjjj	j
t|dddrx|| q tjj|| tt|jtt|jk r|| q || q tt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    )	fillvaluer'   TZsize_oblivious)r   zip_longestreversedr   SOnerI   rP   sizevars	shape_envevaluate_exprEqra   guard_equalsr   expandZfree_symbolsr   )r   boutputr   yrX   rX   rY   broadcast_symbolic_shapes  s    $

 r   c              
     s.  |d u s|d u sJ d|d u r.|d u r.t j}tdd | D sD| S tdd | D r|pft| d|ifdd  fdd	| D S td
d | D }g }| D ]}t|ttfr|	t
tj|| | dt|  qt|tjr|	t
t|| | dt|  q|	| q|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s  s    | ]}t |tjttfV  qd S r   )r^   r   r   r   floatr}   rX   rX   rY   r     r   z$promote_constants.<locals>.<genexpr>c                 s  s    | ]}t |tttjfV  qd S r   )r^   r   r   r   r   r}   rX   rX   rY   r     r   r   c                   s8   t | tjr tj|  td dS tj|  td dS d S )Nindexr   rf   r   )r^   r   r   r*   r0   r=   r   r   r   rX   rY   
const_func  s
    
z%promote_constants.<locals>.const_funcc                   s   g | ]} |qS rX   rX   r}   )r   rX   rY   r     r   z%promote_constants.<locals>.<listcomp>c                 s  s$   | ]}t |tttjfr|V  qd S r   )r^   r9   r/   r*   r   r}   rX   rX   rY   r     r   r   r   )r   DEFAULTr   r   r   nextr^   r   r   ra   r/   r   r*   r   r   get_device_or_errorr\   r   r   r   r0   )inputsoverride_return_dtyper   exrb   r   rX   )r   r   rY   promote_constants  sL    

	r  c                   s&   d ddd fdd}|S )Nalphar9   r  c              	     s  d ur*t dd D r*r"J  S t
rd| d urp| dkrpttd | d< n| d u spJ dd D d  
pd   dd  D ]B}t|tjst	t	| ksJ d d	 d	|  qt
jt
jftjd uo@ttjd
d d uo@tjjjd uo@tjjjddo@ v  	fdd}sd }D ]$}t| jrj| } qqj|sd  }p|}tj| |dS )Nc                 s  s    | ]}t |tot|V  qd S r   r^   r1   r2   r~   r   rX   rX   rY   r   5  s   z0make_pointwise.<locals>.inner.<locals>.<genexpr>r'   c                 S  s   g | ]}|  qS rX   make_loaderr}   rX   rX   rY   r   B  r   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   zndim mismatch  rQ   Zlow_precision_pointwise_barrierFc                   s   t  t ks$J d  d tjkrLd urL fddD  S g }tD ]N\}}| }|  }r|v rtj||dd}t||}|| qX| }rtj|dd}t|S |S d S )Nzwrong ndim r  c                   s   g | ]}| qS rX   rX   )r~   loadr   rX   rY   r   Z  r   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>F)Zuse_compute_types)r   rk   r   r]   r   rH   r   ra   )r   Zinputs_loadedZ	inp_indexr  rb   Z	inp_dtypeZdowncast)r   emulate_precision_castsri   r  loaders	low_pr_fpoverride_fn_when_input_boolrangesr  rY   inner_fnW  s     $z/make_pointwise.<locals>.inner.<locals>.inner_fnrf   r   r  r  )r   r  r\   mulr   r   r^   r*   BaseConstantr   rk   bfloat16float16rI   rP   r   rQ   metagetr?   r`   typer6   r   )r  r  otherr  rf   rc   allow_alphari   override_devicer  r  triton_fallback)r   r  r  r  r  r  rY   inner4  s\    


zmake_pointwise.<locals>.innerrX   )ri   r  r"  r  r!  r#  r$  rX   r   rY   make_pointwise,  s    "Kr%  c                   s   dddd fdd}|S )Nr'   r  zlist[list[TensorBox]]r  c                   sV  t tjjjdkp$tjjjtv p$t }d }|D ]}t|t	t
fr.|} qJq.|d usZJ dg }|D ]2}t|t	t
fs||gt |  qb|| qbtt| }d gt | }| D ]\\}}	}
g }|
D ]Z\}} r|d| i}n| }|||< tj|tjr|	r|r|  ||  q|rtj| qtdd |D sRJ |S )Nr   z1at least one input must be a list to a foreach opr  c                 s  s   | ]}|d uV  qd S r   rX   r}   rX   rX   rY   r     r   z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   rI   rP   rQ   rR   rT   inplace_foreach_opsrZ   r^   r\   r   ra   rh   r   r   has_featureBackendFeatureFOREACHrealizeget_operation_nameregister_operation_listr   )r  r  Zrealize_outputsZa_list_inputinputZbroadcast_inputsgroupsoutputsrf   re   groupoperation_list
output_indrd   r   r!  pw_fnrX   rY   r$    sT    
z%make_foreach_pointwise.<locals>.innerrX   )r4  r!  r$  rX   r3  rY   make_foreach_pointwise  s    6r5  r9   ztorch.dtyper   r   c                   s>   |    kr |rt| S | S  fdd}t| d| S )Nc                   s   t j|  dS )N)	src_dtype)rH   r   r   r   r7  rX   rY   	_to_dtype  s    zto_dtype.<locals>._to_dtyper  )r   cloner%  )r   r   copyr9  rX   r8  rY   r     s
    r   c                 O  s   ddl m} |}| j}||tjd}t| |j|  W d   n1 sN0    Y  |j}|sfJ t|}dgt	| }	|
 D ]`\\}
}}g }|D ]:\}}||	|< tj|
tjr|r|  ||  q|rtj| qtdd |	D sJ |	S )aI  
    This lowers an invocation of foreach_map
    The way this works is that an arbitrary N-arg func is provided by the user, looped over by the
    polyfill with the same semantics as a foreach op (a loop applying an n-ary function to n args)
    and then traced into a subgraph by dynamo.
    This code allows us to inline the subgraph into the main graph lowering using the PontwiseSubgraphLowering.
    The graph outputs represent the vertically fused sequence of ops, and then register_operation_list
    below registers the buffers as horizontally fuseable in the scheduler.
    r'   )PointwiseSubgraphLowering)Zroot_graph_loweringNc                 s  s   | ]}|d uV  qd S r   rX   r}   rX   rX   rY   r     r   z_foreach_map.<locals>.<genexpr>)subgraph_loweringr=  graph_modulerI   rP   Zset_graph_handlerrunZgraph_outputsrh   r   r   r'  r(  r)  r*  ra   r+  r,  r   )Zsubgraphrd   r   r=  r  ZgmZpw_subgraphZsub_outputsr.  r/  rf   re   r0  r1  r2  r   rX   rX   rY   _foreach_map  s0    (rA  c                 C  sZ   |j s|  j rL|  r6t| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nr   Fadd_to_fallback_setTr<  )
is_complexr   r   
empty_liker*   ZInplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )r   r   dstrX   rX   rY   _convert_element_type  s    rL  rD  c                C  sf   |   }||kr |rt| S | S dd }||}||}||krRttjj| |S tt| |S d S )Nc                 S  s"   | j rt| jS t| jS d S r   )is_floating_pointrk   Zfinfobitsiinfor   rX   rX   rY   _get_primitive_bitwidth	  s    z1to_dtype_bitcast.<locals>._get_primitive_bitwidth)	r   r;  rG  atenviewr   r9   r.   r   )r   r   r<  Zx_dtyperP  Zsrc_bitsZdst_bitsrX   rX   rY   to_dtype_bitcast  s    rS  c                 C  s8   |j s|  j r.ttjtjjj	j
| |S t| |S r   )rE  r   r9   r   r*   ZComplexViewrk   rH   rQ  rR  r   rS  r6  rX   rX   rY   _view_dtype  s
    rT  r<  non_blockingztorch.device)r   rf   c                C  s:   t |}|  |kr$|r t| S | S ttj| ||S r   )r=   r`   r;  r9   r   r*   Z
DeviceCopy)r   rf   r<  rV  rX   rX   rY   	to_device!  s    rW  c                 C  s   t | |d|dS )NTrU  )rW  )r   rf   rV  rX   rX   rY   _device_put(  s    rX  Tc	           
      C  sz   |p| j }t|}	t||| |dur.t|}t|	||||d}	t| |||d|	}	tt|rvttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputsN)r  r  r!  r#  )r   r   r   )r   r   )__name__r4   rD   r%  r   r   rH  r   )
r   r   r   r   r   r  r  r!  r#  ri   rX   rX   rY   register_pointwise-  s>    

rZ  c                    sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| rttt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                    s    | i |d S Nr   rX   rd   r   r[  rX   rY   frexp0_  s    zregister_frexp.<locals>.frexp0c                    s    | i |d S Nr'   rX   r]  r^  rX   rY   frexp1b  s    zregister_frexp.<locals>.frexp1r:  c                    s$    d | i | d | i |fS Nr   r'   rX   r]  )pw_fnsrX   rY   ri   j  s    zregister_frexp.<locals>.fnNr   )
r4   r%  rk   int32r   rQ  r[  r   rH  r   )r   r_  ra  ri   rX   )r[  rc  rY   register_frexpZ  s*    
re  c                 C  s   t ||d}t| |}|S )Nr!  )r5  r   )r   Zpointwise_lowering_fnr!  ri   rX   rX   rY   register_foreach_pointwise|  s    
rg  )r   r   c                   s  dd }t |ttfr"t||}t |ttfr<t||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qt
t D ]8}t  | tjrt | t |d	    |< qt||d
 d	 t d |t d |S )Nc                  W  s
   t j|  S r   )rH   wherer   rX   rX   rY   ri     s    zwhere.<locals>.fnr'   r%   r   c                 S  s   g | ]\}}t |tr|qS rX   r   r   rX   rX   rY   r     r   zwhere.<locals>.<listcomp>c                   s   g | ]} | qS rX   rX   r   r   rX   rY   r     r   r   r:  )r^   r   r   constant_liker   r   r   r]   r   r   r   r   r*   r   r/   r   r\   r   r%  r   )ry   r   r   ri   r   indicesrc   r   rX   r   rY   rh    s$    
$
&
rh  c                  G  s   t | dkr*t| d ttfr*t| d  S ttdd | D g }g }| D ]H}| }t |t |ks~t	dd t
||D rt||}|| qJ|S )Nr'   r   c                 S  s   g | ]}|  qS rX   )r   r}   rX   rX   rY   r     r   z%broadcast_tensors.<locals>.<listcomp>c                 s  s   | ]~\}}t jjjjt|d ddrDt jjjjt|d dd p|t jjjjt|d dd o|t jjjjt|d ddV  qdS )r'   Tr   N)rI   rP   r   r   r   r   r   r~   r   r   rX   rX   rY   r     s   

	

z$broadcast_tensors.<locals>.<genexpr>)r   r^   r\   r   r   r   reducer   r   r   r   r   ra   )r  rT   r/  r   sizesrX   rX   rY   r     s    
r   c                 C  s   | S r   rX   r   rX   rX   rY   nop  s    rn  
lift_freshc                 C  s   t | tsJ |d u r&tt| jS t |ttjfrDtj	j
|ntdd |D }tt|  |}tt |tsz|fn|}g }t|  D ]4\}}||v rtj	j
jt|ddds|| q||  krt| |S | S )Nc                 s  s   | ]}t jj|V  qd S r   rI   rP   r   evaluate_static_shaper~   drX   rX   rY   r     r   zsqueeze.<locals>.<genexpr>r'   Tr   )r^   r9   r8   r   r_   r   r   r   rI   rP   r   rq  r   r   r   r   r    r]   r   r   ra   rR  )r   r   dims	new_shapers  srX   rX   rY   squeeze  s"    rw  c                 C  s   t t| |S r   )r;  rw  )r   r   rX   rX   rY   squeeze_copy  s    rx  c                 C  s2   t | |}t| tsJ t|ts&J |j| _| S r   )rw  r^   r9   r_   r   r   valrX   rX   rY   squeeze_  s
    
r{  c                 C  s2   t | rt| dtjdS td}t|tjd| S )NFr   isinfr:  r   	full_likerk   r   r4   r%  r   ri   rX   rX   rY   r|    s    r|  c                 C  s2   t | rt| dtjdS td}t|tjd| S )NFr   isnanr:  r}  r  rX   rX   rY   r    s    r  c                 C  s$   t | rt| S td}t|| S )Nceilr   r;  r4   r%  r  rX   rX   rY   r    s    r  c                 C  s$   t | rt| S td}t|| S )Nfloorr  r  rX   rX   rY   r    s    r  c                 C  s(   t | rt| S td}t|| S d S )Nroundr  r  rX   rX   rY   r    s    r  c                 C  s$   t | rt| S td}t|| S )Ntruncr  r  rX   rX   rY   r    s    r  c                 C  s   t | g\} t| tjr(t| t|S t| ts6J t|ttfsHJ t| 	 t|kr`| S t
| 	 stjjt| 	 }|dkrt
|s| tjjt||  tt| jt|S r\  )r  r^   r*   r  r/   r   r   r9   r\   r   r   rI   rP   r   size_hint_or_throwrF   
mark_reuser_   )r   rm  Zx_size_productrX   rX   rY   r   %  s$    
r   c                 C  sL   t |}|D ]}d||< q| }t|D ]\}}|dkr&t||}q&t||S Nr  )r\   r]   	unsqueezer   )r   shapeZbroadcast_dimensionsrv  Zbroadcast_dimensionr   idxr   rX   rX   rY   broadcast_in_dim@  s    
r  c                 C  s   t | | S r   )r   r   )r   r   rX   rX   rY   	expand_asN  s    r  c                   sh  t |   tt krHtjjgtt      t| t  } tt|  ks`J t |  }d}ttD ](}| dkrd}|| |  ||< q||rt|| 	 | 
 dS tdd t D rtt| |S  fdd}t sBt|sBtjjt }|dkrB| tjjt||  |  tj| 
 | 	 |t |d	S )
NFr   Tr   rf   c                 s  s"   | ]\}}|d kp|d kV  qdS r'   NrX   rk  rX   rX   rY   r   e  r   zrepeat.<locals>.<genexpr>c                   st   t | t ksJ t| } tt D ]B}| dkr( | dkrRtjj| |< q(t| | d | | |< q(| S r`  )r   r\   r   r   r   ZZeror$   )r   rc   Zold_sizerepeatsx_loaderrX   rY   r  j  s    zrepeat.<locals>.inner_fnr  )r\   r   r   r   r   r   rR  r   emptyr   r`   r   r   r;  r   r   rI   rP   r   r  rF   r  r  r6   r   )r   r  new_sizeZzero_tensorrc   r  Zold_size_productrX   r  rY   repeatS  s>    
r  c                 C  s2   t | tsJ t |ttfs J tt| j|S r   )r^   r9   r\   r   r;   r   r_   )r   rm  rX   rX   rY   rR    s    rR  c                 C  s6   t | tsJ t |ttfs J tt| jt|S r   )r^   r9   r\   r   r5   r   r_   )r   rt  rX   rX   rY   permute  s    r              c              	   C  s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)r^   r9   _validate_dimr*   	SliceViewr   r_   )r   r   startendstepr  rX   rX   rY   slice_  s    r  c              	   C  s   t | tr"t | jtjr"| j } |   t| sDtd|  dt	| \}}t
|j|jdd |D dd |D t|pd}ttj||dS )Nzunrealized as_strided(z, ...)c                 S  s   g | ]}t |qS rX   r   r   r~   rv  rX   rX   rY   r     r   zas_strided.<locals>.<listcomp>c                 S  s   g | ]}t |qS rX   r  r  rX   rX   rY   r     r   r   r_   layout)r^   r9   r_   r*   BaseViewunwrap_viewr*  is_storage_and_layoutrx   as_storage_and_layoutFixedLayoutrf   r   r   r   ReinterpretView)r   r   stridestorage_offsetstorageZ
old_layoutZ
new_layoutrX   rX   rY   
as_strided  s    

r  c                 C  s$   t | tsJ t| |||j| _| S r   )r^   r9   r  r_   )r   r   r  r  rX   rX   rY   as_strided_  s    r  c                 C  s   t | |||}t|S r   )r  r;  )r   r   r  r  resultrX   rX   rY   as_strided_copy  s    r  c                   s   g d}D ]*} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r  c                 S  s   g | ]}|  qS rX   r  r
  rX   rX   rY   r     r   z!pointwise_cat.<locals>.<listcomp>c           
   	     sB  t |  tj}g }g }ttD ]܉  dkr@t dtjnt   d tj}t   d tj}t ||}t ||} dkr|}n" td kr|}nt 	||}|
| t| t   d  < |
t | fddd q&|d }	ttd ddD ] t |  |  |	}	q |	S )Nr   r'   c                     s     S r   rX   rX   )rc   idx_loadinputs_loadersrX   rY   <lambda>  r   z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r  r%   )rH   
index_exprrk   int64r   r   constantgeltand_ra   r\   r#   maskedrh  )
r  idx_dimZmasksZmasked_loadsr  r  Z
start_condZend_condmaskZnext_valr   r  r  Zinputs_ranges)rc   r  rY   r    sD    
zpointwise_cat.<locals>.inner_fnr  )ra   r   r\   r6   r   r`   r   )r  r   Zprev_endr   r  r  rX   r  rY   pointwise_cat  s    0

r  )r-  scaleszero_pointsaxis	quant_min	quant_maxr   rj   c              	     s   t  dksJ dt  dks0J d|  tjkrJt| tj} |  tjksjJ d|    t |  k sJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr'   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
        s   |   f}| }|}|}t tjd\}}jtjkrPt|tj}jtjkrjt|tj}t|}t|| | }t	|t
||}	t|	S Nr   )_create_constantsrk   float32r   rH   r   rd  
reciprocalr  maximumminimum)
r  channel_idxr-  scale
zero_pointqminqmax	inv_scalerz  clamped	r  r   input_loaderr  r  r  scales_loaderr  zero_points_loaderrX   rY   r    s    

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr  )r   r   r   rk   r  r   r  r  r6   r   r`   )r-  r  r  r  r  r  r   r  rX   r  rY   )quantized_decomposed_quantize_per_channel  s(    
r  	out_dtypezOptional[torch.dtype])	r-  r  r  r  r  r  r   r  rj   c          	        s   t  dksJ dt  dks0J d|  |ksTJ d| d|    t |  k szJ dt |   d u rtj|     fdd}tj|  ||  d	S )
Nr'   r  r  Expecting input to have dtype , but got dtype: r  c                   s   |   f}| }|}|}j tjkr<t|tj}j tjkrVt|tj}tt|tj|| }t|}|S r   )r   rk   r  rH   r   sub)r  r  r-  r  r  rz  r  r  r  r  r  r  r  rX   rY   r  U  s    
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr  	r   r   r   rk   r  r  r6   r   r`   )	r-  r  r  r  r  r  r   r  r  rX   r  rY   +quantized_decomposed_dequantize_per_channel7  s(    r  r   )r-  r  r  r  r  r   rj   c                   s   |   tjkrt| tj} |   tjks:J d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr  c           	        sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?r   )r  rk   r  rH   r  r  r  r   )	r  r  r  r-  r  rz  r  r  r  r   r  r  r  rX   rY   r    s    
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r  r  )r   rk   r  r   r  r  r6   r   r`   r   r   r   r   r   r-  r  r  r  r  r   r  rX   r  rY   0quantized_decomposed_quantize_per_tensor_defaultl  s    
r  )r-  r  r  r  r  r   r  rj   c                  sv   |   |ks$J d| d|    d u r2tj|    fdd}tj|  tj|t	|t
|d|  dS )Nr  r  c                   sF    | }t ||tjd\}}tt|tj|| }t|}|S r  )r  rk   r  rH   r  r   )r  r  r  r-  rz  r  r  rX   rY   r    s
    zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr  r  )r   rk   r  r  r6   r   r`   r   r   r   r   r   r-  r  r  r  r  r   r  r  rX   r  rY   2quantized_decomposed_dequantize_per_tensor_default  s    r  c                   s   |   tjkrt| tj} |   tjks:J d|    t dksrt dkrj d dksrJ dt dkst dkr d dksJ d|     fdd}tj	| 
  ||  dS )	Nr  r   r'   expect scale as scalar tensor"expect zero_point as scalar tensorc                   s   | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr'   r   rX   r   )r   r   r   rk   r  rH   r   r  r  r  r  r  )r  r-  _scale_zero_pointrz  r  r  r  r   r  r  r  r  scale_loaderr  zero_point_loaderrX   rY   r    s    zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr  )r   rk   r  r   r  r   r   r  r6   r   r`   r  rX   r  rY   /quantized_decomposed_quantize_per_tensor_tensor  s:    r  c                  s   t  dks8t  dkr0 d dks8J dt  dkspt  dkrh d dkspJ d|  |ksJ d| d|   d u rtj|      fdd}tj|  ||  d	S )
Nr   r'   r  r  r  r  c                   s    | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}tt|tj|| }t|}|S )Nr'   r  rX   )r   r   r   rk   r  rH   r   r  )r  r-  r  r  rz  r  r  r  r  r  r  rX   rY   r    s    zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr  r  r  rX   r  rY   1quantized_decomposed_dequantize_per_tensor_tensor  s:    r  c           
        s*  | d   jdk}|rttdd | D rt| D ]}|  q,tdd | D rbttjg| R  \} }ttjj| |S t	| dkrt
| d S t| d |d}t| dtjifdd	| D } d
dddddd fddtfdd| D }ddfddtjr"t| |S |r:ttj| |S fddd}d ddddt	| |kst	| tjkrt fd d| D rtfd!dtjjD }tfd"d| D o|}tfd#d| D otfd$d| D  }	|s|	r|st| |S ttj| |S )%Nr   cpuc                 s  s"   | ]}|  tjtjfv V  qd S r   )r   rk   int8uint8r~   r-  rX   rX   rY   r     s   zcat.<locals>.<genexpr>c                 s  s   | ]}t | d kV  qdS )r   N)r   r   r  rX   rX   rY   r   &  r   r'   r   c                   s   g | ]}t | qS rX   r   r
  r   rX   rY   r   1  r   zcat.<locals>.<listcomp>zUnion[TensorBox, ir.StorageBox]	ir.IRNoder   rj   c                 S  s>   t | tr(t | jtjr"| j S | jS t | tjr:| jS | S r   )r^   r9   r_   r*   r  r  
StorageBoxr   rX   rX   rY   unwrap_tensor3  s    

zcat.<locals>.unwrap_tensorc                 S  s   t | tjot | jtjS r   )r^   r*   ComputedBufferr_   r7   rg   rX   rX   rY   is_reduction?  s    zcat.<locals>.is_reductionc                   sJ   t | ttjfr | S | pHt | tjoHt fdd|  D S )Nc                 3  s   | ]} t j|V  qd S r   )rI   rP   
get_buffer)r~   readcan_fuse_reductionrX   rY   r   H  s   z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)r^   r9   r*   r  r6   r   get_read_namesr   )r  r  r  rX   rY   r  B  s    zcat.<locals>.can_fuse_reductionc                 3  s   | ]} |V  qd S r   rX   r~   rg   r  rX   rY   r   O  r   r   )rj   c                   sZ   t | r*t j| dd\}}t j| S t| tt jfrF | S t| t jrVdS dS )NF)freezeT)	r*   r  r  ConcatKernelZcan_realize_into_without_copyr^   r9   r  r6   )r   r  r   )should_lower_cat_inputr  rX   rY   r
  Q  s    
z#cat.<locals>.should_lower_cat_inputc                   s\   t | ttjfr | S t | tjs,dS |  j}|  D ]}| tj	
|7 }q>|S r\  )r^   r9   r*   r  r6   Zinner_fn_opcountZnum_opsr  rI   rP   r  )r   countr  )op_countr  rX   rY   r  h  s    
zcat.<locals>.op_countr   r%   torch._ops.OpOverloadrS   c                 S  s   | t jjt jjfv S r   )rQ  catrJ  constant_pad_ndr  rX   rX   rY   additional_pointwise_ops}  s    z%cat.<locals>.additional_pointwise_opsc                 3  s   | ]}| kV  qd S r   rX   r  )MAX_SIMPLE_OP_COUNTr  rX   rY   r     r   c                 3  s   | ]}t | V  qd S r   )r@   )r~   Zuse)r  rX   rY   r     s   c                 3  s   | ]} |V  qd S r   rX   r
  r
  rX   rY   r     r   c                 3  s   | ]} |V  qd S r   rX   r
  r  rX   rY   r     s   c                 3  s   | ]} |V  qd S r   rX   r  r  rX   rY   r     r   )r`   r  r   r*  require_channels_lastrQ  r  rG  rJ  r   r;  r  r   r   r   r   r(   Zforce_pointwise_catr  r9   r*   r	  r   Zmax_pointwise_cat_inputsrI   rQ   rR   )
r  r   Z
cpu_devicer-  r   Zfusable_reductionZMAX_COMPLEX_POINTWISE_CATZpointwise_usesZfuse_pointwise_useZhorizontal_fuse_catrX   )r  r  r  r   r  r  r
  r  rY   r    sb    



r  )offsetdim1dim2c                   s  |   ttdtdtkfdd tjjt	|d}|rtjj
tjj |  d}n(tjj
tjj  | d}d |r| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                     s   d  d S )Nz(diagonal dimensions cannot be identical z, rX   rX   r  r  rX   rY   r    r   zdiagonal.<locals>.<lambda>r   )r   r   c                   s    g | ]\}}| fvr|qS rX   rX   )r~   rc   rv  r  rX   rY   r     r   zdiagonal.<locals>.<listcomp>c                   s   | d }dgt  }d}tD ]L}|kr@| d  ||< q"|krZ| d  ||< q"| | ||< |d7 }q"|t d ksJ |S )Nr  r   r'   r%   )r   r   )r  Zdiag_idxZoriginal_idxZcur_dimrs  Zbase_idxr  r  Znum_dimsZoriginal_shaperX   rY   	reindexer  s    
zdiagonal.<locals>.reindexer)r   r   r   r   rI   rP   r   r   r   LtZevaluate_maxZevaluate_minr]   ra   r9   r*   GenericViewr   )r-  r  r  r  Zoffset_negativeZ	diag_sizerm  r  rX   r  rY   diagonal  s:    
r  c                 C  s   t t| |||S r   )r;  r  )r-  r  r  r  rX   rX   rY   diagonal_copy  s    r  c                 C  s$   t | }t||||}t|| |S r   )r;  r  	mutate_to)r-  srcr  r  r  r   rT   rX   rX   rY   diagonal_scatter  s    
r"  c                 C  s,   t ||  | }tt| |||d |S r`  )r;   Zhandle_negative_indexr   rw  r  )r   r   r  rX   rX   rY   select  s    r#  c           
   
   C  s   t | |d}|}t|ttfsd|  | }tjjt	|| d |}|g| }||d |  |d< g }d}|D ](}|| }	|
t| |||	dd |	}qp|S )Nr   r'   r  Fr  )r  r^   r\   r   r   rI   rP   r   rq  r"   ra   r  )
r   rm  r   Zsizes_x_sizechunksr  r  r   r  rX   rX   rY   split  s     
r&  c                 C  s   t | ||S r   )r&  )r   rm  r   rX   rX   rY   split_with_sizes   s    r'  c                   s>   t  d tjj   } fddt|D }|S )Nr   c                   s   g | ]}t  |qS rX   )r#  r   r   r   rX   rY   r   	  r   zunbind.<locals>.<listcomp>)r  rI   rP   r   rq  r   r   )r   r   r$  r  rX   r(  rY   unbind  s    r)  c                   s   |   }t|}t|| |dkr4tt| d|dS |  }tjj}||| |	d t
|| d }||dkr| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r'   c                   s:   | d |     }g | d   ||  d d R S )Nr  r'   rX   )r  Zdim_idxr   r  rX   rY   r  #  s    zunfold.<locals>.reindexer)r   r   r   r  r  rI   rP   r   	guard_leqguard_ltr"   r  r  r!   r9   r*   r  r   )r   	dimensionr   r  rm  ndimdim_sizer   Znew_dim_sizeout_sizer  rX   r*  rY   unfold  s"    
(r1  c                 C  s2   t | |d}t|  }||tjj t| |S r`  )r  r\   r   insertr   r   r   rR  )r   r   ru  rX   rX   rY   r  *  s    r  c                 C  s2   t | |}t| tsJ t|ts&J |j| _| S r   )r  r^   r9   r_   ry  rX   rX   rY   
unsqueeze_2  s
    
r3  c                 C  sX   t jjjt|}t|  }|dk r6||| 7 }d|  krN|| k sTn J |S r\  )	rI   rP   r   r   r   r   sympifyr   r   )r   r   r  r.  rX   rX   rY   r  ;  s    r  r  c                 C  sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r%   )	r  rI   rP   r   rq  r   r  r  sigmoid)r   r   Znew_lenr   r   rX   rX   rY   gluD  s
    r6  c                   s$   |rt    fdd}d|_|S )Nc                    s*   dd }t |tjj g| R i |S )Nc                 S  s   t | tjrt| S | S r   )r^   r*   r1   r9   r   r   rX   rX   rY   wrap_tensorsS  s    z7fallback_handler.<locals>.handler.<locals>.wrap_tensors)pytreetree_mapr*   FallbackKernelr   )rd   r   r7  kernelrX   rY   handlerR  s    z!fallback_handler.<locals>.handlerT)r   r   Z_is_fallback_handler)r<  rC  r=  rX   r;  rY   rG  N  s
    
	rG  c                   C  s   t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrX   rX   rX   rY   _warn_complex_not_supported`  s    r@  ztorch.Tensorr   c                 C  s   |   rt  dS | jrdS | jtjkr~|s0dS t|jtjj	rb|jt
jjt
jjt
jjt
jjfv pzt|jtjj	ozt|j S dS )z0Do not support reading or writing to this tensorTF)rE  r@  is_metar   rk   Zfloat8_e8m0fnur^   rT   rl   rm   rQ  rR  r  rJ  r;  Z
_scaled_mmrA   )rg   rV   rX   rX   rY   unsupported_input_tensori  s(    rB  c                 C  sL   t jjtjjjjf}|dur2|j|v r2| 	 r2dS t
| |r@dS | joJtjS )z2Do not support writing tensor but can read from itNFT)rQ  rR  r   rk   rH   rH  rI  rJ  rT   rE  rB  Zis_cpur(   Zdisable_cpp_codegen)rg   rV   Zsupported_complex_viewsrX   rX   rY   unsupported_output_tensor  s    

rC  ztorch.fx.NoderV   c                   sv    j tjju rdS  jdkr dS  j tjju r2dS  fdd}tj ji  j	D ]}||ddrR dS qR| ddS )NFplaceholderc                   sp   t | tjjsdS d| jvr dS t| jd D ]:}t |tjjsDq0|rZt	| rj dS q0t
| r0 dS q0dS )NFrz  T)r^   rk   fxNoder  r8  tree_leavesZ_subclassesZ
FakeTensorrC  rB  )Zinp_out_node	is_outputr  rD  rX   rY   check_skip_condition  s    


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)rI  T)
rT   rQ  view_as_complexrJ  rS   lift_fresh_copyr8  Zarg_tree_leavesrd   r   )rV   Zallow_cpu_inputsrJ  r   rX   rD  rY   %fallback_node_due_to_unsupported_type  s    
rM  c                   s   | t vs|sJ d|  |r|ttdr|t| gr|tjrJ| tjj	j
v s||s|tjjjrldtjj_td td|  d fdd}t| tjjr|  D ]}t| |}|| qn8t| tjjtjjfr||  ntd	|  d
t|  d S )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                   s.   t |   d urt|   t| d dt| S Nr   )r|   r   r   rG  )op_overloadlayout_constraintrX   rY   register_fallback  s    

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r,   r   osgetenvr-   r(   fallback_randomrk   _decompZdecompositions_for_rngZextra_random_decompsZ_dynamoZsuppress_errorslogwarningrt   r^   rl   r   r   r   rm   ZHigherOrderOperatorRuntimeErrorr  )rS   rR  r?  Zoverride_decomprS  olrP  rX   rQ  rY   make_fallback  s>    	




r\  c                 C  s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r'   r   tensorrk   r  )r  Znumelrv  rX   rX   rY   philox_rand_offset  s    
r_  c           	        sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                   sV   t g tj}t g tj}t t | tj|}t ||}t | S r   )rH   r   rk   rd  r   r  rand)r   Zseed_index_exprZoffset_index_exprZrand_index_exprr  r   Zoffset_loader
random_posseed_loaderrX   rY   r    s    zphilox_rand.<locals>.inner_fnr  )
r*   r  FlexibleLayoutcontiguous_stridesmake_indexerr  r6   r   r\   r_  )	r   seedr  r  rf   r   r  Zrandom_values_nodeZoffset_noderX   ra  rY   philox_rand  s"    
rh  c              	   C  s2   t jr&ttjtjtj	j
| ||S tdd S )Nz&should be handled in replace_random.py)r(   rV  r8  r9  r9   r   r*   r:  rQ  native_dropoutrJ  rt   )r   ptrainrX   rX   rY   ri  "  s    ri  c                 G  sj   t js |  tdks J d|   t|dksBt|d trJt	j
jnt	j
j}tj|| g|R   | S )Nr  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r(   rV  r`   rk   rf   r*  r   r^   r   rQ  
bernoulli_Tensorr*   ZInplaceBernoulliFallback)r   rd   rP  rX   rX   rY   rm  -  s    

rm  c                 G  s4   t js |  tdks J dtt| g|R  S )Nr  rl  )r(   rV  r`   rk   rf   rm  r;  )r   rd   rX   rX   rY   bernoulli_p<  s    ro  c                 C  s   t d S r   rt   r   rX   rX   rY   _foobarE  s    rr  c                 C  s   t d d S )Nz1using triton random, expect difference from eager)rX  info)saltrX   rX   rY   _warn_triton_randomJ  s    ru  c                   C  s   t tjj d S r   )ru  rI   rP   Zcreation_timerX   rX   rX   rY   warn_triton_randomO  s    rv  c                  O  sJ   | dd d urt| i |S tjr>|dd  t| i |S tdd S N	generatorz-should have been handled in replace_random.py)r  fallback_rand_generatorr(   rV  popfallback_rand_defaultrt   r]  rX   rX   rY   r`  [  s    r`  c                  O  sJ   | dd d urt| i |S tjr>|dd  t| i |S tdd S rw  )r  fallback_randn_generatorr(   rV  rz  fallback_randn_defaultrt   r]  rX   rX   rY   randne  s    r~  c                 C  s   t |}t j| |S r   )r*   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorr  stride_orderrX   rX   rY   inductor_force_stride_ordero  s    
r  rf   c                 C  s   t dd S )Nz.should be handled in fuse_seed_creation_pass()rp  r  rX   rX   rY   inductor_seedu  s    r  c                 C  s   t   tt| t|S r   )rv  r9   r   r*   ZRandomSeedsr=   )r  rf   rX   rX   rY   inductor_seedsz  s    r  c                   s(    fdd}t j  |g dS )Nc                   s   t   S r   )rH   Z	load_seedget_namerq  r   seedsrX   rY   r    s    z&inductor_lookup_seed.<locals>.inner_fnr  )r6   r   r`   r   )r  r   r  rX   r  rY   inductor_lookup_seed  s    r  r  z	list[int]str)r   rg  moder  c                  s   t jr
J  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r`  r~  r  c                   s"   t t g t| tjS r   )r   rH   r  rk   rd  r  r  rb  rc  rX   rY   r    s    z!inductor_random.<locals>.inner_fnr  )r(   rV  rk   r  r  r*   r  rd  re  rf  r  r6   r   r*  )r   rg  r  r  r   rf   r  r  rX   r  rY   inductor_random  s$    

r  )lowhighr   rg  r  c                  sp   t jr
J g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c              	     s6   t g t | tjt tjt  tjS r   )rH   Z	randint64r  rk   rd  r  r  r  r  rb  rc  rX   rY   r    s    z"inductor_randint.<locals>.inner_fnr  )r(   rV  rk   r  r  r*   r  rd  re  rf  r  r6   r   )r  r  r   rg  r  r   rf   r  rX   r  rY   inductor_randint  s    

r  z.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr])tbrj   c                 C  s4   |   |  d |  d |  d  |  d fS Nr  r   )r  r   
get_strider  rX   rX   rY   _boundaries_helper  s
    

r  ztuple[str, sympy.Expr]c                 C  s   |   |  d fS r  )r  r  r  rX   rX   rY   _sorter_helper  s    r  	out_int32rightsidesorterzOptional[str]zOptional[TensorBox])sorted_sequenceselfr  r  r  r  rj   c          
        s   dd }|r(||r(d urH|sHt tjjdd|||dS |d ur\|dkr\d|rftjntj |   d ur  t	
 dkr fd	d
}n fdd
}| }tj| ||jd}	|	  |	S )Nc                 S  s   t j| tjS r   )rI   rP   r'  r(  	BUCKETIZEr  rX   rX   rY   r    s   zsearchsorted.<locals>.<lambda>FrB  r  r  Tr'   c              	     s@   | }t j|td d u r&d ntd u r8d nddS )Nr   r  Zsorter_indicesrH   	bucketizer  r  )r  rz  index_dtyper  r  r  Zvalues_loaderrX   rY   r  	  s    zsearchsorted.<locals>.inner_fnc              	     s\    }dd fdd}t j|t|d u r>d ntd u rPd n|dS )Nr9   r  c                   s>   |   }tttjdd t|d d  d d D S )Nc                 s  s   | ]\}}|| V  qd S r   rX   )r~   rv  rc   rX   rX   rY   r   	  r   zNsearchsorted.<locals>.inner_fn.<locals>.get_flattened_index.<locals>.<genexpr>r  )r  rH   r  r   rl  operatorr   r   )r  strides)r  r  rX   rY   get_flattened_index	  s    &z;searchsorted.<locals>.inner_fn.<locals>.get_flattened_indexr  r  )r  rz  r  r  r  rY   r  	  s    	r  )rG  rQ  searchsortedrn  rk   rd  r  r  r*  r   r   r`   r6   r   r  )
r  r  r  r  r  r  Zvalidate_bucketizer  rf   r  rX   r  rY   r    sH    
r  r  r  )r-  
boundariesr  r  c                  s   t   dksJ tj| tjr4tj tjsPttj	j
dd|  |dS    |  }|  |rrtjntj fdd}tj|||  d}|  |S )Nr'   FrB  r  c                   s"   | }t |t d}|S r\  )rH   r  r  )r   rz  rj  r  r  r  r  rX   rY   r  S	  s    zbucketize.<locals>.inner_fnr  )r   r   rI   rP   r'  r(  r  rG  rQ  r  rn  r*  r`   r  rk   rd  r  r6   r   )r-  r  r  r  rf   r  r  rX   r  rY   r  5	  s*    
r  c                 O  s$   t tjtjj||f\}}||fS r   )r8  tree_map_onlyr*   r1   r  Zrequire_stride1r   rd   r   rX   rX   rY   require_denser	  s    r  c                 O  s$   t tjtjj||f\}}||fS r   )r8  r  r*   r1   r  require_contiguousr  rX   rX   rY   r  y	  s    r  c                 O  s$   t tjtjj||f\}}||fS r   )r8  r  r*   r1   r  rr   r  rX   rX   rY   rr   	  s    rr   c                 O  s$   t tjtjj||f\}}||fS r   )r8  r  r*   r1   r  r  r  rX   rX   rY   r  	  s    r  c                   s|   t  tjr,dd  D }tj |S t  trN fdd  D S t  tt	frxt
 dd t D S  S )Nc                 S  s$   g | ]}t |tjr|jjn|qS rX   r^   rk   SymIntrV   exprr  rX   rX   rY   r   	  s   z,constrain_to_fake_tensor.<locals>.<listcomp>c                   s    i | ]}|t  | | qS rX   constrain_to_fake_tensorr~   keyr   fake_argrX   rY   r   	  s   z,constrain_to_fake_tensor.<locals>.<dictcomp>c                 s  s   | ]\}}t ||V  qd S r   r  )r~   r   Zf_arX   rX   rY   r   	  s   z+constrain_to_fake_tensor.<locals>.<genexpr>)r^   r*   r1   r  r  require_exact_stridesr   r   r   r\   r  r   )r   r  meta_stride_exprrX   r  rY   r  	  s    
r  c                   s6   t dd t| |D }  fdd| D }| |fS )Nc                 s  s   | ]\}}t ||V  qd S r   r  )r~   r   r  rX   rX   rY   r   	  s   z,constrain_to_fake_tensors.<locals>.<genexpr>c                   s    i | ]\}}|t | | qS rX   r  r   fake_kwargsrX   rY   r   	  r   z-constrain_to_fake_tensors.<locals>.<dictcomp>)r   r   r   )rd   r   Z	fake_argsr  rX   r  rY   rq   	  s
    rq   c                   sJ    fdd t  fddt|jD } fdd| D }||fS )Nc                   s^   t  tjr6tjd  tjjj	}tj
 |S t  trZ fdd  D S  S )Nrz  c                   s    i | ]}| | | qS rX   rX   r  )apply_constraintr   fx_argrX   rY   r   	  r   zEconstrain_to_fx_strides.<locals>.apply_constraint.<locals>.<dictcomp>)r^   r*   r1   r  r  r  rI   rP   r   r   r  r  r   r   )r   r  r  r  )r   r  rY   r  	  s    
z1constrain_to_fx_strides.<locals>.apply_constraintc                 3  s   | ]\}} ||V  qd S r   rX   )r~   r   r  r  rX   rY   r   	  s   z*constrain_to_fx_strides.<locals>.<genexpr>c                   s"   i | ]\}}| |j | qS rX   r   r   r  fx_noderX   rY   r   	  r   z+constrain_to_fx_strides.<locals>.<dictcomp>)r   r   rd   r   r  rd   r   rX   r  rY   rs   	  s    

rs   c                   sN   fdd t  fddtt|jD } fdd| D }||fS )Nc                   s2  t |tjs|S |jd }dd | D }t| }|rb|d dkrbtttt	|
 }jtjjkr| dv rt	|dksJ d}|jstj||S d	 jtjjjjko| d
k}t |tsJ t	|
 dvr|S t| rttj||S t |tr>| d ur>t| r>ttj||S |r~t|
 }g }| }	tt	|
 d D ]F}
tjj||
 ds|	d urptjj|	|
 drp||
 qpdgt	| }d|d< d}tt	|d ddD ]}
||
d  dkr|||
d   }|
|v r@tjj||
d    dr@d||
< qtjj|  dsdt|   }|||
< qtj ||S t| rttj||S t |tr| d urt| rttj||S  fdd}t |j!tj"r$||s$||# r$ttj||S tj||S )Nrz  c                 S  s$   g | ]}t |tjr|jjn|qS rX   r  r  rX   rX   rY   r   	  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.<listcomp>r  r   )r   r   r   )r   r'   r%   r   r   r   r   r   r'   r%   c                   s   t jj|  d   dkS r  )rI   rP   r   	size_hintr   r   Z	ALIGNMENTrX   rY   
is_aligned.
  s    z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)$r^   r*   r1   r  r  r  r\   r   r   r   r   rT   rQ  0_scaled_dot_product_efficient_attention_backwardrJ  Zis_cudar  r  rk   rH   '_scaled_dot_product_efficient_attentionr9   Zis_aligned_realized_tensorZtry_match_insignificant_stridesZrealize_inputmaybe_get_striderI   rP   r   statically_known_equalsra   r<   r  r_   r  r  )r  r   r  Zmeta_valr  r  Zeffn_attn_fwd_biasr0  Zexpanded_dimsZmaybe_striderc   Zout_stridesr  r  )r  r  rY   r  	  s    







z)sdpa_constraint.<locals>.apply_constraintc                 3  s"   | ]\}\}} |||V  qd S r   rX   )r~   r  r   r  r  rX   rY   r   :
  s   
z"sdpa_constraint.<locals>.<genexpr>c                   s$   i | ]\}}| d |j | qS r  r   r   r  rX   rY   r   >
  r   z#sdpa_constraint.<locals>.<dictcomp>)r   r]   r   rd   r   r  rX   r  rY   sdpa_constraint	  s    {r  )r?  c                 C  sn   |}|   |  kr"t||   }|  | kr@t||  }|  | krft||  }t|S t|S r   )r`   rW  r   r   r   r   r;  )r  r!  rV  r   rb   rX   rX   rY   r<    s    r<  )memory_formatc                C  s&   t j|  |  |  t|  dS Nr  )r6   r   r`   r   r  r\   r   )r   r  rX   rX   rY   r;  $  s    
r;  c                 C  s   g }t | trLt | jtjrL| j} t | tjrD||   | j} q"t| } t| } |r| j} |d d d D ]}tj| |d} qlt| } | S )Nr  r  )r^   r9   r_   r*   r  ra   Z
get_layoutr;  )r   Zreinterpret_view_layoutsr  rX   rX   rY   clone_preserve_reinterpret_view/  s    r  rL  c                  s(    fdd}t jt| || gdS )Nc                   s   t j| d    dS )Nr   r   rH   r  r  r   r  r  rX   rY   ri   Q  s    ziota.<locals>.fnr  )r6   r   r=   )lengthr  r  r   rf   requires_gradri   rX   r  rY   iotaG  s    
r  )r   r   c                   s   |   |  ksJ |  t|  d tjjtdrN| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	     s6   t t t |   tjt tj| | S r   )rH   rh  eqr  rk   rd  r  r   r   
src_loaderr  rX   rY   r  h  s    z select_scatter.<locals>.inner_fnr  )r   r  r  rI   rP   r   r   r   r  r   r+  r,  r   r  r6   r   r`   r\   )r   r!  r   r   r  rX   r  rY   select_scatter\  s     

r  c                   s     |  ksJ  t d    tj \t }t d  | < t	||}|  fdd}t
j   |t dS )Nr   r'   c              
     s4  dkr kr dkr | S t |  tj}t|  t|    < g }dkr~|t |t t	tj kr|t 
|t t	tj dkr|t t t|   dtjt dtj |sJ tt j|}t | fddtrdnd}t ||| S )Nr   r'   c                     s    S r   rX   rX   )src_idxr  rX   rY   r    r   z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )rH   r  rk   r  r\   r"   ra   r  r   r   r  r  r$   r  r   rl  r  r  r   rh  )r  r  r  Zsrc_valr   r/  r  r  r  r  r   r  )r  rY   r    sR    zslice_scatter.<locals>.inner_fnr  )r   r  r  r   r*   r  Znormalize_start_endr\   r"   r   r6   r   r`   )r   r!  r   r  r  r  Zsrc_sizer  rX   r  rY   slice_scatterz  s     
.
r  c                 C  s*   t | ttfr&t| dkr&t| d S | S r\  )r^   r\   r   r   _unwrapr   rX   rX   rY   r    s    r  r   rf   r  
pin_memoryc                  s  t |d tjfv d|  t | d tt tr@p<tjnpJt g }t tj	rl fdd}nt t
tfr fdd}nft dkst d t
tfrt dkr|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                   s   t  S r   r  r  r_   r   rX   rY   r    s    ztensor.<locals>.inner_fnc                   s   t  S r   rH   r  r  r  rX   rY   r    s    r   r   c                   s8    fdd t dkr*tdS  dt S )Nc              	     sr   | |k sJ ||  dkr(t |  S ||  d |  }t t t d tjt |tj | | ||S )Nr'   r%   r   )rH   r  rh  r  r  rk   r  )r  r  mid)binary_searchr_   r   r   rX   rY   r    s    z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   rH   r  r  r  )r  r   rY   r    s    r  r  )r{   rk   stridedr^   r  r   r  get_default_dtyper   r   r   r   ra   IntegerrI   rP   Zadd_tensor_constantr^  r6   r   r=   )r_   r   rf   r  r  r  r  rX   r  rY   r^    s,    *r^  c                 C  s@   t | tr2|d urt| |} |d ur.t| |} | S t| ||dS )Nr  )r^   r9   r   rW  r^  )r_   r   rf   rX   rX   rY   	as_tensor  s    


r  c                 C  s   t | tjdS r  r]  r_   rX   rX   rY   long_tensor  s    r  c                 C  s   ddl m} |tjjjtjjjd }|d us2J t|dksFJ |t	t
| \}}t||| }tj||_tj| tjjjd }t|tjtjtjfr|jjS t|S d S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr'   rz  )%torch.fx.experimental.symbolic_shapesr  rI   rP   r   r   rQ   r  r   r   iterr   r*   ZDynamicScalarregister_bufferr   register_operationr^   rk   r  ZSymFloatZSymBoolrV   r  r   r4  )r_   r  r  Zbinding_symZkeypathbufferrz  rX   rX   rY   _local_scalar_dense  s    r  c                 C  s   d S r   rX   )r_   rz   rX   rX   rY   _assert_scalar<  s    r  )rf   r  c                C  s   d S r   rX   )r   r   r  r   rf   r  rX   rX   rY   _assert_tensor_metadataF  s    r  c                   s   | t | ttfs"tdr"jt ttfr@ fdd}nDt tjr\ fdd}n(t dkspJ 	 fdd}t
j| |t|dS )Nr   c                   s   t  S r   r  r  r   r   rX   rY   r  T  s    z_full.<locals>.inner_fnc                   s   t  S r   r  r  r  rX   rY   r  Y  s    r   c                   s    g S r   rX   r  )value_loaderrX   rY   r  `  s    r  )r^   r   r   r   r   r   r   r   r   r  r6   r   r\   )
fill_valuerf   r   r   r  rX   )r   r   r  rY   _fullM  s     r  c                 K  s   t t|| fi |S r   create_tensor_liketensor_constructor)r   r  r   rX   rX   rY   r~  k  s    r~  c                   s    d d d d dd d fdd
}|S )NF)namesr   rf   r  r  r  c                   s   t | d u d t |d tjfv d|  t | d t|}|pFt }t|dkrvt|d tttj	frvt|d }|D ]}t|tj
rzJ qzdd |D }t |||S )Nnamed tensorsr  r  r'   r   c                 S  s   g | ]}t |qS rX   r  r  rX   rX   rY   r     r   z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r{   rk   r  r=   r  r   r^   r\   r   Sizer  r  )r  r   rf   r  r  r  r   rv  r  rX   rY   r$  r  s    	"z!tensor_constructor.<locals>.innerrX   )r  r$  rX   r  rY   r  p  s    r  )r  r   r  rf   r  r  c                 G  sX   t | d u d t|}t|dkrDt|d tttjfrDt|d }t|d ||||dS )Nr  r'   r   r   r  rf   r  )	r{   r=   r   r^   r\   r   rk   r   empty_strided)r  r   r  rf   r  r  r   rX   rX   rY   r    s    
"r  c                   s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r   rf   r  r  r  c                  sj   t | d t |d tjfv d|  |d u r8|  }nt|}|pJ|  }t|  } |||||dS )Nr  r  r  )r{   rk   r  r   r   r`   r\   r   )r   r   rf   r  r  r  r   creation_fnrX   rY   _constant_like  s    

z*create_tensor_like.<locals>._constant_likerX   )r  r  rX   r  rY   r    s    
r  c                 C  s   t t| S r   r  r  rX   rX   rY   ri    s    ri  c                   s   d d d d d fdd
}|S )Nr  c                  st   t |ttfsJ t| d t|d tjfv d|  t|pF|  }|pR|  }dd |D }t	 t
|||S )Nr  r  c                 S  s   g | ]}t |qS rX   )r   r  r  rX   rX   rY   r     r   z7new_constant.<locals>._new_constant.<locals>.<listcomp>)r^   r\   r   r{   rk   r  r   r   r`   r  r=   r   r   r   r  rf   r  r  rX   rY   _new_constant  s    z#new_constant.<locals>._new_constantrX   )r  r  rX   r  rY   new_constant  s    r	  r  c                C  s8   |d u r|   }|d u r |  }t|d ||t||dS Nr  r   r`   r  r=   r  rX   rX   rY   	new_empty  s    r  c                C  s  t | ttfsJ t |tttd fs*J t| d t|d tjfv d|  t|p^t }|pnt	dj
}t|}td||| d}|  |jj}tj|jdgt|  d|_t |tjsJ dd | D } |rd	d |D n
tj| }tj||| |d
|_|S )Nr  r  r  r   )r  rf   r   r   )r  c                 S  s   g | ]}t |qS rX   r  r  rX   rX   rY   r     r   z!empty_strided.<locals>.<listcomp>c                 S  s   g | ]}t |qS rX   r  r  rX   rX   rY   r     r   )rf   r   r   r  )r^   r\   r   r  r{   rk   r  r   r  r^  rf   r=   r  r*  r_   dataclassesreplacer   r*   r  rd  re  r  r  )r   r  r   r  rf   r  Z	pointwiser  rX   rX   rY   r    s0    
r  c                C  s8   |d u r|   }|d u r |  }t||||t||dS r
  r  )r   r   r  r   r  rf   r  rX   rX   rY   new_empty_strided  s    r  c                 C  s2   dd |D }t tt||jd}tj| |S )Nc                 S  s   g | ]}t jj|qS rX   )rI   rP   r   r  r  rX   rX   rY   r     r   z copy_strided.<locals>.<listcomp>)r  )sortedr   r   __getitem__r*   r  r  )r   r  r  rX   rX   rY   copy_strided  s    r  c                 K  s*   | dd usJ dt|| fi |S )Nr   z(dtype should be handled by decomposition)r  r  )r   r  r   rX   rX   rY   full  s    r  c                   s   t | tsJ | dkr(t| | S |  tdk}t|  | |r^t| dg} dg|  |  fdd}t	j
|  |  || dS )Nr   r'   c                   s@   t | } t|   }t| dkr0|g} n||  < | S r\  )r\   rH   indirect_indexingr   )r  Z
gather_idxr   index_loaderr   r  rX   rY   ri   1  s    zgather.<locals>.fnr  )r^   r9   	get_numelr  r   r   r  r   r  r6   r   r`   r   )r   r   r   Zsparse_gradr  ri   rX   r  rY   gather  s$    	r  c                   s   |rt tjj| ||||S |r$J t| ts2J t|ts@J dt| v sTJ |  |  t	|
 | 
 g |
 dd   fdd}tj|  |  |dS )Nr   r'   c                   s\   t | t ks"J |  d  | d  }t|d gg | d   }|S )Nz != r   )r   rH   r  )r  Z	var_indexZ
weight_idxindices_loaderZindices_ndimr  Zweight_loaderZweight_sizerX   rY   ri   T  s    "
zembedding.<locals>.fnr  )rG  rQ  	embeddingrJ  r^   r9   r  r   r  r   r   r6   r   r`   )weightrj  Zpadding_idxZscale_grad_by_freqsparseri   rX   r  rY   r  B  s(    

r  c                   s   t dd  D s*J ddd  D  tdd  D rDtddd t D }t|d	ksjJ d
d gt  }t|t fdd|D  D ]$\}}| |krtd|||< q||fS )Nc                 s  s2   | ]*}|d ur|  tjtjtjtjfv V  qd S r   )r   rk   r  rd  r   r  r   rX   rX   rY   r   e  s   z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S  s   g | ]}|d ur|  qS r   r   r   rX   rX   rY   r   j  r   z/check_and_broadcast_indices.<locals>.<listcomp>c                 s  s*   | ]"}|d ur|  tjtjfv V  qd S r   )r   rk   r   r  r   rX   rX   rY   r   l  s   zFallback for bool indicesc                 S  s   g | ]\}}t |tr|qS rX   r   r   rX   rX   rY   r   q  r   r   z"requires at least 1 non-None indexc                   s   g | ]} | qS rX   rX   r   rj  rX   rY   r   t  r   z.Fallback when indices is on a different device)r   r   rx   r]   r   r   r   r`   )rj  rf   Z
valid_idxsZnew_indicesrc   r   rX   r  rY   check_and_broadcast_indicesd  s"    
$
r   c	              
     s   dt dd  D ]\}	}
|
|	 dkrdq
fddtD g 
tt d  d }r| nd |  |d    	f
dd}|fS )	NFr'   Tc                   s    g | ]\}}|d u r | qS r   rX   )r~   rc   rz  r$  rX   rY   r     r   z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	     s  t | t ksJ t t ks(J t }g }d }rDdn|}d}td d D ]}||krp||7 }| d u r|t | k sJ || |  |d7 }q\| }|d usJ | }|tj|| |||  | d q\g || |d  }	d u r|S 	|S )Nr   r  r'   r   wrap_neg)r   r   ra   rH   r  )	r  r  	new_indexfirst_tensor_indexZstart_offsetZnext_idxrc   loaderr   )
r   indexed_sizerj  indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer#  r  rX   rY   ri     s>    

z*index_output_size_and_inner_fn.<locals>.fn)r   r]   r   )r$  rj  r+  r,  r(  r'  r  r   r#  previouscurrentr%  ri   rX   )r   r'  rj  r(  r)  r*  r+  r,  r#  r  r$  rY   index_output_size_and_inner_fn~  s"     


"r/  c                 C  s,   t | ||\}}}tj|  |  ||dS r  )index_impl_helperr6   r   r`   r   )r   rj  r   r*  r  r   rX   rX   rY   
index_impl  s    r1  c           
        s   t ttfsJ |  t|  \}t|dks@J ddd D }t|d   }|  fddttD }|rd|v rd|vrt	dfddttD }t
||||d ||d	\}  fd	d
}	||	 fS )Nr   z Must have at least one valid idxc                 S  s    g | ]}|d ur|  nd qS r   r  r   rX   rX   rY   r     r   z%index_impl_helper.<locals>.<listcomp>c                   s    g | ]} | d ur| qS r   rX   r   )rj  r$  rX   rY   r     r   z0index is out of bounds for dimension with size 0c                   s   g | ]} | qS rX   rX   r   r!  rX   rY   r     r   r"  c                   s    | S r   rX   r  )index_inner_fnr  rX   rY   r    s    z#index_impl_helper.<locals>.inner_fn)r^   r\   r   r  r   r`   r   r   r   
IndexErrorr/  )
r   rj  r   r#  r+  r(  r,  r'  r*  r  rX   )r2  rj  r  r$  rY   r0    s0    
r0  c                 C  sF   zt | |ddW S  ty@   |   ttjjdd| | Y S 0 d S )NTr   FrB  )r1  rx   r*  rG  rQ  r   rn  r   rj  rX   rX   rY   r     s    r   c                 C  s   t | |ddS )NFr4  )r1  r5  rX   rX   rY   _unsafe_index  s    r6  c                 C  s   t t| |||dddS )NTFr   may_realizeindex_put_impl_r;  r   rj  r   
accumulaterX   rX   rY   	index_put  s    r=  c                 C  s   t t| |||dddS )NFr7  r9  r;  rX   rX   rY   _unsafe_index_put  s    r>  c                 C  sB   |  |   krt||   }|r,t| |}t| t|d || S r\  )r`   rW  r   r   rh  )r  rj  r   r<  rX   rX   rY   index_put_as_masked_fill  s
    
r?  c                 C  s   t tjjj| ||| | S r   )r*   ZIndexPutFallbackrI   rP   rQ   rT   r  rj  r   r<  rX   rX   rY   index_put_fallback&  s    rA  c                 C  s   t | |||dddS )NTr7  r:  r@  rX   rX   rY   
index_put_+  s    rC  c                 C  s   t | |||dddS )NFTr7  rB  r@  rX   rX   rY   _unsafe_index_put_2  s    rD  c              
     s  |rBdd }dd  || |  v rBt fdd|D sB|  | dkrt|dkr|d  tjtjfv r|d }t	t|
 t| 
 D ]}t|d	}qt| |g||S t rt| |||S |D ]2}	|	d ur|	 tjtjfv rt| |||  S q| 
 t}
|rbt|  rb|
dkr<t| dg} t| |||} |
dkr^t| g } | S t||  }zt||  \}}W n" ty   t| ||| Y S 0 d
d |D }t| tsJ |   |
dkrt| dg} t||d  
 }fddt	t|D }t|||||d |d\}}t||}tj|  |  | |||r^dnd d}tjd t| |d}tj !||_"tj #| |
dkrt| g } | S )Nc                 S  sP   t | tjr| j} t | tjr&|  } t | tjr8| j} t | tjrL|  S d S r   )	r^   r*   r9   r_   r  r  r  ZBufferr  r   rX   rX   rY   try_get_name<  s    z%index_put_impl_.<locals>.try_get_namec                 S  sd   t | tr`t | jtjr`| j } t | tjo^t | jtjo^t| jdd o^| jj	j
tjjjjkS dS )Nr  F)r^   r9   r_   r*   r  r  r  r  r   r  rT   rk   rH   rQ  randpermrJ  )indicerX   rX   rY   indice_slice_from_randpermE  s    
z3index_put_impl_.<locals>.indice_slice_from_randpermc                 3  s   | ]} |V  qd S r   rX   )r~   rG  )rH  rX   rY   r   S  s   z"index_put_impl_.<locals>.<genexpr>r'   r   r  c                 S  s    g | ]}|d ur|  nd qS r   r  r   rX   rX   rY   r     r   z#index_put_impl_.<locals>.<listcomp>c                   s   g | ]} | qS rX   rX   r   r!  rX   rY   r     r   r4  
atomic_addrf   r   r  r  output_indexerZscatter_moder   r  r_   )$r  r   r*  r  r   r   rk   r   r  r   r   r  r?  $are_deterministic_algorithms_enabledrA  rB   rR  r   r   r`   rx   r^   r9   r\   r/  r   r*   Scatterr  r  MutationLayoutSHOULDREMOVErI   rP   r  r   r  )r  rj  r   r<  r   r8  rE  r  r   r   Zx_ndimr+  r(  r,  r'  Zexpected_vals_sizer  scatterr  rX   )rH  r$  rY   r:  9  s    	









r:  rB  c                   sT   t | |ddd\}}  |   fdd}tj|  |  ||dS )NFr"  c                   sB   j tjkr t tj}n }t| fddS )Nc                     s    S r   rX   rX   )_unsafe_index_fnr  self_loaderrX   rY   r    r   z8_unsafe_masked_index.<locals>.inner_fn.<locals>.<lambda>)r   rk   r   rH   r   r  )r  Zmask_valrQ  fillr  Zmask_loaderrR  r  rY   r    s    z&_unsafe_masked_index.<locals>.inner_fnr  )r0  r  r6   r   r`   r   )r  r  rj  rT  r  r   r  rX   rS  rY   _unsafe_masked_index  s    rU  c                   s@   t ||d}|   fddtt D }t| ||ddS )Nr   c                   s6   g | ].} | r.t  | |  | d  ndqS r  r  r   rj  r  rX   rY   r     s   z7_unsafe_masked_index_put_accumulate.<locals>.<listcomp>T)r<  )rh  r   r   r   r>  )r   r  rj  r   Zmasked_valueZclamped_indicesrX   rV  rY   #_unsafe_masked_index_put_accumulate  s    
rW  c                 C  s   t |t || S r   )rH   r  r  r   minmaxrX   rX   rY   r    s    r  c                 C  s$   t | }t||||}t|| |S r   )r;  r  copy_)r  r!  r   r  r  r   Zoutput_viewrX   rX   rY   as_strided_scatter  s    
r\  r   c                 K  s   t t| |||fi |S r   )scatter_r;  )r   r   r   r!  r   rX   rX   rY   rP    s    rP  rl  include_selfr  )rP  r   rl  r`  c             	   C  sf   t |t}t| || ttj|r(| nt||r>| jnd|rbt	j
| ||||||d |S d S )Nznot implr_  )r^   r9   rG   r   r   rk   r   r  r`   r*   ZScatterFallback)rP  r  r   r   r!  rl  r`  Zsrc_is_tensorrX   rX   rY   scatter_fallback  s(    

	ra  rl  )r   rl  c                C  sr   |dv sJ |d u rHt tjtjjjj}t|| ||||d}|d urH|S |dkrVd}n|dkrbd}t	| ||||S )N)Nr   multiplyrb  r   sumrc  prod)
r   rQ  r^  rI   rP   rQ   rT   Z_overloadnamera  scatter_reduce_)r  r   r   r!  rl  rP  fallback_resultrX   rX   rY   r^    s    r^  c                 C  s   t t| |||S r   )scatter_add_r;  r   r   r   r!  rX   rX   rY   scatter_add)  s    rj  c                 C  s   t | |||dS )Nrd  )rf  ri  rX   rX   rY   rh  .  s    rh  c                 K  s   t t| ||||fi |S r   )rf  r;  )r   r   r   r!  reduction_typer   rX   rX   rY   scatter_reduce3  s    rl  )r`  )r   r`  c             	     s2  |dv sJ t tj dkr,dtj v s4J dttrHtttjj |||d}|rj|S tt	sxJ dt
| v sJ t  }|dkrtdgtt	rt  dkrtdgt|t	rt | dkrt|dg}| dkr
S t    | tt	r8 nd  fdd	}fd
d}	dd }
|stj  fdd| |d d}tjd t|d}tj||_tj| tj  |	| ||
|d}tjd t|d}tj||_tj| |dkr.tg S )N)Nrd  re  meanamaxaminr'   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r_  r   r   c                   sD     }t|}t| }tj| |dkr.dn|  dd| < |S )Nr   r'   F)r#  )r   r   r\   rH   r  )r  r  r.  Zindirect_idx)r   r  r  rX   rY   rK  f  s    
z'scatter_reduce_.<locals>.output_indexerc                   s    r| S t   S d S r   rH   r  r   r  )r  r!  r  rX   rY   ri   p  s    zscatter_reduce_.<locals>.fnc                 S  s    | dkrdS | d u sJ d S d S )Nrd  rI  rX   rb  rX   rX   rY   backend_reduce_strw  s    z+scatter_reduce_.<locals>.backend_reduce_strc                   s   t d  S r\  rq  r  )r  rX   rY   r    r   z!scatter_reduce_.<locals>.<lambda>rJ  rL  )r   rQ  rf  r   r^   r   r~  ra  rp  r9   r  r   r   rR  r  r  r*  r  r*   rN  r`   r  rO  rI   rP   r  r   r  )r  r   r   r!  rl  r`  rg  r.  rK  ri   rr  Zzero_outr  rP  rX   )r   r  r  r!  r  rY   rf  8  s    







rf  ztuple[Optional[float], ...])scales_xnexactc           
        s   |    |  |   d  |  d   }dd D t|ksRJ |}dd t|D t|D ]\}}|d urrd| |< qr fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S  s   g | ]}t jj|qS rX   rp  r   rX   rX   rY   r     r   z&upsample_nearestnd.<locals>.<listcomp>c                 S  s   g | ]\}}|| qS rX   rX   )r~   rc   orX   rX   rY   r     r   r  c                   s\   t | tj}  r(t | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr4  )
rH   r  rk   r  r   r  r  r   rd  r  )r   r  r   )ru  rX   rY   scale_fn  s    z$upsample_nearestnd.<locals>.scale_fnc                   sB   |  d  }| d   }g |fddt | D S )Nc                   s   g | ]\}}} |||qS rX   rX   )r~   rc   rv  r   )rx  rX   rY   r     r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r  r   r   )i_sizes
inv_scalesrt  rx  r  rX   rY   ri     s
     zupsample_nearestnd.<locals>.fnr  )
realize_hintr  r   r   r   r]   r6   r   r`   r   )
r   r*  rs  rt  ru  batchZo_sizesrc   r  ri   rX   )ru  ry  rz  rt  rx  r  rY   upsample_nearestnd  s&    
r}  zOptional[float])r  c                 C  s   t | ||fddS )Nr'   rt  r}  r   r*  r  rX   rX   rY   upsample_nearest1d  s    r  c                 C  s   t | ||fdddS )Nr'   Trt  ru  r  r  rX   rX   rY   _upsample_nearest_exact1d  s    r  )scales_hscales_wc                 C  s   t | |||fddS )Nr%   r~  r  r   r*  r  r  rX   rX   rY   upsample_nearest2d  s    r  c                 C  s   t | |||fdddS )Nr%   Tr  r  r  rX   rX   rY   _upsample_nearest_exact2d  s    r  )scales_dr  r  c                 C  s   t | ||||fddS )Nr   r~  r  r   r*  r  r  r  rX   rX   rY   upsample_nearest3d  s    r  c                 C  s   t | ||||fdddS )Nr   Tr  r  r  rX   rX   rY   _upsample_nearest_exact3d  s    r  c                   s   t  fdd|D S )Nc                 3  s   | ]}t | V  qd S r   r  r   r   rX   rY   r   	  r   z$_create_constants.<locals>.<genexpr>)r   )r   rd   rX   r   rY   r    s    r  c                   s:   |   |   fdd}tj|  |  |dS )Nc                   sF   t | } t| tksJ  D ]}| d | |  | |< q | S r`  )r\   r   )r  r   rt  rm  r  rX   rY   r&    s
    zrev.<locals>.loaderr  )r  r   r6   r   r`   r   )r   rt  r&  rX   r  rY   rev  s    r  zSequence[int])r   paddingr  rj   c                 C  s  dd }| rdS t |dks.t |  dkr2dS |   t| tjrt| jtjrt| jjtjszt	j
rt| jjtjr| jjjsdS |   t| \}}|j}|d dkrdS |d dks|d dks|d dkrdS |d }|dkrdS |d }|jd }	||	| k rdS | jjj}
|jd |jd | g}|tjj|
< t| ||j|j}t|d|	|	| d	}t|| td
 d  d7  < |S )z
    This optimization changes the semantics of padding from 'clone'
    style to 'view' style.

    Thanks to functionalization, this change can still maintain numerical
    correctness.
    c                  S  sL   t jj} | du rdS t| j}t|dkrH|d jtjj	tj
j	fv rHdS dS )a  
        Conservatively check if padding can be fused with downstream op.
        1. if the downstream op is a sum, then there is little benefit to
           do inplace padding
        2. if the downstream op is a matmul, doing inplace padding can
           save membw.
        NTr'   r   F)rI   rP   rQ   r   rR   r   rT   rQ  mmrJ  Zaddmm)rQ   rR   rX   rX   rY   _padding_can_be_fused-  s    
z6inplace_constant_pad_nd.<locals>._padding_can_be_fusedNr   r%   r'   r   r   )r   r  r  inductorinplace_padding)r   r   r*  r^   r*   r9   r_   r  r  r(   Zcan_inplace_pad_graph_inputZInputBufferr   Zfreeze_layoutr  r  r   rI   rP   Zbuffer_to_padded_sizer  r  r  fill_r   )r   r  r  r  r   r  r  ZnpadZstride0ZrowsizeZbufnameZpadded_sizeZ	resized_xZsliced_xrX   rX   rY   inplace_constant_pad_nd"  s\    

$


r  c              	     sv  t |d dksJ tdd |D r.t| S tjrHt| |}|rH|S |  }tttt	|d d d |dd d  t |t   g  D ] \}}
tjj||f qt|d  }g t	 |d  D ].\\}}	}

|
 |
t|
| |	  qt |t |ks J t|   fddfdd	}|  tj|  |  ||d
S )Nr%   r   c                 s  s   | ]}|d kV  qdS r   NrX   r~   rj  rX   rX   rY   r     r   z"constant_pad_nd.<locals>.<genexpr>r'   c                   s~   g }t  d  D ]>\}\}}}|dkr>|t|d |dkr|t|| qttj|}t| fddS )Nr   c                     s    S r   rX   rX   )r   r  rX   rY   r    r   z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   ra   range_mask_lowrange_mask_highr   rl  rH   r  r  )r   r  r  r  r  r  )boundsr  
mask_sizesrt  r  r  rY   r    s    "zconstant_pad_nd.<locals>.maskc                   sZ   t | d  }t| d   D ]\}\}}|||  q"t|t| ksRJ |S r   )r\   r   ra   r   )r   r$  r  r  Z_high)bounds_precompr  rt  rX   rY   	offset_fn  s
    z"constant_pad_nd.<locals>.offset_fnr  )r   r   r;  r(   r  r  r   r\   r   r   ra   rI   rP   r   Zlookup_precomputed_sizer   r   r   r   r  r6   r   r`   )r   r  r  rb   rm  lhr*  r  r  r   r  rX   )r  r  r  r  r  rt  r  rY   r    s:    *

r  z
sympy.ExprzUnion[sympy.Expr, int]rc   r  c                 C  s&   t t | tjt t|tjS r   )rH   r  r  rk   r  r   r  r  rX   rX   rY   r    s    r  rc   r  c                 C  s    t t | tjt |tjS r   )rH   r  r  rk   r  r  rX   rX   rY   r    s    r  rc   r  r  c                 C  s   t t| |t| |S r   )rH   r  r  r  r  rX   rX   rY   
range_mask  s    r  r  c                   sF       d   p&dg   fdd}|S )Nr   c                   s|   | d   |  d   t tj fddtD }rbt| fddS t| 	fddS )Nc                   s.   g | ]&}t |  | |  |  qS rX   )r  r   )r  ih	padding_hrX   rY   r     r   z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                     s   t  dg S )Nr]  )constant_boundary_conditionrX   )r   r  pad_fill_valueprefixr   rX   rY   r    s   
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                     s   g  S r   rX   rX   )r  r  r  rX   rY   r    r   )r   rl  rH   r  r   r  )r   r  r   r  r  r  r  r  r   r  )r  r  rY   r    s    z)constant_boundary_condition.<locals>.load)r   r  )r   r  r  r  r   r  rX   r  rY   r    s
    r  dilationc          	      C  s$  |d u rdgt | }t| d||   || || d   || d  || }|rt| d||   || || d   d|| d   || }tjj|d ||  |  ||  dkr|d8 }tjjd|||  |  ||   tjj|| dkrtjj|| d}n|}||fS )Nr'   r%   r   F)r   r"   rI   rP   r   r  r+  r   )	r   rc   kernel_sizer  r  	ceil_moder  Zx_outZx_altrX   rX   rY   pooling_size  s2    .
*$r  c                C  s    t | |} ttj| }|dkS )N   )rC   r   rl  r  r  )r  n_dimwindow_sizerX   rX   rY   %should_fallback_max_pool_with_indices
  s    
r  assert_fallbackc                C  s   |dkrdg| }|dkr$dg| }|s,|}t ||}t ||}t ||}t ||}t| tsbJ t||ksrJ t||ksJ t||ksJ t||ksJ t|  |d |d fv sJ t||d}|d ur||ksJ |||||fS )Nr   r'   r%   r  )rC   r^   r9   r   r   r  )r   r  r  r  r  r  r  Zuse_fallbackrX   rX   rY   max_pool_checks  s(    





 r  c             
     sR  |    | jd   }| j d  t fddtD  \} | j}	|	tju rddn|	jrrtdn
t	|	j
}
t|t| }tst stdd D rt| |
dn|  fdd	}tjd
| |  |	|	||d}tjd| |  tj|	||d}t|jjtr2|  t|jjtrJ|  ||fS )Nc                   s&   g | ]}t | | d qS )r  r  rr  )r  dhwr  r  r  r  rX   rY   r   <  s   z*_max_pool_with_offsets.<locals>.<listcomp>F-infc                 s  s   | ]}|d kV  qdS r  rX   rr  rX   rX   rY   r   L  r   z)_max_pool_with_offsets.<locals>.<genexpr>r]  c                   sJ   | d   }|  d    fddt D }g ||S )Nc                   s4   g | ],} | |  | |   |  qS rX   rX   r   bhr  r  reduction_idxr  rX   rY   r   T  s   z<_max_pool_with_offsets.<locals>.fn_inner.<locals>.<listcomp>r   )r  r  r  r  )r  r  r  r  r  r  r  rY   fn_innerQ  s    z(_max_pool_with_offsets.<locals>.fn_innerrZ  rk  
input_noderf   	dst_dtyper7  r  r  reduction_rangesargmax)r{  r  r   r   r   rk   r   rM  r   rO  rY  r\   r   r  r  r7   r   r`   r  r^   r_   r*  )r   r  r  r  r  r  r  r|  Zdhw_outr   Z	min_valuer  r  r  offsetsrX   )r  r  r  r  r  r  r  r  rY   _max_pool_with_offsets-  sV    
	"	

r  c           
   
   C  s   t |}t| |||||dd\}}}}}tjdd: t| ||||||d\}}	|t|	tjfW  d    S 1 st0    Y  d S )NFr  r  unroll_reductions_thresholdr  )r   r  r(   r   r  r   rk   r  )
r   r  r  r  r  r  r  r   r  r  rX   rX   rY   !_low_memory_max_pool_with_offsetsx  s*    	

	r  z"Sequence[Union[int, torch.SymInt]]zxCallable[[Sequence[Union[int, torch.SymInt]], Sequence[Union[int, torch.SymInt]]], torch._inductor.virtualized.OpsValue])r  r  
input_sizeincrements_to_indexrj   c                   sZ   t |  tttj fdd}tj	| 
 tj||  d}|S )Nc                   sJ   | }t |}t|} | |}t t| d  tjS r   )rH   r  r)   Z_flattened_index_to_ndr  Z_flatten_indexrk   r  )r  r  Zoffset_sympyr  Zidhwr  r  r  r  Zoffsets_loaderr  rX   rY   offsets_to_indices  s    
z4_pool_offsets_to_indices.<locals>.offsets_to_indicesr  )r   r  r   r4  r   rl  r  r  r6   r   r`   rk   r  r   )r  r  r  r  r  rj  rX   r  rY   _pool_offsets_to_indices  s    		r  c                   s(   t | fdd}t| |||S )Nc                   s,   |  d    fddt D S )Nc                   s4   g | ],} | |  | |   |  qS rX   rX   r   r  rX   rY   r     s   zX_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index.<locals>.<listcomp>r  r  r  r  r  r  r  r  rY   r    s    zD_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index)r   r  )r  r  r  r  r  r  r  rX   r  rY   '_low_memory_max_pool_offsets_to_indices  s
    r  c              	   C  s^   t | |||||d\}}}}}t| ||||||d\}}	t|	|| j| d  |||}
||
fS )Nr  )r  r  r  r  )r   r  r  r  r  r  r  r   rb   r  rj  rX   rX   rY   _max_pool_with_indices  s    	
	r  c              	   C  s   t | |||||ddS Nr%   r  r  r   r  r  r  r  r  rX   rX   rY   max_pool2d_with_indices  s    	r  c              	   C  s   t | |||||ddS Nr   r  r  r  rX   rX   rY   max_pool3d_with_indices  s    	r  c                   s\  dkrddg|dkr ddg}s(t |ts6J tdksFJ tdksVJ tdksfJ t|dksvJ t| dv sJ |   |  }t |trt |jjtr|jj}	tj	d tj
|	 |	 |	 d|	d}
|
  |
 }n| }|d ur|d dkp(|d uo(|d dk}tdd |D rTt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkr t| ||||S |  	
fdd}tj|  |  ||d}|rTtj|S |S d S )Nr   r'   r%   r  )rf   r   r   rL  c                 s  s   | ]}|d kV  qdS r  rX   rr  rX   rX   rY   r   9  r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3  s:   | ]2}t |d   t d | d   d    dV  qdS r   r'   NrZ  r~   r  r  r  rX   rY   r   F  s   c                 3  s:   | ]2}t |d   t d| d   d    d V  qdS r'   r   Nr  r~   wr  rX   rY   r   J  s   r  c                   s^  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]"}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u r
t ||t dtj}	n:t t t ||t |||}t |t 	|	||	}	q4q&|	d usZJ |	S )Nr   r'   Fr4  r  r  )rH   r  rk   rd  r"   r  r  r  r   r   r  r  r  rh  r  r  r  )r  r  r  r  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r  grad_loaderh_window_sizer  Zindices_sizer  r  pooled_heightpooled_widthr  w_window_sizewidthrX   rY   ri   Y  sj      


z,max_pool2d_with_indices_backward.<locals>.fnr  )r^   r9   r   r   r{  r  r_   r6   r*   r  rd  r`   r   Zdecide_layoutr  r   )fallback_max_pool2d_with_indices_backwardr  r\   rZ  r   r   r  r  )grad_outputr   r  r  r  r  r  rj  Z	gO_strider_   Zx_bufferZx_strideZis_channels_last_batch_heightr   r  r  ri   rb   rX   r  rY    max_pool2d_with_indices_backward  s|    	

 ;r  r  c                   s   |    fdd}|S )Nc              
     s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                     s   g    S r   rX   rX   )h_start_indexr  iwr  w_start_indexr  rX   rY   r    r   z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)rH   r  r  r  rk   r  r  )r  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr  pad_valr  )r  r  r  r  r  rY   r    s$    z!pad_adaptive_loader.<locals>.loadr  )r   r  r  rX   r  rY   pad_adaptive_loader  s    r  c           
      C  sL   t j| ||d}t j|||d}t j| ||d}t j|||d}	||||	fS )N)out_diminp_dim)r   r   )
start_index	end_indexh_inw_inh_outw_outr  r  r  r  rX   rX   rY    compute_indices_adaptive_pooling  s
    r  c                   sD   |\}}|\}}	t | |||||	\  fdd}
|
S )Nc                   s   | ^ }}}|} |}|}|}d }	t td td D ]:\}
}|||
|g||g||g}|	d u r||}	qL||	}	qL|	S rb  )r   productr   )r  r&  r  r  bwr  r  r  r  r  r  r  rz  h_end_index_fnh_start_index_fnkernel_maxes
pooling_fnw_end_index_fnw_start_index_fnrX   rY   ri     s"    $z _adaptive_pooling_fn.<locals>.fnr  )r   r  r  in_sizes	out_sizesr  r  r  r  r  ri   rX   r	  rY   _adaptive_pooling_fn  s    r  c           
        sF   |\}|\}}t | ||||\  fdd}	|	S )Nc                   s   | ^ }}}|} |}|}|}d }	d }
t td td D ]|\}}||||g||g||g}t||  | | tj}|
d u r|}
ntt||	||
}
|	d u r|}	qP||	}	qP|
S rb  )	r   r  r   rH   r  rk   r  rh  gt)r  r&  r  r  r  r  r  r  r  maxvalZmaxindexr  r  rz  r   r
  r  r  r  r  r  r  rX   rY   ri     s0    $z)_adaptive_pooling_fn_with_idx.<locals>.fnr  )
r   r  r  r  r  r  r  r  r  ri   rX   r  rY   _adaptive_pooling_fn_with_idx  s    #r  c                   s    tjkrtdtts$J t|dks4J    ^ }}}t	j
j|}t	j
j|}|\}}||kr||krtS |dks|dkrg |||}t|   dS || dkr|| dkr|| || g}t|S t|| d |}	t|| d |}
t|||g }  }|	|
 }|dkrHt|S dd }d	d
 }t|||	|
g||g||gtjd tt fdd}tj |||d}|S )Nz0'adaptive_avg_pool2d' not implemented for 'Long'r%   r   r  r'   r  c                 S  s   t | | |S r   r"   r   r  r  rX   rX   rY   r   O  s    z)_adaptive_avg_pool2d.<locals>.start_indexc                 S  s   t | d | | d |S r`  r  r  rX   rX   rY   r  R  s    z'_adaptive_avg_pool2d.<locals>.end_indexr   r  r  r  r  r  c                   s   t  | t | S r   )rH   truedivr  r  fn_sumZones_loaderr   rX   rY   ri   `  s    z _adaptive_avg_pool2d.<locals>.fnr  )r   rk   r  rZ  r^   r9   r   r{  r   rI   rP   r   rq  r;  r  r`   
avg_pool2dr<   r\   fallback_adaptive_avg_pool2dr  rH   r   r  	ones_liker6   r   )r   r*  r|  r  r  r  r  o_sizer  h_kernel_maxw_kernel_maxr  r   r  r   r  ri   rvrX   r  rY   _adaptive_avg_pool2d)  sV    


	r%  c                   s    tjkrtdtts$J t|dks4J    ^ }}}t	j
j|}t	j
j|}|\}}|dks|dkrg |||}t|   dt|tj dfS || dkr|| dkrtt|| d |}t|| d |}	t|||g }
  }||	 }|dkr.t|S dd }d	d
 }t||||	g||g||gtjdt||||	g||g||gtjd fdd} fdd}tj |||
d}tj tj||
d}||fS )Nz,adaptive_max_pool2d not implemented for Longr%   r   r  r'   r  c                 S  s   t | | |S r   r  r  rX   rX   rY   r     s    z(adaptive_max_pool2d.<locals>.start_indexc                 S  s   t | d | | d |S r`  r  r  rX   rX   rY   r    s    z&adaptive_max_pool2d.<locals>.end_indexr  c                   s    | t tdS Nr  r  r   r  )inner_func_max_valr   rX   rY   inner_fn_max_val  s    z-adaptive_max_pool2d.<locals>.inner_fn_max_valc                   s    | t tdS r&  r'  r  )inner_func_max_idxr   rX   rY   inner_fn_max_idx  s    z-adaptive_max_pool2d.<locals>.inner_fn_max_idxr  )r   rk   r  rZ  r^   r9   r   r{  r   rI   rP   r   rq  r  r`   
ValueErrorr<   r\   fallback_adaptive_max_pool2dr  rH   r  r  r6   r   )r   r*  r|  r  r  r  r  r!  r"  r#  r  r   r  r   r  r)  r+  r$  rirX   )r*  r(  r   rY   adaptive_max_pool2dt  sp    

		r/  c                   s<            fdd}|S )Nc           	        s   g | d   }t | }t  tj}t d tj}t t |tjt |tj}t t 	|dd|}t 
|| | t 
||  }t |tj}t ||}t t |||tS )Nr'   r   )rH   r  r   rk   r  r  r   float64rh  r  r  r  r  r   r4  )	r  rc   sampleZi_exprdiffZout_sz_exprr  Zseq_ir  r   in_sz	kernel_szndimsout_szsamplesZsamples_loaderrX   rY   r    s     z)_fractional_pooling_offsets.<locals>.loadr  )r8  r4  r7  r5  r   r6  r  rX   r3  rY   _fractional_pooling_offsets  s    r9  c                 C  s   t | |||ddS r  _fractional_max_poolr   r  r*  random_samplesrX   rX   rY   fractional_max_pool2d  s    r>  c                 C  s   t | |||ddS r  r:  r<  rX   rX   rY   fractional_max_pool3d  s    r?  c                   s<  |    | jd   | j d   }tjdd fddtD  |  fdd} fddt|t }|  }tj	d	| | 
 ||||d
}	tj	d| | 
 tj|||d
}
t|	jjtr|	  t|
jjtr |
  t|
| j}|	|fW  d    S 1 s.0    Y  d S )Nr  r  c              
     s    g | ]}t  |d qS ))r8  r4  r7  r5  r6  r   )r9  rr  )inp_dhwr  r  r*  r=  rX   rY   r     s   	z(_fractional_max_pool.<locals>.<listcomp>c                   s$   | d   }g | | |S r   rX   )r  r  r  )r  r  r  rX   rY   r    s    z&_fractional_max_pool.<locals>.fn_innerc                   s8   | d   |  d    fddt D S )Nc                   s&   g | ]}|  | |  qS rX   rX   rr  )bdhwdhw_index_fnr  r  rX   rY   r     s   zE_fractional_max_pool.<locals>.increments_to_index.<locals>.<listcomp>r  r  )rB  r  )rA  r  r  rY   r    s
    z1_fractional_max_pool.<locals>.increments_to_indexrZ  r  r  )r{  r  r(   r   r   r  r\   r   r7   r   r`   rk   r  r^   r_   r*  r  )r   r  r*  r=  r  r|  r  r  r   r  r  rj  rX   )rB  r  r@  r  r  r*  r=  r  rY   r;    sN    "	


r;  c                   s       ^ }}}tjj|}tjj|}|^ }}}	|| dkrr||	 dkrrt|| ||	 gddS t||}
t||	}dd fdd}t||
|g||g||	gt	j
d  fd	d
}tj  |t|d}|S )Nr   r'   )divisor_overridec                 S  s   t | | t|S r   )r!   r   r4  r  rX   rX   rY   r   ;  s    z0upsample_nearest2d_backward.<locals>.start_indexc                   s    | d ||S r`  rX   r  )r   rX   rY   r  >  s    z.upsample_nearest2d_backward.<locals>.end_indexr  c                   s    | t S r   )r  r  )r  r   rX   rY   ri   J  s    z'upsample_nearest2d_backward.<locals>.fnr  )r{  r   rI   rP   r   rq  r  r<   r  rH   r   r6   r   r`   r   r\   )r   r*  r  r  r  r  Zinp_hZinp_wZout_hZout_wr"  r#  r  ri   r$  rX   )r  r   r   rY   upsample_nearest2d_backward)  s6    

	rD  rX   c              
   C  s   t | ||||||ddS )Nr%   r]  _avg_poolndr   r  r  r  r  count_include_padrC  rX   rX   rY   r  _  s    
r  c              
   C  s   t | ||||||ddS )Nr   r]  rE  rG  rX   rX   rY   
avg_pool3du  s    
rI  c              	     s"  
s
sdg t t 

t t| tsBJ tksRJ t
ksbJ tksrJ t|  d d fv sJ |   |  d   }|   d  t 
fddtD  \}	}
tst|
r
t	| ddd}n| 
 d	}t|t|	 }|  ttj}|d
krdkrVt}ndkrft}ntd || 
 |S 
fdd|r|r|r|n|jrd 		fdd}nfdd}n
f	dd}tj|  ||d}|S )Nr   r'   r%   c              	     s"   g | ]}t | | qS rX   r  r   )r  r  r  r  r  rX   rY   r     s   z_avg_poolnd.<locals>.<listcomp>r  r]  TFr  r   zUnknown dim: c                   s   | d   }|  d   d }t jfddtD  D ]J fddtD }|g ||}|d u rz|}q<t||}q<|S )Nc                   s   g | ]}t  | qS rX   r  r   )r  rX   rY   r     r   z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                   s,   g | ]$} | |  |  |  qS rX   rX   r   )r   r  r  r  rX   rY   r     r   )r   r  r   rH   r   )r  r&  r  totalr   rz  )r   r  r  r  )r   r  rY   r    s     z_avg_poolnd.<locals>.fn_sumc                   s   t | t  S r   )rH   r  r  r  )r   r  r  r  rX   rY   ri     s    z_avg_poolnd.<locals>.fnc                   s   t | t  S r   )rH   truncdivr  r  )divisorr   r  r  rX   rY   ri     s    c                   s   |  d  }g }t D ]x}|| |  |  }t||  | |  } svt|d}t|| }t|| tj}|| qt	
tj|}jrt| |S t| |S r\  )r   r   ZMinMaxrH   r  rk   rd  ra   r   rl  r  rM  r  rK  )r  r  Zdivide_factorsrc   hstarthendfactordivide_factor)	rH  r   r   r  r  r  r  r  r  rX   rY   ri     s     r  )rC   r^   r9   r   r   r{  r   r   r   r  r  r\   r   r   rl  r  r  fallback_avg_pool2dfallback_avg_pool3dr,  rM  r6   r   r`   )r   r  r  r  r  rH  rC  r   r|  r  Z
ceil_modeshad_paddingr  r  fallbackri   r$  rX   )r  rH  r   rL  r   r  r  r  r  r  r  r  rY   rF    st    




 



rF  c                   s  d u sdksJ ds s,ddgt | ts:J t |tsHJ tdksXJ tdkshJ tdksxJ t| dv sJ |   | ^ }td|\}	}
td|\}}|  d pd p|
p||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   divisor must be not zeror%   r  r'   c                 3  s:   | ]2}t |d   t d | d   d    dV  qdS r  r  r  r  rX   rY   r   3  s   z&avg_pool2d_backward.<locals>.<genexpr>c                 3  s:   | ]2}t |d   t d| d   d    d V  qdS r  r  r  r  rX   rY   r   7  s   r  c              	     sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r'   )
rH   r  rk   rd  r  r  r  r   r  r  )r  r  stride_hstride_wpad_hpad_wkernel_hkernel_wrN  wstartrO  wendrQ  )heightr  r  r  r  rX   rY   !compute_pool_size_without_paddingJ  s,    

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                   s`  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]:}	tD ](}
t 	|t |	tj}t 	|t |
tj}d ur`}n(sls~d d  }n
 ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u r0t ||t dtj}nt |t 	|||}qq|d us\J |S )Nr   r'   Fr4  r  )rH   r  r"   rk   rd  r  r  r  r   r   r  r  r  r  r  rh  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  partr  )r`  rH  rC  r  r  rT  r  r  r  r  r  r  rX   rY   ri   f  st      

	


zavg_pool2d_backward.<locals>.fnr  )r^   r9   r   r   r{  r  r  r\   r   rZ  r   fallback_avg_pool2d_backwardr6   r   r`   )r  r   r  r  r  r  rH  rC  r   _h_outZ
ceil_mode1_w_outZ
ceil_mode2r  r   r  ri   r$  rX   )r`  rH  rC  r  r  rT  r_  r  r  r  r  r  r  r  rY   avg_pool2d_backward
  sb    
"Are  c                   s  d u sdksJ ds 	
s,g d
t | ts:J t |tsHJ t	dksXJ tdkshJ t
dksxJ t| dv sJ |   | ^ }td	
|\}	}
td	
|\}}td	
|\}}|  t
p|
p|p||  ^ }t| }|	 }	fdd	t
dD \  }|d
krzt| |	
|S 	
fdd  	
fdd}tj|  |||d}|S )Nr   rV  )r   r   r   r   )r   r   r'   r%   c                 3  s4   | ], t  fd dt  d D V  qdS )c                 3  s:   | ]2}t |   t d |       dV  qdS r  r  rr  )rc   r  r  rX   rY   r     s   z0avg_pool3d_backward.<locals>.<genexpr>.<genexpr>r%   N)rZ  r   )r~   r  )rc   rY   r     s   z&avg_pool3d_backward.<locals>.<genexpr>}   c              	     s  dd D \}}}dd D \}}}dd D \}	}
}dd t | ||g|||g|||gD \}}}dd t |||g|	|
|g g|||gD \}}}dd |||fD \}}}dd t |||g gD \}}}ttt||t||t||}|S )	Nc                 s  s   | ]}t |tjV  qd S r   rH   r  rk   rd  r  rX   rX   rY   r     r   zQavg_pool3d_backward.<locals>.compute_pool_size_without_padding.<locals>.<genexpr>c                 s  s   | ]}t |tjV  qd S r   rg  r  rX   rX   rY   r     r   c                 s  s   | ]}t |tjV  qd S r   rg  r   rX   rX   rY   r     s   c                 s  s(   | ] \}}}t t |||V  qd S r   )rH   r  r  )r~   rj  rv  padrX   rX   rY   r     s   c              
   s  s<   | ]4\}}}}t t ||t t |tj|V  qd S r   )rH   r  r   r  rk   rd  )r~   r  r   r   rh  rX   rX   rY   r     s   
c                 s  s$   | ]}t |t d tjV  qdS r  rH   r  r  rk   rd  )r~   r  rX   rX   rY   r     s   c                 s  s(   | ] \}}t |t |tjV  qd S r   rH   r  r  rk   rd  )r~   r  r   rX   rX   rY   r     s   )r   rH   r  r  )pdr  r  Zstride_drW  rX  Zpad_drY  rZ  Zkernel_dr[  r\  ZdstartrN  r]  ZdendrO  r^  rQ  )depthr_  r  r  r  r  rX   rY   r`    s8    $z>avg_pool3d_backward.<locals>.compute_pool_size_without_paddingc                   sV  | ^ }}}}dd t |||gD \}}}dd t |||gD \}}}dd t |||gD \}}	}
dd |||fD \}}}dd t ||	|
g	
gD \}}	}
d }tD ]}tD ]t}tD ]d}dd t |||g|||gD \}}}d ur}n2s s:d d	  d
  }n |||}tg |tjt|t|td	tj		ddtjt|t|	td	tj	
ddtjt|t|
td	tj	dd|}t
t
t||t||	t||
}|d u r(t||tdtj}qt|t|||}qqq|d usRJ |S )Nc                 s  s   | ]\}}|| V  qd S r   rX   )r~   r   rh  rX   rX   rY   r     r   z2avg_pool3d_backward.<locals>.fn.<locals>.<genexpr>c                 s  s0   | ](\}}}t t|| | |tjV  qd S r   rH   r  r"   rk   rd  )r~   r   r   rv  rX   rX   rY   r   !  s   c                 s  s*   | ]"\}}t t||d  tjV  qdS r  rm  )r~   r   rv  rX   rX   rY   r   &  s   c                 s  s$   | ]}t |t d tjV  qdS r  ri  )r~   pstartrX   rX   rY   r   +  s   c                 s  s(   | ] \}}t |t |tjV  qd S r   rj  )r~   ZpendZ
pooled_dimrX   rX   rY   r   /  s   c                 s  s(   | ] \}}t |t |tjV  qd S r   )rH   r   r  rk   rd  )r~   rn  Zp_rX   rX   rY   r   ;  s   r   r'   r%   Fr4  r  )r   r   rH   r  r  r  r  r  rk   rd  r  r  rh  r  r   )r  r  rs  r  r  Zpdstartr  r  Zpdendr  r  r  Zpd_r  r  rk  r  r  r  ra  r  )r`  rH  d_window_sizerC  r  r  rT  r  r  pooled_depthr  r  r  r  rX   rY   ri     s     
	

zavg_pool3d_backward.<locals>.fnr  )r^   r9   r   r   r{  r  r  r   r\   r   r   fallback_avg_pool3d_backwardr6   r   r`   )r  r   r  r  r  r  rH  rC  r  Z_d_outZceil_mode_drc  Zceil_mode_hrd  Zceil_mode_wr   r  r   r  ri   r$  rX   )r`  rH  ro  rl  rC  r  r  rT  r_  r  r  rp  r  r  r  r  r  rY   avg_pool3d_backward  sf    
%&Wrr  c                 C  s   |   }t|tr|g}n|s*tt|}t|dkrTt|dv sPJ d| g S t|}tt|D ]j}|| dk r||  t|rt|nd7  < d||   krt|k shn t|dkr|| dkshJ qhtt|t|ksJ d|S )Nr   )rX   r  r  zinvalid axis: r'   zreduction axis not unique)r   r^   r   r   r   r   r\   r    )r   r  r   rc   rX   rX   rY   _validate_reduction_axis}  s    
 <rs  c          
        s   |d urt | |} |  tt t| |}g }g g }g ttD ]>}||v rn| ||  qH| ||  qH fdd}rt}	D ]}t	j
j|	|< qn|}	|   t|  |p|  |  ||	|dS )Nc                   s   t |t ksJ r>t  t ks,J  fddD  t  t ksRJ d gt  t |  }tt t|D ]\}}|||< q|S )Nc                   s   g | ]} | qS rX   rX   r   r  rX   rY   r     r   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r   r   r   )r   Zreduction_indexr$  r  varZinner_loaderkeepdimsZkept_idxZreduced_idxr   r  rY   r&    s    
z%_make_reduction_inner.<locals>.loader)rf   r  r7  r  r  r  )r   r   r    r   rs  r   r   ra   r\   r   r   r   r  r   r`   r   )
r   r  rv  r   r  Z
kept_sizesZreduced_sizesrc   r&  r  rX   ru  rY   _make_reduction_inner  s:    



rw  rJ   )rk  c                   s   dd d fdd}|S )NFr   c                  sB   t | ||| d}tjf | d|}t|jjtr>|  |S )Nr  rv  r   r  )rk  r  )rw  r7   r   r^   r_   r*  )r   r  rv  r   r   r  r  rk  rX   rY   r$    s    zmake_reduction.<locals>.inner)NFrX   )rk  r  r$  rX   ry  rY   make_reduction  s    rz  c                C  sB   |d urt | |} t| |}t|  |  f|  f|  |dS )N)rf   dtypes	inner_fnsr   r  )r   r  r   r`   r   r  r   )r   r  r   rX   rX   rY   _make_scan_inner  s    

r}  c                  s   |d urt | |} |   t| |}|  }|tjtjfv rHt | tj} t| ||}t	 fdd|D }t
j||  |  d}t|t| }t t|||S )Nc                 3  s   | ]} | V  qd S r   rX   r   r   rX   rY   r     r   zmean.<locals>.<genexpr>r   )r   r   rs  r   rk   r  r  r   sum_rF   r*   r0   r`   r/   r   r\   div)r   r  keepdimr   Zoutput_dtype
sum_resultdenomrX   r~  rY   rm    s    

rm  c           
        s   |d u rd}|    t| |}t| |dd}|r8|  tt| |}t|||}t fdd|D }|r|t	|| d}t
j||  |  d}t|t|  }t||}	|s|	fS |r|nt||}|	|fS )Nr'   T)r  c                 3  s   | ]} | V  qd S r   rX   r   r~  rX   rY   r      r   z var_mean_sum_.<locals>.<genexpr>r   r   )r   rs  rm  r*  squarer  r  rF   r   rM  r*   r0   r   r`   r/   r   r\   r  rw  )
r   r  
correctionr  return_meanZx_meanZdiffsr  r  Zx_varrX   r~  rY   var_mean_sum_  s&    

r  c                 C  sV   t | |}t| ||d d d}|d }t|d }t|tjoTt|tjk oTt|dkS )Nrx  r  r  r'   )	rs  rw  rF   r^   r   r  r   r(   r  )r   r  r  r   r  reduction_numelrX   rX   rY   use_two_step_variance  s    


r  c                  s    d u rd t | ||d d d}|d}|d |d tjjf |fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|r|  ||fS |fS )Nr'   rx  r  r  r7  Zwelford_reduce)r|  rk  r   c                 3  s   | ]} | V  qd S r   rX   r   r~  rX   rY   r   3  r   z$var_mean_welford_.<locals>.<genexpr>c                 S  s4   t | tjr(| js(tt| tj|S t	| |S r   )
r^   r   r   Z	is_numberrH   r   r  rk   r  r  r6  rX   rX   rY   get_constant_or_index_expr5  s    z5var_mean_welford_.<locals>.get_constant_or_index_exprc                   s4    }}t d}| t |||  S r\  )rH   r  r  )r_   cNzero)r  r   r  rnumelrX   rY   rx  :  s    

z#var_mean_welford_.<locals>.scale_fn)rw  rz  r*   ZWelfordReductionr   r   r*  r   rs  rF   r%  )r   r  r  r  r  r   r&  rm  m2r   rx  rt  rX   )r  r   r  r  r   rY   var_mean_welford_  s6    




r  c                  s   |    t }t| |dd} t| ||||d}t| ||drLtf i |ntf i |}t fdd|D }|s||d S |S )NFrD  )r   r  r  r  r  )r  r  c                 3  s   | ]}t | d dV  qdS )FrD  Nr  r}   r  rX   rY   r   X  r   z#var_mean_helper_.<locals>.<genexpr>r   )r   r   r   r   r  r  r  r   )r   r  r  r  r  Zcompute_dtyper   r   rX   r  rY   var_mean_helper_H  s     	r  )r  r  c                C  s   t | |||ddS )NFr  r  r  r  r  r   r  r  r  rX   rX   rY   var_\  s    
r  c                C  s   t | |||ddS )NTr  r  r  rX   rX   rY   var_meanc  s    
r  c                 C  st   |dk rt t| | |S |dkr0td|S |dkr<| S t | |d |}t||}|d dkrpt|| }|S )Nr   r'   r%   )pow_recursiverH   r  r  r  )r   r   r   r  rX   rX   rY   r  j  s    r  c                 C  s   t | |S r   )rH   powr   r   rX   rX   rY   
pow_nativey  s    r  )r   c                   sd  t tr$tkr$t tS t tr>dkr>t S t trXdkrXt S tdd  fD }t|}t tod  k odk n  p|odk}|rވ   fdd	}t	j
    |  d
S t  tr dk rtdS  dkrt rtS |rZt  tr:t S t trPt S t S t S )Nrw  r'   c                 s  s"   | ]}t |tjr| V  qd S r   )r^   r*   r9   r   r}   rX   rX   rY   r     r   zpow.<locals>.<genexpr>i    r   c                   s   t |   S r   )r  r   r  r   r   r&  rX   rY   ri     s    zpow.<locals>.fnr  r%   )r^   r   r   r  sqrtr;  r   r   r  r6   r   r`   r   r   r   r~  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr  )r   r   r   Zis_integer_powZembed_exponentri   rX   r  rY   r    s@    
"




r  c                 C  s   t | tr| j}n| }t |tr&|j}t |tjsftj|  |  |	 | 
 dj}t |tjsfJ t |tjr| s| st |jtjs|  |j|_| S tjj|||d | S )Nr  unsafe_alias)r^   r9   r_   r*   r  r6   r   r`   r   r  r   Zis_input_bufferZis_module_bufferZ	NopKernelr*  rO  Zrealize_into)changedrz  r  Zchanged_datarX   rX   rY   r     s6    

r   c                 C  s   t | t| |S r   )r   r~  )r   r  rX   rX   rY   r    s    r  c                 C  s@   | |u r| S t ||  }t||  }t||  }t| |S r   rW  r`   r   r   r   r   r   )rK  r!  rV  rX   rX   rY   r[    s    r[  c                 C  s   t | |S r   )rH   floordivr  rX   rX   rY   r    s    r  c                 C  s   t | |S r   )rH   rK  r  rX   rX   rY   rK    s    rK  c                 C  s   t | ot |}t| ot|}|dkrP|r4J d|rBt| |S tt| |S |dkr|rdJ d|rrt| |S tt| |S t| |S )Nr  z5floordiv operands can not be boolean at the same timer  z5truncdiv operands can not be boolean at the same time)r   r   r  r  r  rK  r  )r   r   Zrounding_modeZboth_integerZboth_booleanrX   rX   rY   div_mode  s    r  c                 C  s<   t | ot |}|rt| |S ttjj}t|| |S d S r   )r   logical_andr4   rQ  r  rY  r%  )r   r   Z	both_boolri   rX   rX   rY   r    s
    
r  r  zOptional[ir.Constant]r  c              	   C  s   t | tjrt| jS t | tjr.t|  S t | tjr>| S t | tjsNdS t	j
j|  }t|N ttjdd | j|   }W d   n1 s0    Y  W d   n1 s0    Y  t |t	j
jjsJ t |jtjr|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNZallow_indexingT)r^   r*   Z
MutableBoxget_constant_valuer_   r  r  r   ZLoopsrk   Z	_inductorops_handlerZExtractConstantsHandlerr`   rI   Zset_ops_handlerr   objectrd  r  Zinner_fn_argsvirtualizedZOpsValuer   )r   r=  rb   rX   rX   rY   r    s$    
Jr  c                 C  s|   t dd | |fD }|r$t| |S t| }d urf|jdkrRttd|j}n
d|j }t| |S dd }t|| |S )Nc                 s  s   | ]}t |pt|V  qd S r   )r   r   r}   rX   rX   rY   r   0  r   zdiv_prim.<locals>.<genexpr>r   infr  c                  W  s
   t j|  S r   )rH   r  r   rX   rX   rY   ri   =  s    zdiv_prim.<locals>.fn)	r   rK  r  r   mathcopysignr   r  r%  )r   r   is_integralrL  r  ri   rX   rX   rY   div_prim.  s    



r  c                 C  s    t | |ftjd\} }t| |S rO  )r  r   INT_TO_FLOATr  r  rX   rX   rY   r  C  s    

r  c                 C  s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S  s   t | |S r   )rH   modr  rX   rX   rY   ri   U  s    zfmod.<locals>.fnc                 S  s   t | |S r   )rH   fmodr  rX   rX   rY   ri   Z  s    )r   r   r%  )r   r   r  ri   rX   rX   rY   r  O  s
    
r  c                C  sB   t |  st|  r&|d u r&tj}td|d}|| |||dS )Nrd  r:  r   r   r   r   rk   r  rz  r   r  rv  r   ri   rX   rX   rY   r  `  s    

r  c                 C  s   t |  st|  r&|d u r&tj}t|  dkr\|dv sBJ |pL|  }t| |ddS dd }t| ||d}t	j
jf i |d|i\}|d u rt| ||d	S |S )
Nr   r   r  TrD  c                 S  s   | \}|\}t ||fS r   )rH   r   a_tupleb_tupler   r   rX   rX   rY   
combine_fn~  s    zcumsum.<locals>.combine_fnr  r   r  r   r   )r   r   r   rk   r  r   r   r   r}  r*   Scanr   fallback_cumsumr   r  r   r  r   r  rX   rX   rY   cumsumr  s"    

r  c                 C  s   t |  st|  r&|d u r&tj}t|  dkr\|dv sBJ |pL|  }t| |ddS dd }t| ||d}t	j
jf i |d|i\}|d u rt| ||d	S |S )
Nr   r  TrD  c                 S  s   | \}|\}t ||fS r   )rH   r  r  rX   rX   rY   r    s    zcumprod.<locals>.combine_fnr  r  r  )r   r   r   rk   r  r   r   r   r}  r*   r  r   fallback_cumprodr  rX   rX   rY   cumprod  s"    

r  c                 C  sv   dd }|   }t|  dkr4|dv s,J t| S t| ||d}tjjf i |d|i\}|d u rrt| |dS |S )Nc              	   S  s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS r   )rH   r  r  r|  rh  log1pexp)r  r  r   r   Zmin_vZmax_vr  rX   rX   rY   log_add_exp_helper  s    z(logcumsumexp.<locals>.log_add_exp_helperr   r  r  r  r]  )	r   r   r   r;  r}  r*   r  r   fallback_logcumsumexp)r   r   r  r   r   r  rX   rX   rY   logcumsumexp  s    r  c                   s   t |  dkr2 dv sJ t| t| tjdfS |  }tjd|dd}t	|  |d}|tjf|d< | 
  fd	d
f|d< tjjf i |d|i\}}|d u rt|  dS ||fS )Nr   r  r   r  Fr   Zarg_break_ties_leftr  r{  c                   s   t |   tjS r   rH   r  rk   r  r  r  rX   rY   r    r   zcummax.<locals>.<lambda>r|  r  r]  )r   r   r;  rF  rk   r  r   r*   get_reduction_combine_fnr}  r  r  r   fallback_cummaxr   r  r   r  r   r   rj  rX   r  rY   cummax  s     
r  c                   s   t |  dkr2 dv sJ t| t| tjdfS |  }tjd|dd}t	|  |d}|tjf|d< | 
  fd	d
f|d< tjjf i |d|i\}}|d u rt|  dS ||fS )Nr   r  r   argminFr  r  r{  c                   s   t |   tjS r   r  r  r  rX   rY   r    r   zcummin.<locals>.<lambda>r|  r  r]  )r   r   r;  rF  rk   r  r   r*   r  r}  r  r  r   fallback_cumminr  rX   r  rY   cummin  s     
r  c                C  sB   t |  st|  r&|d u r&tj}td|d}|| |||dS )Nre  r:  r   r  r  rX   rX   rY   re    s    

re  c                 C  s   t | tj} td| ||dS )Nr   r  rv  )r   rk   r   rz  r   r   r  rX   rX   rY   
reduce_any  s    r  c                 C  s2   |d ur$t | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  rX   rX   rY   
reduce_max  s
    r  c                 C  s2   |d ur$t | ||dt| ||dfS t | d |dS r  )reduce_aminreduce_argminr  rX   rX   rY   
reduce_min  s
    r  xor_sumrZ  rY  r  r:  r  
logical_or)r!  r  stabler   
descendingc          
   	   C  s>  |d u rd}|   }|  }tt||}t|dkrNt| td|tj|fS t|r^|| nd}tj	j
|ttjjst| |||dS t|ddtj|dd}dgt| }t|r|||< t||}t||}tjj|| j|jf|  | f||||d\}	}|	d u r t| |||dS |d us.J |	t|tjfS )NFr   r'   r  )r  r  r   rf   r  )rf   r{  r|  r   r  r  r  )r   r`   r   r   r;  r  rk   r  rI   rP   r   Zstatically_known_ltrO  int16rZ  sort_fallbackr  rR  r   r*   ZSortr   r   r  r   )
r   r  r   r  r  rf   r/  rj  Z
view_shaper   rX   rX   rY   sort_stable  s>    



	
r  c                 C  s   t | d||dS )NFr  )r  )r   r   r  rX   rX   rY   sortF  s    r  c                 C  s   t | |tj|dS )Nr   r   r#  )rZ  r   r  )rS   r   r#  rX   rX   rY   register_pointwise_numericK  s    r  ztorch._ops.OpOverloadPacketr  c                 C  s   t | j t| tjdS rO  )rE   rY  rZ  r   r  r  rX   rX   rY    register_pointwise_numeric_ldf64T  s
    
r  rf  logical_not)r  )r   r   r  identity)r(  pointwise_overrides_datac                 #  s~   t |  t|  jd }|d u r"d S  fdd}t|tjjrh| D ] }t||}| j||fV  qDn| j||fV  d S )Nc                   s    j d u rt| S d S r   )ZtritonrG  r  r  rX   rY   make_triton_fallback  s    
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	r  r   r   r^   rk   rl   r   r   r   )nsr   rS   r  Zolnamer[  rX   r  rY   _get_pointwise_overrides  s    
r  r  c                   s,   | t |< t|   fdd}t| | d S )Nc                    sB    | i |}g }t | d |D ]\}}|t||dd q |S )Nr   Tr  )r   ra   r   )rd   r   resultsZmut_resultsr   r  outplace_oprX   rY   ri     s
    z$register_foreach_inplace.<locals>.fn)rO   r&  r   r   )aten_opZoutplace_aten_opr  ri   rX   r  rY   register_foreach_inplace	  s    
r  c                   s   t | d d fdd}|S )Nr   c                    s.    | i |}t || d  }t| d |S r\  )r   r   r   )rd   r   r  r  rX   rY   ri   -  s    zregister_inplace.<locals>.fn)r   )r  r  ri   rX   r  rY   register_inplace,  s    
r  c                 C  s   d S r   rX   rX  rX   rX   rY   sym_constrain_rangeV  s    r  c                 C  s&   t jjjd }t|tjsJ |jjS Nrz  	rI   rP   rQ   r  r^   rk   r  rV   r  r   r   rz  rX   rX   rY   sym_size[  s    r  c                 C  s&   t jjjd }t|tjsJ |jjS r  r  r  rX   rX   rY   
sym_stridel  s    r  c                 C  s   |   S r   )r  )r   rX   rX   rY   	sym_numelt  s    r  c                 C  s
   t j|  S r   )r   Addr   rX   rX   rY   sym_sum}  s    r  c                 O  s   t dd S )NzHelpful for debuggingrw   )r  rd   r   rX   rX   rY   foobar  s    r  c                 C  s   |    t| S r   )r*  r;  r   rX   rX   rY   _realize  s    r  c                 C  s   |    t| | | S r   )r*  r*   ZResizeStorageBytes)variabler  rX   rX   rY   resize_storage_bytes_  s    r  c                 C  s"   |    |   tt| |S r   )r*  r9   r   r*   ZSetSourceTensorKernel)r  Zsource_tensorrX   rX   rY   set__source_tensor  s    r  c                 C  s@   | |u r| S t ||  }t||  }t||  }t| |S r   r  )rK  r!  rX   rX   rY   
fsdp_copy_  s    r  c          	        sx  t | tsJ t |ttfs J |d u r.tj}|tjkrFtd| |tjkr`t	|dks`J |tj
krzt	|dkszJ |  |  }|  }t | jtjr| j | _t rtjjjrt|rtdqt|rt|jqdndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: r   r   nanTr  r   r  r'   c                   sH   |  t  tj}t tj}t ||}t | fddS )Nc                     s
    gS r   rX   rX   )
flat_indexflat_loaderrX   rY   r    r   z*resize.<locals>.inner_fn.<locals>.<lambda>)rH   r  rk   r  r  r  )r  Zflat_index_exprlimitr  r  Z	old_numelZout_indexerZuninitalized_val)r  rY   r    s
    zresize.<locals>.inner_fnr  )(r^   r9   r\   r   rk   Zcontiguous_formatZpreserve_formatrZ  Zchannels_lastr   Zchannels_last_3dr  r   r  r_   r*   r  r  rM  utilsZdeterministicZfill_uninitialized_memoryr   r   r   rO  rZ  rI   rP   r   r  r  r  r  rd  Z stride_ordered_for_memory_formatr  rf  r6   r   )	r   r   r  r   rf   Zx_flatZ
out_strider  rb   rX   r  rY   resize  sT    



	r
  )auto_functionalizedc                 C  sB   ddl m} ||}tj| ||i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridtma_descriptor_metadataZkernel_argsc                 S  s    i | ]\}}t |tr||qS rX   r   )r~   r  rz  rX   rX   rY   r     r   z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  Zget_constant_argsr*   ZUserDefinedTritonKernelr   )r  Zconstant_args_idxr  r  r   r  Zconstant_argsrX   rX   rY   triton_kernel_wrap_  s    	

r  c                 C  sj   t dd | g|D rHd}tjjjdd  }r@| d| }|tj_tj	| |||}t
ttj	|S )Nc                 s  s    | ]}t |tot|V  qd S r   r	  r}   rX   rX   rY   r     r   zcond.<locals>.<genexpr>z"control flow operator: torch.cond.stack_trace Found from : 
 )r   rI   rP   rQ   r  r  disable_cudagraphs_reasonr*   ZConditionalr   r\   mapr9   )predZtrue_fnZfalse_fnoperandsrz   r  r  rX   rX   rY   ry     s    ry   c                 C  st   t dd || D rFd}tjjjdd  }r>| d| }|tj_dddd	}tj	| |||}t
t||S )
Nc                 s  s    | ]}t |tot|V  qd S r   r	  r}   rX   rX   rY   r     s   zwhile_loop.<locals>.<genexpr>z(control flow operator: torch.while_loop.r  r  r   rb   c                 S  sN   t | tr| S t | tjr"t| S t | tjr8t| S tdt|  d S )NzNYI unsupported output type: )r^   r9   r*   r  MultiOutputr   rZ  r  r  rX   rX   rY   _map_output  s    

zwhile_loop.<locals>._map_output)r   rI   rP   rQ   r  r  r  r*   Z	WhileLoopr   r\   r  )Zcond_fnZbody_fnZcarried_inputsadditional_inputsrz   r  r  r  rX   rX   rY   
while_loop  s    
r  zir.Subgraph)subgraph_fn
identifierc                 G  s$   t jj| g|R  }tttj|S r   )r*   ZInvokeSubgraphr   r\   r  r9   )r  r  r  r  rX   rX   rY   invoke_subgraph)  s    r  )scheme)r  c          
      G  s   d }t jjjdd }|d us"J t| jjjD ]\}}|jdkrV|| t jj	|< q0q0|jdkrt j
|\}}t|| D ]6}	|	  |jrt jj|	  t jj|	  qtjjt j|||}q0t j|t jj	|< q0|S )Nquant_optionsrE  r   )rI   rP   rQ   r  r  r]   r?  nodesrS   envZfetch_args_kwargs_from_envr   r   r   r*  Zcodegen_low_precisionZlow_precision_codegen_opsr   r+  Zinvoke_quant_opsrk   rF  ZInterpreterr   Zrun_node)
r  r   r  r   r!  rc   rV   rd   r   r   rX   rX   rY   invoke_quant_tracer/  s"    

r$  ztuple[torch.Tensor])r  r  c                   s   ddl m m} t|dkr$td fddt||D }|| |fdd}t|d dd d	}td
d |D |d< tdd |D |d< t	j
jf |dd|}|d d u rtd|S )Nr'   )InputDescriptorlower_pointwise_subgraphr   zSUnable to generate code for associative_scan op, because there are lifted argumentsc                   s    g | ]} |  | d qS )r  )r   r`   r}   )r%  rX   rY   r   W  s   z$associative_scan.<locals>.<listcomp>c                   s    g t | t |R  S r   )r8  rH  )lhsrhs)lowered_combine_fnrX   rY   wrapped_combine_fn]  s
    z,associative_scan.<locals>.wrapped_combine_fnr  c                 s  s   | ]}|  V  qd S r   r  r}   rX   rX   rY   r   d  r   z#associative_scan.<locals>.<genexpr>r{  c                 s  s   | ]}|  V  qd S r   r  r}   rX   rX   rY   r   e  r   r|  F)r  Zcan_fallback_to_atenz/Unable to generate code for associative_scan op)r>  r%  r&  r   rZ  r   r   r}  r   r*   r  r   )r  Zxsr  r&  Zsubgraph_inputsr*  r   r  rX   )r%  r)  rY   associative_scanL  s,    


r+  c                 C  s   d S r   rX   )tokensrX   rX   rY   _sink_tokensp  s    r-  c                 O  s   t jj|g|R i |}ddlm} ||||}|d us>J tjj| }|d u rX|fS t	t j
tj|}t|ts|||fS |g|R S d S )Nr   )get_effect_key)r*   ZEffectfulKernelr   Ztorch._higher_order_ops.effectsr.  rI   rP   Zeffectful_opsr8  r  r  r9   r^   r   )tokenrS   rd   r   r  r.  Zeffect_typeZeffectful_kernelrX   rX   rY   with_effectsu  s    
r0  )register_comm_loweringsc                 C  s   t | |dddd}|d }tjjt|}tjjf i |d|d\}}|dkrtjj	|t
jkrtjf | d|d	|\}}||fS ttd
 t| |dd}	ttj t| |	}
t|
|dd}|	|fS dS )zn
    Lowering inductor_prims.prepare_softmax_online to compute max/sum in one pass if no split is needed.
    TNrx  r  Zonline_softmax_reduce)rk  r  r'   r%   )r  Z
num_outputZreduction_hintz
            Online softmax is disabled on the fly since Inductor decides to
            split the reduction. Cut an issue to PyTorch if this is an
            important use case and you want to speed it up with online
            softmax.
            )rv  )rw  rI   rP   r   simplifyrF   r*   r7   Z
num_splitsr  r(   r  r3   r   r>  r?  textwrapdedentr  rM   rQ  r  r  r  )r   r   r   r  r  hintZ	num_splitZ
max_tensorZ
sum_tensorrn  r  ZxsumrX   rX   rY   prepare_softmax_online  s<    



r6  r;  )quantized_lowerings)mkldnn_lowerings)jagged_loweringsc              	   c  sr   t | tjjsJ dt| }z2t| t|  dV  W |rH|t| < qnt|  n|rb|t| < n
t|  0 dS )z^
    A context manager to force fallback an op. Used in unit test
    for FallbackKernel.
    z+Only OpOverload to make the clean up easierN)	r^   rk   rl   rm   rM   r  r   rG  rz  )rS   Zold_handlerrX   rX   rY   force_fallback  s    


r:  )NN)NNNFN)F)F)F)F)N)N)N)r   r   r  r'   T)N)N)N)r   )r   )r   r   r'   )r   r   r'   )r   r   r'   )r   )r   )r   )r   )r  )T)N)N)T)NTF)F)r   NNr'   )NN)NNN)F)r  FF)T)T)F)F)F)F)F)N)r%   F)N)N)NN)NN)NNN)NNN)r   )Nr  N)F)Nr   r'   F)Nr   r'   F)r  )NNNN)rX   r   FTN)rX   r   FTN)N)N)N)NF)N)N)F)F)N)NF)NN)NN)N)N)NF)NF)NF)NF)r  F)NN)NN(  
__future__r   
contextlibr  r   r   loggingr  r  rT  r3  r>  collectionsr   collections.abcr   r   typingr   r   r   r	   r
   r   r   Ztyping_extensionsr   Zunittest.mockr   r   rk   Z$torch.ao.quantization.fx._decomposedZtorch.fxZtorch.utils._pytreer	  Z_pytreer8  Ztorch._dynamo.utilsr   Z(torch._higher_order_ops.associative_scanr   r  r   Ztorch._prims_commonr   r   r   r   r   r   r   r   r   r   r   Ztorch.fx.experimental.sym_noder   r   r  r   Ztorch.utils._ordered_setr    Ztorch.utils._sympy.functionsr!   r"   r#   r$   Z_dynamo.utilsr&    r(   r)   r*   r+   decompositionr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   r  rJ   rK   rL   ZFALLBACK_ALLOW_LIST	getLoggerrY  rX  rM   __annotations__rN   rl   rm   r   rQ  Ztr_c10drH  r   Z_higher_order_opsrA  rU   r&  rO   Zquantized_decomposedrZ   rh   ro   rv   r{   r|   r   r  r  r  re  ZbmmZconvolutionZconvolution_backwardr  r  r  r  r  r  Z_int_mmr  r  r  rd  r  r  r  r0  Z	complex32Z	complex64r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  r%  r5  r   rI  rL  rS  rR  r   rT  rW  Z
device_putrX  rZ  re  rg  rh  r   aliasdetachZdetach_ZliftZview_ofrn  r   ro  rw  rx  r{  r|  r  r  r  r  rJ  r  r   r  r  r  Z_unsafe_viewZreshaper  slicer  r  r  Zquantize_per_channelr  Zdequantize_per_channelr  Zquantize_per_tensorr  Zdequantize_per_tensorr  r^  r  r  r  r  r  r"  r#  r&  r'  r)  r1  r  r3  r  r6  rG  cacher@  rB  rC  rM  r\  r_  Zrngprimsrh  ri  rm  Z	bernoullirj  ro  rr  	lru_cacheru  rv  r`  r{  rx  ry  r~  r}  r|  randintZforce_stride_orderr  rg  r  r  r  Zlookup_seedr  randomr  r  r  r  r  rn  r  Z	NO_OPMATHr  r  rr   r  r  rq   rs   r  Z_adaptive_avg_pool3dZadaptive_max_pool3dZ*_scaled_dot_product_attention_math_for_mpsuniformZexponentialZ_pdist_forwardZsoft_margin_loss_backwardZ_fused_rms_normZxpuZis_availableZembedding_dense_backwardZ_cdist_forwardZ_cdist_backwardZ
_trilinearZsegment_reduceZ_segment_reduce_backwardZhistcZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZaddbmmZ_addmm_activationZ_grouped_mmZ
_cudnn_rnnZ_cudnn_rnn_backwardZ_embedding_bagZ_embedding_bag_forward_onlyZ_embedding_bag_backwardZ*_embedding_bag_per_sample_weights_backwardZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZ max_pool3d_with_indices_backwardZ_adaptive_avg_pool2d_backwardZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZreplication_pad1d_backwardZreplication_pad2d_backwardZupsample_linear1d_backwardZupsample_bicubic2d_backwardZupsample_trilinear3d_backwardZgrid_sampler_2d_backwardZ_pdist_backwardr  r  ZkthvalueZtopkr  ZmedianZ	nanmedianrF  Zresize_Z
resize_as_Z_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZ	lu_unpackZormqrZ_linalg_check_errorsZlinalg_pinvZatol_rtol_tensorZ_linalg_eighZtriangular_solveZlinalg_cholesky_exZcholesky_inverseZcholesky_solveZgeqrfZ_fft_r2cZnonzerogcdZ_thnn_fused_lstm_cellZ_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZgraphsafe_run_with_rng_stateZmasked_scatterZmasked_scatter_backwardrK  ZangleZ_efficientzerotensorZ(_sparse_coo_tensor_with_dims_and_tensorsZ	to_sparseZ
_to_sparser   r  r  r  Z#_scaled_dot_product_flash_attentionZ,_scaled_dot_product_flash_attention_backwardZ#_scaled_dot_product_cudnn_attentionZ,_scaled_dot_product_cudnn_attention_backwardZ+_scaled_dot_product_flash_attention_for_cpuZ4_scaled_dot_product_flash_attention_for_cpu_backwardZ0_scaled_dot_product_fused_attention_overrideableZ9_scaled_dot_product_fused_attention_overrideable_backwardZ_flash_attention_forwardZ_flash_attention_backwardZ_efficient_attention_forwardZ_efficient_attention_backwardZindex_reducer<  r;  r  rL  r  r  r  r  Zscalar_tensorr  Z
LongTensorr  r  r  r  r  r~  r  r  r  ri  rF  r   Z
zeros_liker	  r  r  r  r  r  r  r  r   r/  r1  r0  r   r6  r=  r>  r?  rA  rC  rD  r:  rU  Zfallback__unsafe_masked_indexrW  Z,fallback__unsafe_masked_index_put_accumulater  r\  rP  ra  r^  rj  rh  rl  rf  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r%  r  r/  r-  r9  r>  r?  r;  rD  rR  rI  rS  rF  rb  rr  rq  rs  rw  rz  r}  rm  r  r  r  r  rt  r  r  r  r  r  ZTensor_Tensorr  ZScalarr  ZTensor_Scalarr  r   r  r[  r  rK  r  r  r  r  r  Ztrue_divider  r  rd  r  r  r  r  r  r  r  r  r  r  r  re  r   r  rZ  r  rY  r  r  rn  r  ro  r  r  r  r  r  r   r  r  r  r  Zrsqrtr  r  expm1Zrelur5  r  r  r  cossinabsZbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erfr  tantanhr  r  r  logical_xorr  r  Z	clamp_minZ	clamp_maxnegr  	remaindersignZsignbitZ	_neg_viewler  r  r  r  necoshsinhacosacoshasinasinhatan2atanatanhr  erfcZerfinvhypotlog10log2	nextafterZcodegen.commonr(  r  r  r   rS   r   r#  Z_foreach_addListZforeach_add_listZforeach_add_scalarZ_foreach_mulZforeach_mul_listZforeach_mul_scalarZ_foreach_subZ_foreach_negZ_foreach_absZ_foreach_powZScalarAndTensorZ_foreach_divZforeach_div_listZforeach_div_scalarZ_foreach_sqrtZ_foreach_rsqrtZ_foreach_maximumZ_foreach_minimumZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_reciprocalZ_foreach_signZ_foreach_copyr  Z_foreach_add_Z_foreach_mul_Z_foreach_div_r  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r   r  r  r   methodfuncr  r  Z_inductor_testr*  r  r  r  set_Zsource_Tensorr  Zfsdpr  r
  Z*torch._higher_order_ops.auto_functionalizer  r  Zhigher_orderry   r  r  Zinvoke_quantr$  r+  r-  r0  Zcomm_loweringr1  r6  r<  r7  Zregister_quantized_opsZregister_woq_mm_opsr8  Zregister_onednn_fusion_opsr9  Zregister_jagged_opscontextmanagerr:  rX   rX   rX   rY   <module>   s   $4@8




	H95    
V:
,
-



#












4
E2$2$"$-"/|6



		
(2&




	
	*(	.a$:		 


C8



.
	



$!#I%
~



&!$"t /

  $
  $ _7 K"$      
 
)5


E


Q

B-


        z
	 &

	 H1
+


/#

	 











'	














@
"
#6
