o
    i                     @  s
  d dl mZ d dlmZmZmZ d dlmZ d dlm	  m
  m  mZ d dlmZmZ d dlmZmZ erCd dlmZ d dlmZ g d	Zed
dG dd dejZeG dd dejZe	d*d+ddZe	d,d-d$d%Ze	d*d.d&d'Zed/d0d(d)ZdS )1    )annotations)ListTupleTYPE_CHECKING)	dataclassN)PaddedSharedLayoutSwizzledSharedLayout)builtin_unwrap_if_constexpr)ir)shared_memory_descriptor)
async_load
async_waitmake_tensor_descriptortensor_descriptortensor_descriptor_typeT)eqc                   @  sd   e Zd ZU dZded< ded< ded< ded< d!ddZd"ddZd#ddZd$ddZd!ddZ	d S )%r   z!The type for a tensor descriptor.zttgl.block_type
block_typezttgl.tuple_type
shape_typestrides_type)PaddedSharedLayout | SwizzledSharedLayoutlayoutreturnstrc                 C  s   d| j  d| j dS )Nztensor_descriptor<z, >)r   r   self r   e/home/kuhnn/.local/lib/python3.10/site-packages/triton/experimental/gluon/language/amd/gfx1250/tdm.py__str__   s   ztensor_descriptor_type.__str__handlesList[ir.value]cursorintTuple[tensor_descriptor, int]c                 C  sJ   || }|d7 }| j ||\}}| j||\}}t|||| }||fS )N   )r   _unflatten_irr   r   )r   r    r"   handleshapestridesvaluer   r   r   r&      s   z$tensor_descriptor_type._unflatten_irbuilder
ir.builderir.typec                 C  s*   | j j }|| j ||| j|S N)r   
element_tyis_int_signed!get_tensor_descriptor_layout_typeto_irr   _to_ir)r   r+   	is_signedr   r   r   r3   $   s   

ztensor_descriptor_type._to_iroutList[ir.type]Nonec                 C  s0   | | | | j|| | j|| d S r.   )appendr3   r   _flatten_ir_typesr   )r   r+   r5   r   r   r   r9   ,   s   z(tensor_descriptor_type._flatten_ir_typesc              	   C  s6   d| j   d| j  d| j  d| j  d	S )NTD_)r   mangler   r   r   r   r   r   r   r<   1   s   6ztensor_descriptor_type.mangleN)r   r   )r    r!   r"   r#   r   r$   )r+   r,   r   r-   )r+   r,   r5   r6   r   r7   )
__name__
__module____qualname____doc____annotations__r   r&   r3   r9   r<   r   r   r   r   r      s   
 



r   c                   @  sl   e Zd ZU dZded< ded< ded< ded< dddZedd Zedd Zedd Z	edd Z
dS )r   z4A descriptor representing a tensor in global memory.zir.valuer'   z
ttgl.tupler(   r)   r   typer    r!   r   r7   c                 C  s(   | | j | j| | j| d S r.   )r8   r'   r(   _flatten_irr)   )r   r    r   r   r   rC   >   s   ztensor_descriptor._flatten_irc                 C     | j jS r.   )rB   r   r   r   r   r   r   C      ztensor_descriptor.block_typec                 C  
   | j jjS r.   )rB   r   r(   r   r   r   r   block_shapeG      
ztensor_descriptor.block_shapec                 C  rF   r.   )rB   r   r/   r   r   r   r   dtypeK   rH   ztensor_descriptor.dtypec                 C  rD   r.   )rB   r   r   r   r   r   r   O   rE   ztensor_descriptor.layoutN)r    r!   r   r7   )r=   r>   r?   r@   rA   rC   propertyr   rG   rI   r   r   r   r   r   r   5   s   
 



r   basettgl.tensorr(   "List[ttgl.constexpr | ttgl.tensor]r)   rG   List[ttgl.constexpr]r   r   r   c                 C  sR  t |}d|  krdksn J d| dt ||ks)J d| dt | t ||ks;J d| dt | t| jtjsFJ d	t|}t|ttfsUJ d
t|trc|jdkscJ d| j	}|j
|dd}|j
|dd}	t|}t|}t| jj|}
t|
|j|j|}|d}|j||j|||	|}t||||S )a  Make a tensor descriptor object.

    Args:
        base (tensor): base pointer of the tensor in global memory.
        shape (List[int]): shape of the tensor.
        strides (List[int]): strides of the tensor.
        block_shape (List[int]): block shape of the tensor.
        layout (PaddedSharedLayout | SwizzledSharedLayout): the layout of the tensor in shared memory.

    Returns:
        tensor_descriptor: the created tensor descriptor object
    r%      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got zExpected base to be a pointerzBExpected layout to be a PaddedSharedLayout or SwizzledSharedLayoutz3Expected max_phase to be 1 for SwizzledSharedLayoutFrequire_i64Tzero)len
isinstancerI   ttglpointer_typer
   r   r   	max_phaser'   _convert_to_ir_valuestupler   rB   r/   r   _str_to_padding_optionr+   create_make_tensor_descriptorr3   r   )rK   r(   r)   rG   r   	_semanticndimbase_handleshape_handlesstride_handlesr   rB   paddingr'   r   r   r   r   T   s.   &$$



r   srcoffsetsdestr   predboolmbarrierr7   c           	      C  sZ   |j |dd}||}|j}t|}|dur|jntj }|j| j||j|| dS )a-  Load a block of tensor specified in tensor descriptor from global memory to shared memory asynchronously.

    Args:
        src (tensor_descriptor): the source tensor descriptor.
        offsets (List[int]): the offsets from the base pointer in the tensor descriptor.
        dest (shared_memory_descriptor): the shared memory destination to store the loaded data.
        pred (bool, optional): Predicate to enable or disable the load. Defaults to True.
        mbarrier (shared_memory_descriptor, optional): The barrier object to signal "arrive" on.
    FrP   N)	rX   	to_tensorr'   r
   rU   r   r*   r+   %create_async_tdm_copy_global_to_local)	rb   rc   rd   re   rg   r\   offset_handlespred_handlembarrier_handler   r   r   r      s   
r   c                 C  s&   |j |dd}|j| j||j dS )ak  Store a block of tensor specified in tensor descriptor from shared memory to global memory asynchronously.

    Args:
        dest (tensor_descriptor): the destination tensor descriptor.
        offsets (List[int]): the offsets from the base pointer in the tensor descriptor.
        src (shared_memory_descriptor): the shared memory source to load the data.
    FrP   N)rX   r+   %create_async_tdm_copy_local_to_globalr'   )rd   rc   rb   r\   rj   r   r   r   async_store   s   
rn   c                 C  s   t | } |j|  dS )zWait for the outstanding asynchronous tensor operations to complete.

    Args:
        num_outstanding (int): number of outstanding async tensor operations to wait for.
    N)r
   r+   create_async_tdm_wait)num_outstandingr\   r   r   r   r      s   r   r.   )rK   rL   r(   rM   r)   rM   rG   rN   r   r   r   r   )TNN)rb   r   rc   rM   rd   r   re   rf   rg   r   r   r7   )rd   r   rc   rM   rb   r   r   r7   )r   N)r   r7   )
__future__r   typingr   r   r   dataclassesr   (triton.experimental.gluon.language._coreexperimentalgluonlanguage_corerU   +triton.experimental.gluon.language._layoutsr   r   r	   r
   	triton._Cr   r   __all__	base_typer   
base_valuer   r   r   rn   r   r   r   r   r   <module>   s2    $+