o
    i	                     @   sv   d dl mZ d dlmZmZ g dZG dd deZedddZedd	d
ZedddZ	edddddZ
dS )    )SwizzledSharedLayout)builtin_unwrap_if_constexpr)arriveinit
invalidateMBarrierLayoutwaitc                       s"   e Zd ZdZd fdd	Z  ZS )r   z
    Layout for mbarrier synchronization in Ampere and later architectures.

    Args:
        cga_layout (List[List[int]]): CTA layout bases. Defaults to [].
    Nc                    s    t  jddddg|pg d d S )N   r   )vec	per_phase	max_phaseorder
cga_layout)super__init__)selfr   	__class__ l/home/kuhnn/.local/lib/python3.10/site-packages/triton/experimental/gluon/language/nvidia/ampere/mbarrier.pyr      s    zMBarrierLayout.__init__N)__name__
__module____qualname____doc__r   __classcell__r   r   r   r   r      s    r   Nc                 C   s   t |}|j| j| dS )z
    Initialize an mbarrier with a specified count.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to initialize.
        count (int): The initial count for the barrier.
    N)r   buildercreate_mbarrier_inithandle)mbarriercount	_semanticr   r   r   r      s   	r   c                 C   s   |j | j dS )z
    Invalidate an mbarrier, resetting its state.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to invalidate.
    N)r   create_mbarrier_invalr   )r    r"   r   r   r   r       s   r   Tr   c                 C   s>   | |}| |}dd |D }|j| j|j|j| dS )a  
    Wait until the mbarrier object completes its current phase.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to wait on.
        phase (int): The phase index to wait for.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
        deps (Sequence[shared_memory_descriptor]): Dependent allocations barrier is waiting on. Used to track liveness of dependent allocations. Defaults to ().
    c                 S   s   g | ]}|j qS r   )r   ).0xr   r   r   
<listcomp>8   s    zwait.<locals>.<listcomp>N)	to_tensorr   create_mbarrier_waitr   )r    phasepreddepsr"   r   r   r   r	   +   s   

r	   )r*   r"   c                C   s&   d}| |}|j| j||j dS )a  
    Arrive on an mbarrier, signaling that a thread has reached the barrier.

    Args:
        mbarrier (shared_memory_descriptor): The barrier object to arrive on.
        pred (bool): Predicate. Operation is skipped if predicate is False. Defaults to True.
    r
   N)r'   r   create_mbarrier_arriver   )r    r*   r"   r!   r   r   r   r   <   s   	
r   r   )Tr   N)+triton.experimental.gluon.language._layoutsr   (triton.experimental.gluon.language._corer   r   __all__r   r   r   r	   r   r   r   r   r   <module>   s    
