o
    
j                     @   s\   d dl Z d dlm  mZ d dl mZ G dd dejZG dd dejZG dd dZdS )	    N)nnc                       s(   e Zd Zd	 fdd	Zd
ddZ  ZS )FFTransformer      皙?c                    s   t    tj|||d| _|d d }tj||||d| _tj||||d| _t|| _	t|| _
t|| _t|| _d S )N)dropout      )kernel_sizepadding)super__init__r   MultiheadAttention	self_attnConv1dconv1conv2	LayerNormnorm1norm2Dropoutdropout1dropout2)selfin_out_channels	num_headshidden_channels_ffnkernel_size_fft	dropout_pr   	__class__ U/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/generic/transformer.pyr      s   
zFFTransformer.__init__Nc                 C   s   | ddd}| j|||||d\}}|| | }| || }| ddd}| t| |}| |}|| }|	dd}| 
|}|	dd}||fS )u*   😦 ugly looking with all the transposingr	   r   r   )	attn_maskkey_padding_mask)permuter   r   r   r   Frelur   r   	transposer   )r   srcsrc_masksrc_key_padding_masksrc2	enc_alignr!   r!   r"   forward   s   

zFFTransformer.forward)r   r   r   NN__name__
__module____qualname__r   r.   __classcell__r!   r!   r   r"   r      s    r   c                       s&   e Zd Z fddZdddZ  ZS )FFTransformerBlockc                    s2   t    t fddt|D | _d S )Nc                    s   g | ]
}t  d qS ))r   r   r   r   )r   ).0_r   r   r   r   r!   r"   
<listcomp>*   s    z/FFTransformerBlock.__init__.<locals>.<listcomp>)r   r   r   
ModuleListrange
fft_layers)r   r   r   r   
num_layersr   r   r8   r"   r   '   s   

zFFTransformerBlock.__init__Nc                 C   sf   |dur|j dkr|d}|  }g }| jD ]}|||d\}}||d qt|d}|S )z
        TODO: handle multi-speaker
        Shapes:
            - x: :math:`[B, C, T]`
            - mask:  :math:`[B, 1, T] or [B, T]`
        Nr   r   )r+   )ndimsqueezeboolr<   append	unsqueezetorchcat)r   xmaskg
alignmentslayeralignr!   r!   r"   r.   5   s   


zFFTransformerBlock.forwardr/   r0   r!   r!   r   r"   r5   &   s    r5   c                   @   s"   e Zd Z	dddZdddZdS )	FFTDurationPredictorr   Nc                 C   s$   t |||||| _t|d| _d S )Nr   )r5   fftr   Linearproj)r   in_channelshidden_channelsr   r=   r   cond_channelsr!   r!   r"   r   I   s   zFFTDurationPredictor.__init__c                 C   s   | j ||d}| |}|S )z
        Shapes:
            - x: :math:`[B, C, T]`
            - mask:  :math:`[B, 1, T]`

        TODO: Handle the cond input
        )rF   )rL   rN   )r   rE   rF   rG   r!   r!   r"   r.   O   s   
zFFTDurationPredictor.forward)r   Nr/   )r1   r2   r3   r   r.   r!   r!   r!   r"   rK   H   s    
rK   )	rC   torch.nn.functionalr   
functionalr&   Moduler   r5   rK   r!   r!   r!   r"   <module>   s     "