o
    
j                     @   sj   d dl mZ d dlmZ d dlmZ d dlmZ G dd dejZ	G dd dejZ
G d	d
 d
ejZdS )    )nn)ResidualConv1dBNBlock)FFTransformerBlock)RelativePositionTransformerc                       *   e Zd ZdZ fddZdddZ  ZS )"RelativePositionTransformerEncoderaw  Speedy speech encoder built on Transformer with Relative Position encoding.

    TODO: Integrate speaker conditioning vector.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        hidden_channels (int): number of hidden channels
        params (dict): dictionary for residual convolutional blocks.
    c              	      s@   t    t|||dddg dd| _t|||fi || _d S )N         )r
   r
   r
   )kernel_sizenum_res_blocksnum_conv_blocks	dilations)super__init__r   prenetr   rel_pos_transformerselfin_channelsout_channelshidden_channelsparams	__class__ V/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/feed_forward/encoder.pyr      s   
	z+RelativePositionTransformerEncoder.__init__Nc                 C   s*   |d u rd}|  || }| ||}|S Nr
   )r   r   r   xx_maskgor   r   r   forward!   s
   z*RelativePositionTransformerEncoder.forwardNN__name__
__module____qualname____doc__r   r#   __classcell__r   r   r   r   r      s    r   c                       r   )ResidualConv1dBNEncoderaq  Residual Convolutional Encoder as in the original Speedy Speech paper

    TODO: Integrate speaker conditioning vector.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        hidden_channels (int): number of hidden channels
        params (dict): dictionary for residual convolutional blocks.
    c              	      sr   t    tt||dt | _t|||fi || _tjt||dt t	|t||dg | _
d S r   )r   r   r   
SequentialConv1dReLUr   r   res_conv_blockBatchNorm1dpostnetr   r   r   r   r   5   s   

z ResidualConv1dBNEncoder.__init__Nc                 C   s@   |d u rd}|  || }| ||}| || | }|| S r   )r   r/   r1   r   r   r   r   r#   C   s   zResidualConv1dBNEncoder.forwardr$   r%   r   r   r   r   r+   )   s    r+   c                       sJ   e Zd ZdZdddg d dg dddd	f fd
d	ZdddZ  ZS )Encodera~  Factory class for Speedy Speech encoder enables different encoder types internally.

    Args:
        num_chars (int): number of characters.
        out_channels (int): number of output channels.
        in_hidden_channels (int): input and hidden channels. Model keeps the input channels for the intermediate layers.
        encoder_type (str): encoder layer types. 'transformers' or 'residual_conv_bn'. Default 'residual_conv_bn'.
        encoder_params (dict): model parameters for specified encoder type.
        c_in_channels (int): number of channels for conditional input.

    Note:
        Default encoder_params to be set in config.json...

        ```python
        # for 'relative_position_transformer'
        encoder_params={
            'hidden_channels_ffn': 128,
            'num_heads': 2,
            "kernel_size": 3,
            "dropout_p": 0.1,
            "num_layers": 6,
            "rel_attn_window_size": 4,
            "input_length": None
        },

        # for 'residual_conv_bn'
        encoder_params = {
            "kernel_size": 4,
            "dilations": 4 * [1, 2, 4] + [1],
            "num_conv_blocks": 2,
            "num_res_blocks": 13
        }

        # for 'fftransformer'
        encoder_params = {
            "hidden_channels_ffn": 1024 ,
            "num_heads": 2,
            "num_layers": 6,
            "dropout_p": 0.1
        }
        ```
    residual_conv_bn   )r
      r4   r
   r5      )r   r   r   r   r   c                    s   t    || _|| _|| _|| _|| _| dkr$t||||| _	d S | dkr4t
||||| _	d S | dkrM||ksBJ dt|fi || _	d S td)Nrelative_position_transformerr3   fftransformerzP[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'z [!] unknown encoder type.)r   r   r   r   r   encoder_typec_in_channelslowerr   encoderr+   r   NotImplementedError)r   in_hidden_channelsr   r9   encoder_paramsr:   r   r   r   r   y   s$   


zEncoder.__init__Nc                 C   s   |  ||}|| S )zi
        Shapes:
            x: [B, C, T]
            x_mask: [B, 1, T]
            g: [B, C, 1]
        )r<   r   r   r   r   r#      s   zEncoder.forward)Nr%   r   r   r   r   r2   L   s    /!r2   N)torchr   "TTS.tts.layers.generic.res_conv_bnr   "TTS.tts.layers.generic.transformerr   #TTS.tts.layers.glow_tts.transformerr   Moduler   r+   r2   r   r   r   r   <module>   s    !#