o
    
jz)                     @   s   d dl Z d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ dZd	d
 ZG dd de jjZG dd de jjZG dd de jjZdS )    N)nn)Conv1dConvTranspose1d)
functional)weight_norm)remove_parametrizations)load_fsspecg?c                 C   s   t | | | d S )N   )int)kd r   W/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vocoder/models/hifigan_generator.pyget_padding   s   r   c                       2   e Zd ZdZd
 fdd	Zdd Zdd	 Z  ZS )	ResBlock1a;  Residual Block Type 1. It has 3 convolutional layers in each convolutional block.

    Network::

        x -> lrelu -> conv1_1 -> conv1_2 -> conv1_3 -> z -> lrelu -> conv2_1 -> conv2_2 -> conv2_3 -> o -> + -> o
        |--------------------------------------------------------------------------------------------------|


    Args:
        channels (int): number of hidden channels for the convolutional layers.
        kernel_size (int): size of the convolution filter in each layer.
        dilations (list): list of dilation value for each conv layer in a block.
          r      c                    s   t    ttt|||d|d t||d dtt|||d|d t||d dtt|||d|d t||d dg| _ttt|||ddt|ddtt|||ddt|ddtt|||ddt|ddg| _d S )Nr   r   dilationpaddingr	   )	super__init__r   
ModuleListr   r   r   convs1convs2selfchannelskernel_sizer   	__class__r   r   r   !   s\   


#
zResBlock1.__init__c                 C   sL   t | j| jD ]\}}t|t}||}t|t}||}|| }q|S )z
        Args:
            x (Tensor): input tensor.
        Returns:
            Tensor: output tensor.
        Shapes:
            x: [B, C, T]
        )zipr   r   F
leaky_reluLRELU_SLOPE)r   xc1c2xtr   r   r   forwardT   s   	
zResBlock1.forwardc                 C   s0   | j D ]}t|d q| jD ]}t|d qd S Nweight)r   r   r   r   lr   r   r   remove_weight_norme   s
   

zResBlock1.remove_weight_norm)r   r   __name__
__module____qualname____doc__r   r,   r1   __classcell__r   r   r"   r   r      s
    3r   c                       r   )	ResBlock2a  Residual Block Type 2. It has 1 convolutional layers in each convolutional block.

    Network::

        x -> lrelu -> conv1-> -> z -> lrelu -> conv2-> o -> + -> o
        |---------------------------------------------------|


    Args:
        channels (int): number of hidden channels for the convolutional layers.
        kernel_size (int): size of the convolution filter in each layer.
        dilations (list): list of dilation value for each conv layer in a block.
    r   r   r   c                    sb   t    ttt|||d|d t||d dtt|||d|d t||d dg| _d S )Nr   r   r   )r   r   r   r   r   r   r   convsr   r"   r   r   r   {   s0   


zResBlock2.__init__c                 C   s,   | j D ]}t|t}||}|| }q|S N)r:   r%   r&   r'   )r   r(   cr+   r   r   r   r,      s
   

zResBlock2.forwardc                 C   s   | j D ]}t|d qd S r-   )r:   r   r/   r   r   r   r1      s   
zResBlock2.remove_weight_norm)r   r9   r2   r   r   r"   r   r8   l   s
    r8   c                       sV   e Zd Z					d fdd	ZdddZe d	d
 Zdd Z	dddZ	  Z
S )HifiganGeneratorr   r   Tc                    sd  t    |	| _t|| _t|| _tt||dddd| _|dkr$t	nt
}t | _tt||D ]#\}\}}| jtt|d|  |d|d   |||| d d q2t | _tt| jD ]"}|d|d   }tt||D ]\}\}}| j|||| qsqbtt||ddd|d| _|
dkrt|
|d| _|st| jd	 |st| jd	 d
S d
S )a  HiFiGAN Generator with Multi-Receptive Field Fusion (MRF)

        Network:
            x -> lrelu -> upsampling_layer -> resblock1_k1x1 -> z1 -> + -> z_sum / #resblocks -> lrelu -> conv_post_7x1 -> tanh -> o
                                                 ..          -> zI ---|
                                              resblockN_kNx1 -> zN ---'

        Args:
            in_channels (int): number of input tensor channels.
            out_channels (int): number of output tensor channels.
            resblock_type (str): type of the `ResBlock`. '1' or '2'.
            resblock_dilation_sizes (List[List[int]]): list of dilation values in each layer of a `ResBlock`.
            resblock_kernel_sizes (List[int]): list of kernel sizes for each `ResBlock`.
            upsample_kernel_sizes (List[int]): list of kernel sizes for each transposed convolution.
            upsample_initial_channel (int): number of channels for the first upsampling layer. This is divided by 2
                for each consecutive upsampling layer.
            upsample_factors (List[int]): upsampling factors (stride) for each upsampling layer.
            inference_padding (int): constant padding applied to the input at inference time. Defaults to 5.
           r   r   )r   1r	   )r   biasr   r.   N)r   r   inference_paddinglennum_kernelsnum_upsamplesr   r   conv_prer   r8   r   r   ups	enumerater$   appendr   	resblocksrange	conv_post
cond_layerr   )r   in_channelsout_channelsresblock_typeresblock_dilation_sizesresblock_kernel_sizesupsample_kernel_sizesupsample_initial_channelupsample_factorsrA   cond_channelsconv_pre_weight_normconv_post_weight_normconv_post_biasresblockiur   ch_r   r"   r   r   r      sB   
#





zHifiganGenerator.__init__Nc                 C   s   |  |}t| dr|| | }t| jD ]=}t|t}| j| |}d}t| j	D ]!}|du r?| j
|| j	 |  |}q,|| j
|| j	 |  |7 }q,|| j	 }qt|}| |}t|}|S )z
        Args:
            x (Tensor): feature input tensor.
            g (Tensor): global conditioning input tensor.

        Returns:
            Tensor: output waveform.

        Shapes:
            x: [B, C, T]
            Tensor: [B, 1, T]
        rL   N)rE   hasattrrL   rJ   rD   r%   r&   r'   rF   rC   rI   rK   torchtanh)r   r(   gorZ   z_sumjr   r   r   r,      s    




zHifiganGenerator.forwardc                 C   s4   | | jjj}tjj|| j| jfd}| 	|S )z
        Args:
            x (Tensor): conditioning input tensor.

        Returns:
            Tensor: output waveform.

        Shapes:
            x: [B, C, T]
            Tensor: [B, 1, T]
        	replicate)
torE   r.   devicer_   r   r   padrA   r,   )r   r<   r   r   r   	inference  s   
zHifiganGenerator.inferencec                 C   sN   t d | jD ]}t|d q| jD ]}|  qt| jd t| jd d S )NzRemoving weight norm...r.   )printrF   r   rI   r1   rE   rK   r/   r   r   r   r1     s   


z#HifiganGenerator.remove_weight_normFc                 C   sH   t |td|d}| |d  |r"|   | jrJ |   d S d S )Ncpu)map_locationcachemodel)r   r_   rg   load_state_dictevaltrainingr1   )r   configcheckpoint_pathrp   rm   stater   r   r   load_checkpoint%  s   
z HifiganGenerator.load_checkpoint)r   r   TTTr;   )FF)r3   r4   r5   r   r,   r_   no_gradri   r1   ru   r7   r   r   r"   r   r=      s    
I

r=   )r_   r   torch.nnr   r   r   r%   torch.nn.utils.parametrizationsr   torch.nn.utils.parametrizer   TTS.utils.ior   r'   r   Moduler   r8   r=   r   r   r   r   <module>   s   Z6