o
    
j\!                     @   sF   d dl Z d dlm  mZ G dd de jjZG dd de jjZdS )    Nc                       s:   e Zd ZdZdddddddif fdd		Zd
d Z  ZS )KernelPredictorz7Kernel predictor for the location-variable convolutions   @           	LeakyReLUnegative_slopeg?c                    s  t    || _|| _|| _|| _|| | | }|| }|d d }tjtjj	||ddddt
tj|	di |
| _tjtj|tjj	||||ddt
tj|	di |
tjj	||||ddt
tj|	di |
tj|tjj	||||ddt
tj|	di |
tjj	||||ddt
tj|	di |
tj|tjj	||||ddt
tj|	di |
tjj	||||ddt
tj|	di |
| _tjj	||||dd| _tjj	||||dd| _dS )a9  
        Args:
            cond_channels (int): number of channel for the conditioning sequence,
            conv_in_channels (int): number of channel for the input sequence,
            conv_out_channels (int): number of channel for the output sequence,
            conv_layers (int):
            kpnet_
                 T)paddingbiasN )super__init__conv_in_channelsconv_out_channelsconv_kernel_sizeconv_layerstorchnn
SequentialConv1dgetattr
input_convDropoutresidual_convkernel_conv	bias_conv)selfcond_channelsr   r   r   r   kpnet_hidden_channelskpnet_conv_sizekpnet_dropoutkpnet_nonlinear_activation!kpnet_nonlinear_activation_paramsl_wl_br   	__class__r   O/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vocoder/layers/lvc_block.pyr      s>   



zKernelPredictor.__init__c           	      C   sx   |j \}}}| |}|| | }| |}| |}| || j| j| j	| j
|}| || j| j	|}||fS )z~
        Args:
            c (Tensor): the conditioning sequence (batch, cond_channels, cond_length)
        Returns:
        )shaper   r   r   r   
contiguousviewr   r   r   r   )	r   cbatch_cond_lengthkbkernelsr   r   r   r)   forwardB   s   


zKernelPredictor.forward)__name__
__module____qualname____doc__r   r4   __classcell__r   r   r'   r)   r      s    :r   c                       sB   e Zd ZdZ						d fdd	Zd	d
 Zedd Z  ZS )LVCBlockz"the location-variable convolutions   r      r   r   c
              
      s   t    || _|| _|| _tj | _tjj	|||d ||d |d  |d d| _
t||d| |||||	d| _t|D ]!}
d|
 t|d d  }tjj||||d|
 d}| j| q>d S )Nr	   )kernel_sizestrider   output_padding)r   r   r   r   r   r    r!   r"   r   r   )r=   r   dilation)r   r   cond_hop_lengthr   r   r   r   
ModuleListconvsConvTranspose1dupsampler   kernel_predictorrangeintr   append)r   in_channelsr   upsample_ratior   r   rA   r    r!   r"   ir   convr'   r   r)   r   Y   s<   
	zLVCBlock.__init__c           
   
   C   s  |j d }| |\}}t|d}| |}t| jD ]c}t|d}| j| |}t|d}|dd|ddddddddf }|dd|ddddf }	| |||	d| j	}|t
|ddd|ddf t
|dd|dddf   }q|S )aL  forward propagation of the location-variable convolutions.
        Args:
            x (Tensor): the input sequence (batch, in_channels, in_length)
            c (Tensor): the conditioning sequence (batch, cond_channels, cond_length)

        Returns:
            Tensor: the output sequence (batch, in_channels, in_length)
        r   g?N)r*   rF   F
leaky_relurE   rG   r   rC   location_variable_convolutionrA   r   sigmoidtanh)
r   xr-   rJ   r3   r   rL   yr1   r2   r   r   r)   r4      s   
	
(FzLVCBlock.forwardc                 C   s   | j \}}}|j \}}}}	}
||
| ks J d| d|
|  |t|	d d  }t| ||fdd} | d|d|  |} ||k rMt| d|fdd} | d||} | ddddddddd|f } | dd	} | d	|	d} td
| |}||dd }|	 
||d}|S )u  perform location-variable convolution operation on the input sequence (x) using the local convolution kernl.
        Time: 414 μs ± 309 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each), test on NVIDIA V100.
        Args:
            x (Tensor): the input sequence (batch, in_channels, in_length).
            kernel (Tensor): the local convolution kernel (batch, in_channel, out_channels, kernel_size, kernel_length)
            bias (Tensor): the bias for the local convolution (batch, out_channels, kernel_length)
            dilation (int): the dilation of convolution.
            hop_size (int): the hop_size of the conditioning sequence.
        Returns:
            (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length).
        z&length of (x, kernel) is not matched, z vs r   r	   constantr   r   Nr;   zbildsk,biokl->bolsd)r*   rH   rN   padunfold	transposer   einsum	unsqueezer+   r,   )rS   kernelr   r@   hop_sizer.   r/   	in_lengthout_channelsr=   kernel_lengthr   or   r   r)   rP      s,   &z&LVCBlock.location_variable_convolution)r;   r   r<   r   r   r   )	r5   r6   r7   r8   r   r4   staticmethodrP   r9   r   r   r'   r)   r:   V   s    /r:   )r   torch.nn.functionalr   
functionalrN   Moduler   r:   r   r   r   r)   <module>   s    Q