o
    
j/                     @   sX   d dl Z d dl mZ d dlmZ G dd dejZG dd dejZG dd	 d	eZdS )
    N)nn)BaseEncoderc                       $   e Zd Z fddZdd Z  ZS )LSTMWithProjectionc                    sD   t    || _|| _|| _tj||dd| _tj||dd| _	d S )NT)batch_firstFbias)
super__init__
input_sizehidden_size	proj_sizer   LSTMlstmLinearlinear)selfr   r   r   	__class__ J/home/kuhnn/.local/lib/python3.10/site-packages/TTS/encoder/models/lstm.pyr
      s   
zLSTMWithProjection.__init__c                 C   s&   | j   |  |\}\}}| |S )N)r   flatten_parametersr   )r   xo_r   r   r   forward   s   

zLSTMWithProjection.forward__name__
__module____qualname__r
   r   __classcell__r   r   r   r   r      s    r   c                       r   )LSTMWithoutProjectionc                    s>   t    tj|||dd| _tj||dd| _t | _d S )NT)r   r   
num_layersr   r   )	r	   r
   r   r   r   r   r   ReLUrelu)r   	input_dimlstm_dimproj_dimnum_lstm_layersr   r   r   r
      s   
zLSTMWithoutProjection.__init__c                 C   s&   |  |\}\}}| | |d S )N)r   r$   r   )r   r   r   hiddenr   r   r   r      s   zLSTMWithoutProjection.forwardr   r   r   r   r   r!      s    r!   c                       s<   e Zd Z						d fdd	Zd	d
 ZdddZ  ZS )LSTMSpeakerEncoder         TFNc           
         s   t    || _|| _|| _|| _g }|r7|t||| t|d D ]}	|t||| q$t	j
| | _nt||||| _t	|| _| jrO| || _nd | _|   d S )N   )r	   r
   use_lstm_with_projectionuse_torch_specaudio_configr'   appendr   ranger   
Sequentiallayersr!   InstanceNorm1dinstancenormget_torch_mel_spectrogram_class
torch_spec_init_layers)
r   r%   r'   r&   r(   r0   r1   r2   r6   r   r   r   r   r
   #   s"   

zLSTMSpeakerEncoder.__init__c                 C   sD   | j  D ]\}}d|v rtj|d qd|v rtj| qd S )Nr   g        weight)r6   named_parametersr   init	constant_xavier_normal_)r   nameparamr   r   r   r;   F   s   zLSTMSpeakerEncoder._init_layersc              	   C   s   t  6 t jjjdd | jr|d | |}| |	dd}W d   n1 s.w   Y  W d   n1 s=w   Y  | 
|}| jrR|dddf }|r^t jjj|ddd}|S )a{  Forward pass of the model.

        Args:
            x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `torch_spec` must be `True`
                to compute the spectrogram on-the-fly.
            l2_norm (bool): Whether to L2-normalize the outputs.

        Shapes:
            - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})`
        F)enabledr/      Nr)   )pdim)torchno_gradcudaampautocastr1   squeeze_r:   r8   	transposer6   r0   r   
functional	normalize)r   r   l2_normdr   r   r   r   M   s   



zLSTMSpeakerEncoder.forward)r,   r-   r.   TFN)T)r   r   r   r
   r;   r   r    r   r   r   r   r+   "   s    #r+   )rG   r   TTS.encoder.models.base_encoderr   Moduler   r!   r+   r   r   r   r   <module>   s    