o
    
jP                     @   s2   d dl Z d dlZd dlmZ G dd dejZdS )    N)nnc                       sh   e Zd ZdZ													d fd	d
	Zdd Zdd ZedddZedddZ	  Z
S )	TorchSTFTa_  Some of the audio processing funtions using Torch for faster batch processing.

    Args:

        n_fft (int):
            FFT window size for STFT.

        hop_length (int):
            number of frames between STFT columns.

        win_length (int, optional):
            STFT window length.

        pad_wav (bool, optional):
            If True pad the audio with (n_fft - hop_length) / 2). Defaults to False.

        window (str, optional):
            The name of a function to create a window tensor that is applied/multiplied to each frame/window. Defaults to "hann_window"

        sample_rate (int, optional):
            target audio sampling rate. Defaults to None.

        mel_fmin (int, optional):
            minimum filter frequency for computing melspectrograms. Defaults to None.

        mel_fmax (int, optional):
            maximum filter frequency for computing melspectrograms. Defaults to None.

        n_mels (int, optional):
            number of melspectrogram dimensions. Defaults to None.

        use_mel (bool, optional):
            If True compute the melspectrograms otherwise. Defaults to False.

        do_amp_to_db_linear (bool, optional):
            enable/disable amplitude to dB conversion of linear spectrograms. Defaults to False.

        spec_gain (float, optional):
            gain applied when converting amplitude to DB. Defaults to 1.0.

        power (float, optional):
            Exponent for the magnitude spectrogram, e.g., 1 for energy, 2 for power, etc.  Defaults to None.

        use_htk (bool, optional):
            Use HTK formula in mel filter instead of Slaney.

        mel_norm (None, 'slaney', or number, optional):
            If 'slaney', divide the triangular mel weights by the width of the mel band
            (area normalization).

            If numeric, use `librosa.util.normalize` to normalize each filter by to unit l_p norm.
            See `librosa.util.normalize` for a full description of supported norm values
            (including `+-np.inf`).

            Otherwise, leave all the triangles aiming for a peak value of 1.0. Defaults to "slaney".
    Fhann_windowNr   P         ?slaneyc                    s   t    || _|| _|| _|| _|| _|| _|| _|	| _	|
| _
|| _|| _|| _|| _|| _tjtt||dd| _d | _|| _|
rJ|   d S d S )NF)requires_grad)super__init__n_fft
hop_length
win_lengthpad_wavsample_ratemel_fminmel_fmaxn_melsuse_meldo_amp_to_db	spec_gainpoweruse_htkmel_normr   	Parametergetattrtorchwindow	mel_basis
normalized_build_mel_basis)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__ S/home/kuhnn/.local/lib/python3.10/site-packages/TTS/utils/audio/torch_transforms.pyr
   @   s*   
zTorchSTFT.__init__c                 C   s  |j dkr
|d}| jr#t| j| j d }tjjj	|||fdd}tj
|d| j| j| j| jdd| jddd
}|ddddddd	f }|dddddddf }ttj|d |d  d
d}| jdurp|| j }| jr}t| j||}| jr| j|| jd}|S )zCompute spectrogram frames by torch based stft.

        Args:
            x (Tensor): input waveform

        Returns:
            Tensor: spectrogram frames.

        Shapes:
            x: [B x T] or [:math:`[B, 1, T]`]
              reflect)modeTF)centerpad_moder   onesidedreturn_complexNr   g:0yE>min)r   )ndim	unsqueezer   intr   r   r   r   
functionalpadstftsqueezer   r   r   sqrtclampr   r   matmulr   tor   
_amp_to_dbr   )r    xpaddingoMPSr#   r#   r$   __call__h   s6   

 

zTorchSTFT.__call__c              	   C   s<   t jj| j| j| j| j| j| j| j	d}t
| | _d S )N)srr   r   fminfmaxhtknorm)librosafiltersmelr   r   r   r   r   r   r   r   
from_numpyfloatr   )r    r   r#   r#   r$   r      s   	zTorchSTFT._build_mel_basisc                 C   s   t t j| dd| S )Ngh㈵>r-   )r   logr7   r;   r   r#   r#   r$   r:      s   zTorchSTFT._amp_to_dbc                 C   s   t | | S )N)r   exprM   r#   r#   r$   
_db_to_amp   s   zTorchSTFT._db_to_amp)Fr   Nr   Nr   FFr   NFr   F)r   )__name__
__module____qualname____doc__r
   rA   r   staticmethodr:   rO   __classcell__r#   r#   r!   r$   r      s,    >(+r   )rG   r   r   Moduler   r#   r#   r#   r$   <module>   s    