o
    
j	                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dlZd dl	m
Z
 d dlT dZddeeeejf dee fd	d
Zdd ZdddZdS )    N)Path)OptionalUnion)binary_dilation)*i  fpath_or_wav	source_src                 C   sd   t | ts
t | trtj| dd\}}n| }|dur%|tkr%t||t}t|tdd}t	|}|S )a  
    Applies the preprocessing operations used in training the Speaker Encoder to a waveform
    either on disk or in memory. The waveform will be resampled to match the data hyperparameters.

    :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not
    just .wav), either the waveform as a numpy array of floats.
    :param source_sr: if passing an audio waveform, the sampling rate of the waveform before
    preprocessing. After preprocessing, the waveform's sampling rate will match the data
    hyperparameters. If passing a filepath, the sampling rate will be automatically detected and
    this argument will be ignored.
    N)srT)increase_only)

isinstancestrr   librosaloadsampling_rateresamplenormalize_volumeaudio_norm_target_dBFStrim_long_silences)r   r   wav r   ^/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vc/modules/freevc/speaker_encoder/audio.pypreprocess_wav   s   r   c                 C   s<   t jj| tttt d ttt d td}|t	j
jS )z
    Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform.
    Note: this not a log-mel spectrogram.
    i  )yr	   n_fft
hop_lengthn_mels)r   featuremelspectrogramr   intmel_window_lengthmel_window_stepmel_n_channelsastypenpfloat32T)r   framesr   r   r   wav_to_mel_spectrogram,   s   r'   Fc                 C   sX   |r|rt d|dtt| d   }|dk r|s"|dkr$|r$| S | d|d   S )Nz,Both increase only and decrease only are set
      r      )
ValueErrorr#   log10mean)r   target_dBFSr
   decrease_onlydBFS_changer   r   r   r   ;   s   r   )N)FF)structpathlibr   typingr   r   r   numpyr#   scipy.ndimage.morphologyr   -TTS.vc.modules.freevc.speaker_encoder.hparams	int16_maxr   ndarrayr   r   r'   r   r   r   r   r   <module>   s    $