o
    
j                     @   s  d dl Z d dlmZ d dlmZmZ d dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ dd Zdefd	d
ZdefddZdefddZdd ZdZdZdd Zdd Zd*ddZd+ddZg fdee fddZg fd edee fd!d"Zg fd#ee dee fd$d%Zd,d(d)ZdS )-    N)glob)DictList)read)	TorchSTFTc                 C   sx   t | \}}|jtjkrd}n |jtjkrd}n|jtjks$|jtjkr'd}ntd|j t	|
tj| |fS )Nl        i   g      ?z#Provided data dtype not supported: )r   dtypenpint32int16float16float32NotImplementedErrortorchFloatTensorastype)	full_pathsampling_ratedatanorm_fix r   V/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/tortoise/audio_utils.pyload_wav_to_torch   s   r   	audiopathc                 C   sN   t | dkst | dk std| d|   d|    | dd d S )N   r   zError with z. Max=z min=   )r   anyprintmaxminclip_)audior   r   r   r   check_audio   s   "r"   c                 C   s   | dd  dkrt | \}}n"| dd  dkr&tj| d d\}}t|}nJ d| dd   t|jdkrX|jd d	k rG|d }||fS |jd d	k sPJ |d d df }||fS )
Nz.wavz.mp3)srFz#Unsupported audio format provided: r   r      )r   librosaloadr   r   lenshape)r   r!   lsrr   r   r   read_audio_file#   s   r+   c                    sB   t | \  fdddD }|D ] t |  qdd |D S )Nc                    s   g | ]
}t j |qS r   )
torchaudio
functionalresample).0r   r!   r*   r   r   
<listcomp>:   s    z'load_required_audio.<locals>.<listcomp>)i"V  ]  c                 S   s   g | ]}| d qS r   )	unsqueeze)r/   r!   r   r   r   r1   >   s    )r+   r"   )r   audiosr   r0   r   load_required_audio7   s
   r6   c                 C   s8   t | \}}||krtj|||}t||  |dS )Nr   )r+   r,   r-   r.   r"   r4   )r   r   r!   r*   r   r   r   
load_audioA   s
   

r7   g    ă@g    'c                 C   s   | d d t t  t S )Nr   r   )TACOTRON_MEL_MAXTACOTRON_MEL_MIN)norm_melr   r   r   denormalize_tacotron_melO      r;   c                 C   s   d| t  tt    d S )Nr   r   )r9   r8   )melr   r   r   normalize_tacotron_melS   r<   r>   r   h㈵>c                 C   s   t t j| |d| S )z5
    PARAMS
    ------
    C: compression factor
    )r   )r   logclamp)xCclip_valr   r   r   dynamic_range_compressionW   s   rE   c                 C   s   t | | S )zF
    PARAMS
    ------
    C: compression factor used to compress
    )r   exp)rB   rC   r   r   r   dynamic_range_decompression`   s   rG   extra_voice_dirsc                 C   s~   | }i }|D ]6}t |}|D ],}t j||}t j|r;tt| dtt| d tt| d ||< qq|S )Nz/*.wavz/*.mp3z/*.pth)oslistdirpathjoinisdirlistr   )rH   dirsvoicesdsubssubsubjr   r   r   
get_voicesi   s   
:rU   voicec                 C   sp   | dkrdS t |}||  }t|dkr$|d dr$d t|d fS g }|D ]}t|}|| q(|d fS )NrandomNNr   r   z.pth)rU   r(   endswithr   r'   r6   append)rV   rH   rP   pathsconds	cond_pathcr   r   r   
load_voiceu   s   r_   rP   c           	      C   s   g }g }| D ]A}|dkrt | dkrtd  dS t||\}}|d u r4t |dks.J d|| q|d u rGt |dksBJ d|| qt |dkrR|d fS tjdd |D dd	jdd	}tjd
d |D dd	jdd	}||f}d |fS )NrW   r   zQCannot combine a random voice with a non-random voice. Just using a random voice.rX   r   z^Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this.c                 S      g | ]}|d  qS r3   r   r/   lr   r   r   r1          zload_voices.<locals>.<listcomp>)dimc                 S   r`   r   r   ra   r   r   r   r1      rc   )r(   r   r_   extendrZ   r   stackmean)	rP   rH   latentsclipsrV   cliplatent	latents_0	latents_1r   r   r   load_voices   s2   
  ro   Fcudac              
   C   sB   t ddddddddd}||}|| }t|}|rt|}|S )	Ni      Td   r2   r   i.  )n_fft
hop_length
win_lengthuse_meln_melssample_ratemel_fminmel_fmax)r   torE   r>   )wavdo_normalizationdevicestftr=   r   r   r   wav_to_univnet_mel   s    

r   )r   r?   re   )Frp   )rI   r   typingr   r   r&   numpyr   r   r,   scipy.io.wavfiler    TTS.utils.audio.torch_transformsr   r   strr"   r+   r6   r7   r8   r9   r;   r>   rE   rG   rU   r_   ro   r   r   r   r   r   <module>   s0    



		