o
    
j7                     @   s:  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZ	d dlZd dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ dd Zdadd	 Zd
d ZG dd dZe e_eje_d dlZejejdeejgd dd Z e!dkre j"de j#dZ$e$j%de&ddd e$j%de'ddd e$j%de&ddd e$j%de&d d!d e$j%d"e&d#d$d e$j%d%e&d&d'd e$( Z)e* Z+e,d(G ej-d)e)j.d*Z.ej/d+d,d-Z0ej1d.d/g d0d1Z2ej3d2d3Z4ej-d4d5d6Z5e+j6e de5d$d7 ej7d8d9Z8ej9d:d;fd<d=Z:W d   n	1 sw   Y  e,d>` ej-d?d3Z;ej-d@d3Z<ej=dAd$dBd$e)j>dCZ>ej=dDdEdFd$e)j?dCZ?ej=dGdEdHd$e)j@dCZ@ej=dIdEdJd$e)jAdCZAej3d2d3ZBej-d4d5d6ZCe+j6e deCd$d7 ej7dKd9ZDdLdM ZEW d   n	1 sw   Y  e,dN eF  eG )ZHej-dOdPd*ZIej-dQdPd*ZJej-dRdPd*ZKej3d2d3ZLej7dSd9ZMW d   n	1 sw   Y  eG &ZNej-dTdPd*ZOej1dUd/g d0d1ZPej-dVdWd*ZQej7dXd9ZRW d   n	1 sw   Y  eG ZSej3d2d3ZTejUdYd3ZVejUdZd3ZWW d   n	1 sw   Y  W d   n	1 s-w   Y  e8jXe:e0e2e.ge4e;e<gd[ eDjXeEe2e;e<e>e?e@e.eAgeBeJeKeIeOgd[ eMjXeeIeJeKgeLgd[ eRjXeePeQeOgeTeVeWgd[ W d   n	1 szw   Y  W d   n	1 sw   Y  e+jYd:d5e)jZd\d] dS dS )^    N)format_audio_list)	train_gpt)
XttsConfig)Xttsc                   C   s   t j rt j  d S d S N)torchcudais_availableempty_cache r   r   S/home/kuhnn/.local/lib/python3.10/site-packages/TTS/demos/xtts_ft_demo/xtts_demo.pyclear_gpu_cache   s   
r   c                 C   sh   t   | r	|r	|sdS t }|| t|atd tj|| |dd tj	
 r.t	  td dS )NzYou need to run the previous steps or manually set the `XTTS checkpoint path`, `XTTS config path`, and `XTTS vocab path` fields !!zLoading XTTS model! F)checkpoint_path
vocab_pathuse_deepspeedzModel Loaded!)r   r   	load_jsonr   init_from_config
XTTS_MODELprintload_checkpointr   r   r	   )xtts_checkpointxtts_config
xtts_vocabconfigr   r   r   
load_model   s   


r   c                 C   s   t d u s|sdS t j|t jjt jjt jjd\}}t j|| ||t jjt jjt jj	t jj
t jjd	}tjddd }t|d d|d< |j}t||d d	 W d    n1 s[w   Y  d
||fS )N)z6You need to run the previous step to load the model !!NN)
audio_pathgpt_cond_lenmax_ref_lengthsound_norm_refs)	textlanguagegpt_cond_latentspeaker_embeddingtemperaturelength_penaltyrepetition_penaltytop_ktop_pz.wavF)suffixdeletewavr   i]  zSpeech generated !)r   get_conditioning_latentsr   r   max_ref_lenr   	inferencer#   r$   r%   r&   r'   tempfileNamedTemporaryFiler   tensor	unsqueezename
torchaudiosave)langtts_textspeaker_audio_filer!   r"   outfpout_pathr   r   r   run_tts+   s(   "
r;   c                   @   s.   e Zd ZdddZdd Zdd Zdd	 Zd
S )Loggerlog.outc                 C   s    || _ tj| _t| j d| _d S )Nw)log_filesysstdoutterminalopenlog)selffilenamer   r   r   __init__H   s   zLogger.__init__c                 C   s   | j | | j| d S r   )rB   writerD   )rE   messager   r   r   rH   M   s   zLogger.writec                 C   s   | j   | j  d S r   )rB   flushrD   rE   r   r   r   rJ   Q   s   
zLogger.flushc                 C   s   dS )NFr   rK   r   r   r   isattyU   s   zLogger.isattyN)r=   )__name__
__module____qualname__rG   rH   rJ   rL   r   r   r   r   r<   G   s
    
r<   z'%(asctime)s [%(levelname)s] %(message)s)levelformathandlersc                  C   sD   t j  tt jjd} |  W  d    S 1 sw   Y  d S )Nr)r@   rA   rJ   rC   r?   read)fr   r   r   	read_logsg   s   
$rV   __main__zrXTTS fine-tuning demo


        Example runs:
        python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port 
        )descriptionformatter_classz--portz*Port to run the gradio demo. Default: 5003i  )typehelpdefaultz
--out_pathzMOutput path (where data and checkpoints will be saved) Default: /tmp/xtts_ft/z/tmp/xtts_ft/z--num_epochsz&Number of epochs to train. Default: 10
   z--batch_sizezBatch size. Default: 4   z--grad_acummz#Grad accumulation steps. Default: 1   z--max_audio_lengthz0Max permitted audio size in seconds. Default: 11   z1 - Data processingz7Output path (where data and checkpoints will be saved):)labelvaluemultiplezlSelect here the audio files that you want to use for XTTS trainining (Supported formats: wav, mp3, and flac))
file_countra   zDataset Languageen)re   esfrdeitptpltrrunlcsarzhhukoja)ra   rb   choicesz	Progress:)ra   zLogs:F)ra   interactive)everyzStep 1 - Create dataset)rb   T)
track_tqdmc           	      C   s   t   tj|d}tj|dd | d u rdS zt| |||d\}}}W n   t  t }d| ddf Y S t   |dk rLd	}t	| |ddfS t	d
 d
||fS )NdatasetT)exist_ok)zyYou should provide one or multiple audio files! If you provided it, probably the upload of the files is not finished yet! r{   )target_languager:   gradio_progresszThe data processing was interrupted due an error !! Please check the console to verify the full error message! 
 Error summary: r{   x   zUThe sum of the duration of the audios that you provided should be at least 2 minutes!zDataset Processed!)
r   ospathjoinmakedirsr   	traceback	print_exc
format_excr   )	r   r    r:   progress
train_meta	eval_metaaudio_total_sizeerrorrI   r   r   r   preprocess_dataset   s$   

r   z2 - Fine-tuning XTTS Encoderz
Train CSV:z	Eval CSV:zNumber of epochs:d   )ra   minimummaximumsteprb   zBatch size:   i   zGrad accumulation steps:   z$Max permitted audio size in seconds:   zStep 2 - Run the trainingc              
   C   s   t   |r|s	dS zt|d }t| |||||||d\}}	}
}}W n   t  t }d| ddddf Y S td| d|  td|
 d|  tj	|d}t
d	 t   d	||
||fS )
N)z\You need to run the data processing step or manually set `Train CSV` and `Eval CSV` fields !r{   r{   r{   r{   i"V  )output_pathmax_audio_lengthzxThe training was interrupted due an error !! Please check the console to check the full error message! 
 Error summary: r{   zcp  zbest_model.pthzModel training done!)r   intr   r   r   r   r   systemr   r   r   )r    	train_csveval_csv
num_epochs
batch_size
grad_acummr   r   config_pathoriginal_xtts_checkpoint
vocab_fileexp_pathspeaker_wavr   ft_xtts_checkpointr   r   r   train_model  s    &r   z3 - InferencezXTTS checkpoint path:r{   zXTTS config path:zXTTS vocab path:z#Step 3 - Load Fine-tuned XTTS modelzSpeaker reference audio:LanguagezInput Text.zBThis model sounds really good and above all, it's reasonably fast.zStep 4 - InferencezGenerated Audio.zReference audio used.)fninputsoutputsz0.0.0.0)sharedebugserver_portserver_name)[argparser   r@   r.   gradiogrlibrosa.displaylibrosanumpynpr   r3   r   &TTS.demos.xtts_ft_demo.utils.formatterr   &TTS.demos.xtts_ft_demo.utils.gpt_trainr   TTS.tts.configs.xtts_configr   TTS.tts.models.xttsr   r   r   r   r;   r<   rA   stderrloggingbasicConfigINFOStreamHandlerrV   rM   ArgumentParserRawTextHelpFormatterparseradd_argumentr   str
parse_argsargsBlocksdemoTabTextboxr:   Fileupload_fileDropdownr5   Labelprogress_datalogsloadButtonprompt_compute_btnProgressr   r   r   Sliderr   r   r   r   progress_trainlogs_tts_train	train_btnr   RowColumncol1r   r   r   progress_loadload_btncol2speaker_reference_audiotts_languager6   tts_btncol3progress_genAudiotts_output_audioreference_audioclicklaunchportr   r   r   r   <module>   s   

	H
C



!<

  |
  