o
    
jl                     @   s  d dl Z d dlZd dlm  mZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ dJddZdJdd	Zd
d Zdd Zdd ZdKddZdd Zdd Zdd Zdd Zdd Zdd Zdd ZdJd d!ZdKd"d#Zd$d% ZdJd&d'ZdLd*d+ZdMd-d.Z dJd/d0Z!dNd3d4Z"dKd5d6Z#dJd7d8Z$dJd9d:Z%d;d< Z&dJd=d>Z'd?e(d@e(dAe
e
e(  fdBdCZ)dDdE Z*dFdG Z+dHdI Z,dS )O    N)glob)Path)List)tqdmc              	        t j| |}t|ddd}| }W d   n1 sw   Y  t|d d}t|dd D ]\}}t|d|krNtd|d  d	|	   q3t
jt j| |dd
 t fdddD shJ d jv rodnd}	d jv rxdnd}
g }d}  D ]=}|	du r|dur|j|v rqt j| |j}t j|s|d7 }q||j||	dur|	n|j|
dur|
n|j| d q|dkrtd| d |S )zbNormalizes the CML-TTS meta data file to TTS format
    https://github.com/freds0/CML-TTS-Dataset/rutf8encodingNr   |    > Missing column in line  -> sepc                 3       | ]}| j v V  qd S Ncolumns.0xmetadata N/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/datasets/formatters.py	<genexpr>       zcml_tts.<locals>.<genexpr>)wav_filename
transcript	client_iddefaultemotion_nameneutraltext
audio_filespeaker_namer"   	root_path	 | > [!]  files not found)ospathjoinopen	readlineslensplit	enumerateprintstrippdread_csvallr   
itertuplesr    r   existsappendr   r"   )r(   	meta_fileignored_speakersfilepathflinesnum_colsidxliner    r"   itemsnot_found_counterrow
audio_pathr   r   r   cml_tts   sB   
	rG   c              	      r   )zInteral dataset formatter.r   r   r	   Nr   r   r   r   r   r   c                 3   r   r   r   r   r   r   r   r   C   r   zcoqui.<locals>.<genexpr>)r&   r%   r'   coquir"   r#   r$   r)   r*   )r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r   r8   r'   r&   r9   r:   r%   r"   )r(   r;   r<   r=   r>   r?   r@   rA   rB   r'   r"   rC   rD   rE   rF   r   r   r   rH   7   sB   
	rH   c              	   K   s   t j| |}g }d}t|ddd,}|D ] }|d}t j| |d d }	|d }
||
|	|| d	 qW d
   |S 1 sAw   Y  |S )zhNormalize TWEB dataset.
    https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset
    twebr   utf-8r	   	r   .wavr   r%   r&   r'   r(   Nr+   r,   r-   r.   r1   r:   r(   r;   kwargstxt_filerC   r'   ttfrB   colswav_filer%   r   r   r   rI   ]   s   

rI   c              	   K   s   t j| |}g }d}t|ddd1}|D ]%}|d}|d  }	|d  }
t j| d|	}	||
|	|| d	 qW d
   |S 1 sFw   Y  |S )0Normalizes Mozilla meta data files to TTS formatmozillar   rJ   r	   r   r   r   wavsrM   N)r+   r,   r-   r.   r1   r4   r:   rO   r   r   r   rV   m   s   

rV   c              	   K   s   t j| |}g }d}t|ddd>}|D ]2}| d}|d  }	|d  }
d|	d	d  d
}t j| ||	}	||
|	|| d qW d   |S 1 sSw   Y  |S )rU   rV   r   z
ISO 8859-1r	   r   r   r   BATCH___FINALrM   N)r+   r,   r-   r.   r4   r1   r:   )r(   r;   rP   rQ   rC   r'   rR   rB   rS   rT   r%   folder_namer   r   r   
mozilla_de|   s    
r\   c              
   C   s  t dtj dtj dtj dtj }|s(t| tj dtj d dd}n|}g }|D ]}tj|r9|}ntj| |}tj|}|	|}	|	d	u rPq.|	
d
}
t|tr_|
|v r_q.td| t|dddO}|D ]D}|d}|stj|d|d d }ntj| |ddd|d d }tj|r|d  }||||
| d qptd|  qpW d	   n1 sw   Y  q.|S )a  Normalizes M-AI-Labs meta data files to TTS format

    Args:
        root_path (str): root folder of the MAILAB language folder.
        meta_files (str):  list of meta files to be used in the training. If None, finds all the csv files
            recursively. Defaults to None
    by_bookz(male|female)z(?P<speaker_name>[^z]+)z**zmetadata.csvT	recursiveNr'   z | > {}r   rJ   r	   r   rW   r   rL    r   rM   z> File %s does not exist!)recompiler+   r   r   r,   isfiler-   dirnamesearchgroup
isinstancelistr3   formatr.   r1   replacer4   r:   )r(   
meta_filesr<   speaker_regex	csv_filesrC   csv_filerQ   folderspeaker_name_matchr'   rR   rB   rS   rT   r%   r   r   r   mailabs   sD   *"



"rq   c              	   K      t j| |}g }d}t|ddd-}|D ]!}|d}t j| d|d d }	|d	 }
||
|	|| d
 qW d   |S 1 sBw   Y  |S )z`Normalizes the LJSpeech meta data file to TTS format
    https://keithito.com/LJ-Speech-Dataset/ljspeechr   rJ   r	   r   rW   r   rL      rM   NrN   rO   r   r   r   rs         

rs   c              	   K   s   t j| |}g }t|ddd@}d}t|D ]0\}}|d dkr%|d7 }|d}	t j| d|	d d	 }
|	d }|||
d
| | d qW d   |S 1 sSw   Y  |S )zbNormalizes the LJSpeech meta data file for TTS testing
    https://keithito.com/LJ-Speech-Dataset/r   rJ   r	   r   rt   r   r   rW   rL   z	ljspeech-rM   N)r+   r,   r-   r.   r2   r1   r:   )r(   r;   rP   rQ   rC   rR   
speaker_idrA   rB   rS   rT   r%   r   r   r   ljspeech_test   s$   

rw   c              	   K   rr   )zuNormalizes the thorsten meta data file to TTS format
    https://github.com/thorstenMueller/deep-learning-german-tts/thorstenr   rJ   r	   r   rW   r   rL   r   rM   NrN   rO   r   r   r   rx      ru   rx   c           
      K   s   t j| d|}t| }g }d}|dD ]+}|j}t j| d|dd }	t j	|	s9t
d|	 d q|||	|| d	 q|S )
zNormalizes the sam-accenture meta data file to TTS format
    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-filesvoice_over_recordingssam_accenturez./fileidvo_voice_quality_transformationidrL    [!] z( in metafile does not exist. Skipping...rM   )r+   r,   r-   ETparsegetrootfindallr%   getr9   r3   r:   )
r(   r;   rP   xml_filexml_rootrC   r'   itemr%   rT   r   r   r   rz      s   rz   c              	   K   rr   )zWNormalizes the RUSLAN meta data file to TTS format
    https://ruslan-corpus.github.io/ruslanr   rJ   r	   r   RUSLANr   rL   r   rM   NrN   rO   r   r   r   r     ru   r   c              	   K      t j| |}g }d}t|ddd*}|D ]}|d}t j| |d }	|d }
||
|	|| d qW d	   |S 1 s?w   Y  |S )
z/Normalizes the CSS10 dataset file to TTS formatcss10r   rJ   r	   r   r   r   rM   NrN   rO   r   r   r   r        

r   c              	   K   s   t j| |}g }d}t|ddd8}|D ],}| d }||dd |dd  }	t j| d|d }
||	|
|| d	 qW d
   |S 1 sMw   Y  |S )z1Normalizes the Nancy meta data file to TTS formatnancyr   rJ   r	   r   "wavnrL   rM   N)r+   r,   r-   r.   r1   findrfindr:   )r(   r;   rP   rQ   rC   r'   rR   rB   utt_idr%   rT   r   r   r   r     s    
r   c              
   C   s   t j| |}g }t|dddE}|D ]9}|drq|d}|d }|d }	t|tr2|	|v r2qt j| d|d	 d
d}
|	||
d|	 | d qW d   |S 1 sXw   Y  |S )z8Normalize the common voice meta data file to TTS format.r   rJ   r	   r    rK   rt   r   clipsr   z.mp3rL   MCV_rM   N)
r+   r,   r-   r.   
startswithr1   rg   rh   rj   r:   )r(   r;   r<   rQ   rC   rR   rB   rS   r%   r'   rT   r   r   r   common_voice-  s*   



r   c              
   C   sD  g }|st |  ddd}nt|trtj| |g}|D ]l}tj|dd }t|dddP}|D ]E}|d	}|d }	|d d
^}
}}tj| |
 d| }tj||	d }|d }t|t	rl|
|v rlq4|
||d|
 | d q4W d   n1 sw   Y  q|D ]}tj|d sJ d|d  q|S )z+https://ai.google/tools/datasets/libri-tts/z/**/*trans.tsvTr^   .r   r   rJ   r	   rK   rY   /rL   rt   LTTS_rM   Nr&    [!] wav files don't exist - )r   rg   strr+   r,   r-   basenamer1   r.   rh   r:   r9   )r(   rk   r<   rC   r;   
_meta_filerR   rB   rS   	file_namer'   
chapter_idrY   
_root_pathrT   r%   r   r   r   r   	libri_ttsC  s>   


$r   c              	   K   s   t j| |}g }d}g }t|ddd<}|D ]1}|d}	t j| d|	d  d }
t j|
s8||
 q|	d	  }|||
|| d
 qW d    n1 sSw   Y  tdt	| d |S )Nzturkish-femaler   rJ   r	   r   rW   r   rL   r   rM   r}   z# files skipped. They don't exist...)
r+   r,   r-   r.   r1   r4   r9   r:   r3   r0   )r(   r;   rP   rQ   rC   r'   skipped_filesrR   rB   rS   rT   r%   r   r   r   custom_turkishg  s"   

	r   c              	   C   s   t j| |}g }t|ddd>}|D ]2}|drq|d}t j| |d }|d }	|d }
t|tr;|
|v r;q||	||
| d	 qW d
   |S 1 sQw   Y  |S )zBRSpeech 3.0 betar   rJ   r	   r   r   r   rt      rM   N)	r+   r,   r-   r.   r   r1   rg   rh   r:   )r(   r;   r<   rQ   rC   rR   rB   rS   rT   r%   rv   r   r   r   brspeechz  s&   



r   wav48_silence_trimmedmic1c                 C   s&  d}g }t tj| d ddd}|D ]|}tj|| tj\}}	}
|
dd }t|tr5|	|v r5qt	|dd	d
}|
 d }W d   n1 sMw   Y  |	dkretj| ||	|d|  }ntj| ||	|d| d|  }tj|r|||d|	 | d qtd|  q|S )u  VCTK dataset v0.92.

    URL:
        https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip

    This dataset has 2 recordings per speaker that are annotated with ```mic1``` and ```mic2```.
    It is believed that (😄 ) ```mic1``` files are the same as the previous version of the dataset.

    mic1:
        Audio recorded using an omni-directional microphone (DPA 4035).
        Contains very low frequency noises.
        This is the same audio released in previous versions of VCTK:
        https://doi.org/10.7488/ds/1994

    mic2:
        Audio recorded using a small diaphragm condenser microphone with
        very wide bandwidth (Sennheiser MKH 800).
        Two speakers, p280 and p315 had technical issues of the audio
        recordings using MKH 800.
    flactxt	/**/*.txtTr^   r   r   r   rJ   r	   Np280z_mic1.rY   VCTK_rM   r   )r   r+   r,   r-   relpathr1   r   rg   rh   r.   r/   r9   r:   r3   )r(   rk   	wavs_pathmicr<   file_extrC   r;   rY   rv   rQ   file_id	file_textr%   rT   r   r   r   vctk  s*   
"r   wav48c              	   C   s   g }t tj| d ddd}|D ]U}tj|| tj\}}}|dd }	t|tr3||v r3qt	|ddd	}
|

 d }W d
   n1 sKw   Y  tj| |||	d }|||d| | d q|S )z:homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gzr   r   Tr^   r   r   r   rJ   r	   NrL   	VCTK_old_rM   )r   r+   r,   r-   r   r1   r   rg   rh   r.   r/   r:   )r(   rk   r   r<   rC   r;   rY   rv   rQ   r   r   r%   rT   r   r   r   vctk_old  s    
r   c           
   	   K   s   g }d}t j| d} t|  ddd}|D ]Z}t jd t j |v r)|dd}nt jt j|dt j|dd	}t j|rpt j|rpt	|d
dd}|
 d }	W d    n1 saw   Y  ||	||| d q|S )N	synpaflexr`   **/*.wavTr^   wavr   rL   z.txtr   rJ   r	   r   rM   )r+   r,   r-   r   r   rj   rd   r   r9   r.   r/   r:   )
r(   	metafilesrP   rC   r'   	wav_filesrT   rQ   r   r%   r   r   r   r     s"   r   trainTc              	   C   s   g }|}t tj| | ddd}|D ]b}tj|| tj\}}}	|	dd }
t|tr5||v r5qt	|ddd}|
 d	d
}W d   n1 sOw   Y  |r_tttj|r_qtj| |||
d }|||d| | d q|S )z$ToDo: Refer the paper when availabler   Tr^   r   r   r   rJ   r	   
r`   Nz.flacOB_rM   )r   r+   r,   r-   r   r1   r   rg   rh   r.   readlinerj   anymapr   isdigitr:   )r(   rk   ignore_digits_sentencesr<   rC   	split_dirr;   rY   rv   rQ   r   r   r%   rT   r   r   r   
open_bible  s"   
r   c                 C   s   g }t tj| |dddJ}|D ]>}|d\}}|dd }|d^}}	}
tj| tj|d||	|d	 }t|trC||v rCq|||d
| | d qW d   |S 1 s[w   Y  |S )zhttp://www.openslr.org/94/r   rJ   r	   rK   NrY   audiorL   MLS_rM   )	r.   r+   r,   r-   r1   rd   rg   rh   r:   )r(   rk   r<   rC   metarB   filer%   speakerbookrY   rT   r   r   r   mls  s$   "

r   c                 K      t | |ddS )P
    :param meta_file   Used only for consistency with load_tts_samples api
    2
voxcel_idx	_voxcel_xr(   r;   rP   r   r   r   	voxceleb2     r   c                 K   r   )r   1r   r   r   r   r   r   	voxceleb1  r   r   c                 C   s  |dv sJ |dkrdnd}t | }|d| d }|jjdd |d urHtt|d	d
d}dd | D W  d    S 1 sBw   Y  nq| sd}g }|d}	t|	d| d|dD ])}
tt |
jjj	}|
dsrJ d }|| d|
 d| d| d |d7 }q`tt|dd
d}|d| W d    n1 sw   Y  ||k rtd| d| tt|d	d
d}dd | D W  d    S 1 sw   Y  d S )N)r   r   r   i B i@B metafile_voxcelebz.csvT)exist_okr   rJ   r	   c                 S      g | ]	}|  d qS r   r4   r1   r   r   r   r   
<listcomp>&      z_voxcel_x.<locals>.<listcomp>r   r   zBuilding VoxCeleb z/ Meta file ... this needs to be done only once.)desctotalr|   r   z|voxcelrY   r   r   wr`   z7Found too few instances for Voxceleb. Should be around z, is: c                 S   r   r   r   r   r   r   r   r   <  r   )r   parentmkdirr.   r   r/   r9   rglobr   stemr   r:   writer-   
ValueError)r(   r;   r   expected_countvoxceleb_pathcache_tor>   cnt	meta_datar   r,   rv   r%   r   r   r   r     s>   "


"
$r   c              	   C   s   t j| |}g }t|dddB}|D ]6}|drq|d}t j| |d }|d }	|d d	d
}
t|tr?|	|v r?q|	||	|
| d qW d   |S 1 sUw   Y  |S )zGeneric emotion datasetr   rJ   r	   	file_path,r   r   rt   r   r`   )r&   r'   r"   r(   N)
r+   r,   r-   r.   r   r1   rj   rg   rh   r:   )r(   r;   r<   rQ   rC   rR   rB   rS   rT   rv   
emotion_idr   r   r   emotion?  s*   



r   r(   r;   returnc              	   K   s   t j| |}g }d}t|ddd*}|D ]}|dd\}}	t j| d|}
||	|
|| d qW d	   |S 1 s?w   Y  |S )
aW  Normalizes the Baker meta data file to TTS format

    Args:
        root_path (str): path to the baker dataset
        meta_file (str): name of the meta dataset containing names of wav to select and the transcript of the sentence
    Returns:
        List[List[str]]: List of (text, wav_path, speaker_name) associated with each sentences
    bakerr   rJ   r	   r   r   clips_22rM   N)r+   r,   r-   r.   rstripr1   r:   )r(   r;   rP   rQ   rC   r'   rR   rB   wav_namer%   wav_pathr   r   r   r   U  s   	
r   c              	   K   s   t j| |}g }d}t|ddd1}|D ]%}|d}t j| d|d d }	|d	 d
d}
||
|	|| d qW d   |S 1 sFw   Y  |S )zVJapanese single-speaker dataset from https://github.com/kaiidams/Kokoro-Speech-Datasetkokoror   rJ   r	   r   rW   r   rL   rt    r`   rM   N)r+   r,   r-   r.   r1   rj   r:   rO   r   r   r   r   i  s   

r   c              	   K   r   )
zqKorean single-speaker dataset from https://www.kaggle.com/datasets/bryanpark/korean-single-speaker-speech-datasetkssr   rJ   r	   r   r   rt   rM   NrN   rO   r   r   r   r   w  r   r   c              	   K   s   t j| |}g }d}t|ddd*}|D ]}|d}t j| |d }	|d }
||
|	|| d qW d    |S 1 s?w   Y  |S )	Nbel_ttsr   rJ   r	   r   r   r   rM   rN   rO   r   r   r   bel_tts_formatter  s   

r   r   )NN)Nr   r   N)Nr   N)r   TN)-r+   ra   xml.etree.ElementTreeetreeElementTreer~   r   pathlibr   typingr   pandasr5   r   rG   rH   rI   rV   r\   rq   rs   rw   rx   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sL    

'&
3

$


/





#