o
    
j6"                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 g ddgg ddgdZd	d
dddZdddZi Zdd Zdd Zdd Zdd Zdd Zedkre
e
j eejdkrled e  ejd ejd ejd Zed < ed!< eD ]
Zeeed" qdS dS )#z voxceleb 1 & 2     N)logging)zIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaazIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partabzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaczIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partadzGhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip)zIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaazIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partabzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaczIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partadzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partaezIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partafzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partagzIhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_dev_aac_partahzGhttps://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox2_test_aac.zip)vox1_dev_wavvox1_test_wavvox2_dev_aacvox2_test_aac ae63e55b951748cc486645f532ba230b bbc063c46078a602ca71605645c2a402 185fdc63c3c739954633d50379a3d102 0d2b3ea430a821c33263b5ea37ede312)r   r   r   r    )userpasswordc                 C   s  t j| dd z|D ]>}t j| |dd }t j|rq
td||f  tj	d|t
d t
d |f dd	 t |}td
||jf  q
d|vrhd|ddd }tj	d||f dd	 |d7 }|d}t|d}t|  }W d   n1 sw   Y  |t| krtd| t|d&}	|	|  t j| |	 d j}
tj	d|
|f dd	 W d   W dS 1 sw   Y  W dS w )zDownload and extract the given split of dataset.

    Args:
        directory: the directory where to put the downloaded data.
        subset: subset name of the corpus.
        urls: the list of urls to download the data file.
    T)exist_ok/zDownloading %s to %sz%wget %s --user %s --password %s -O %sr   r   shellz+Successfully downloaded %s, size(bytes): %dz.zip_Nzcat %s* > %s.ziprbzmd5sum of %s mismatchrr   zmv %s %s)osmakedirspathjoinsplitexistsr   info
subprocesscallUSERstatst_sizestripopenhashlibmd5read	hexdigestMD5SUM
ValueErrorzipfileZipFile
extractallinfolistfilename)	directorysubseturlsurlzip_filepathstatinfoextract_pathf_zipr%   zfileextract_path_ori r9   U/home/kuhnn/.local/lib/python3.10/site-packages/TTS/encoder/utils/prepare_voxceleb.pydownload_and_extractA   s>   


r;   c              
   C   sn   zt j| dd}|dk rtd|  W |S W |S  ty6 } ztd|  d}W Y d}~|S d}~ww )zRun a command in a subprocess.
    Args:
        cmd: command line to be executed.
    Return:
        int, the return code.
    Tr   r   zChild was terminated by signal zExecution failed: iN)r   r   r   r   OSError)cmdretcodeer9   r9   r:   exec_cmdo   s   r@   c                 C   sR   d|  d| }t d|  t|}|dkr't d|  t d dS dS )	zDecode a given AAC file into WAV using ffmpeg.
    Args:
        aac_file: file path to input AAC file.
        wav_file: file path to output WAV file.
    Return:
        bool, True if success.
    z
ffmpeg -i  z&Decoding aac file using command line: r   z'Failed to decode aac file with retcode z&Please check your ffmpeg installation.FT)r   r   r@   error)aac_filewav_filer=   retr9   r9   r:   decode_aac_with_ffmpeg   s   
rF   c                 C   sd  t d|  tj| |}g }t|D ]z\}}}|D ]r}	tj|	\}
}| dkr?tj|
\}}|r7qtj||	}n"| dkr`tj||	}|d }tj|s_t	||s_t
dnq|tjjd }|tvrvtt}|t|< tt|d }|tj||t| |f qqtj||}tj|g dd}|j|d	d
d t d| dS )ah  Optionally convert AAC to WAV and make speaker labels.
    Args:
        input_dir: the directory which holds the input dataset.
        subset: the name of the specified subset. e.g. vox1_dev_wav
        output_dir: the directory to place the newly generated csv files.
        output_file: the name of the newly generated csv file. e.g. vox1_dev_wav.csv
    z+Preprocessing audio and label for subset %sz.wavz.m4azAudio decoding failed.r   )wav_filenamewav_length_ms
speaker_idspeaker_name)datacolumnsF	)indexsepz"Successfully generated csv file {}N)r   r   r   r   r   walksplitextlowerr   rF   RuntimeErrorr   rP   speaker_id_dictlensfr&   appendabspathpandas	DataFrameto_csvformat)	input_dirr0   
output_diroutput_file
source_dirfilesrootr   	filenamesr.   nameextext2rD   rC   rK   num
wav_lengthcsv_file_pathdfr9   r9   r:   convert_audio_and_make_label   s<   	
 rl   c                 C   s   t }||vrt|dtj| |d }|stj|r|S td|  td| t| |||  t	| || |d  td |S )zdownload and processzis not in voxcelebz.csvz*Downloading and process the voxceleb in %szPreparing subset %sz#Finished downloading and processing)
SUBSETSr)   r   r   r   r   r   r   r;   rl   )r/   r0   force_processr1   
subset_csvr9   r9   r:   	processor   s   

rp   __main__   z:Usage: python prepare_data.py save_directory user password         r   r   F)__doc__r$   r   r   sysr*   rZ   	soundfilerW   abslr   rm   r(   r   rU   r;   r@   rF   rl   rp   __name__set_verbosityINFOrV   argvprintexitDIRSUBSETr9   r9   r9   r:   <module>   sH   

./*