o
    
j	                     @   sv   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	 Zd
d Zedkr9e  dS dS )z+Find all the unique characters in a dataset    N)RawTextHelpFormatter)process_map)load_config)load_tts_samples)Gruutc                 C   s&   | d }t |dd}tt|S )Ntext| )
phonemizer	phonemizereplacesetlist)itemr   ph r   O/home/kuhnn/.local/lib/python3.10/site-packages/TTS/bin/find_unique_phonemes.pycompute_phonemes   s   r   c                  C   sn  t jdtd} | jdtddd |  }t|jat	tj
dtjtjd\}}|| }tdt| d	d
 |D }t|}tjrA|sEtd||d t|ksTtdt|d ddatt|t dd}g }|D ]}	||	 qjt|}tdd |}
dd
 |D }t|}tdt|  tddt|  tddt|
  tddt|  d S )NzFind all the unique characters or phonemes in a dataset.


    Example runs:

    python TTS/bin/find_unique_phonemes.py --config_path config.json
    )descriptionformatter_classz--config_pathzPath to dataset config file.T)typehelprequired)
eval_spliteval_split_max_sizeeval_split_sizez
Num items:c                 S   s   g | ]}|d  qS )languager   ).0r   r   r   r   
<listcomp>,       zmain.<locals>.<listcomp>z+Phoneme language must be defined in config.r   zCurrently, just one phoneme language per config file is supported !! Please split the dataset config into different configs and run it individually for each language !!)r   
keep_puncs   )max_workers	chunksizec                 S   s   |   S )N)islower)cr   r   r   <lambda>?   s    zmain.<locals>.<lambda>c                 S   s   g | ]}|  qS r   )lower)r   r%   r   r   r   r   @   r   z > Number of unique phonemes: z > Unique phonemes: r	   z > Unique lower phonemes: z( > Unique all forced to lower phonemes: )argparseArgumentParserr   add_argumentstr
parse_argsr   config_pathr%   r   datasetsr   r   printlenallphoneme_language
ValueErrorcountr   r
   r   r   multiprocessing	cpu_countextendr   filterjoinsorted)parserargstrain_items
eval_itemsitemslanguage_listis_lang_defphonemesphonesr   lower_phonesphones_force_lowerr   r   r   main   sB   	


rF   __main__)__doc__r(   r5   r   tqdm.contrib.concurrentr   
TTS.configr   TTS.tts.datasetsr   TTS.tts.utils.text.phonemizersr   r   rF   __name__r   r   r   r   <module>   s    6
