o
    
j                     @   sN   d Z ddlZddlmZ ddlmZ ddlmZ dd Zedkr%e  dS dS )	z+Find all the unique characters in a dataset    N)RawTextHelpFormatter)load_config)load_tts_samplesc            
      C   s   t jdtd} | jdtddd |  }t|j}t|j	d|j
|jd\}}|| }dd	d
 |D }t|}tdd |}dd |D }	t|	}	tdt|  tddt|  tddt|  tddt|	  d S )NzFind all the unique characters or phonemes in a dataset.


    Example runs:

    python TTS/bin/find_unique_chars.py --config_path config.json
    )descriptionformatter_classz--config_pathzPath to dataset config file.T)typehelprequired)
eval_spliteval_split_max_sizeeval_split_size c                 s   s    | ]}|d  V  qdS )textN ).0itemr   r   L/home/kuhnn/.local/lib/python3.10/site-packages/TTS/bin/find_unique_chars.py	<genexpr>    s    zmain.<locals>.<genexpr>c                 S   s   |   S )N)islower)cr   r   r   <lambda>"   s    zmain.<locals>.<lambda>c                 S   s   g | ]}|  qS r   )lower)r   r   r   r   r   
<listcomp>#   s    zmain.<locals>.<listcomp>z  > Number of unique characters: z > Unique characters: z > Unique lower characters: z* > Unique all forced to lower characters: )argparseArgumentParserr   add_argumentstr
parse_argsr   config_pathr   datasetsr   r   joinsetfilterprintlensorted)
parserargsr   train_items
eval_itemsitemstextscharslower_charschars_force_lowerr   r   r   main	   s(   	

r/   __main__)	__doc__r   r   
TTS.configr   TTS.tts.datasetsr   r/   __name__r   r   r   r   <module>   s    #
