o
    
jwA                     @   s   d dl mZ d dlmZ d dlmZ dd ZdZdZdZ	d	Z
d
ZdZdZdZdZdZdZdZee e e e e ZG dd dZG dd dZG dd deZG dd deZedkrle Ze Ze  e  dS dS )    )replace)Dict)CharactersConfigc                   C   s   t tttttdS )N)padeosbos
characterspunctuationsphonemes)_pad_eos_bos_characters_punctuations	_phonemes r   r   P/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/utils/text/characters.pyparse_symbols   s   r   z<PAD>z<EOS>z<BOS>z<BLNK>4ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzz!'(),-.:;? u5   iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻu   ʘɓǀɗǃʄǂɠǁʛue   pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟu   ˈˌːˑu   ʍwɥʜʢʡɕʑɺɧʲu   ɚ˞ɫc                   @   s   e Zd ZdZd#dededededef
dd	Zed
efddZ	ed
efddZ
ed
efddZed
efddZedd Zejdd Zedd Zd$ddZedd Zded
efddZd ed
efd!d"ZdS )%BaseVocabularyzBase Vocabulary class.

    This class only needs a vocabulary dictionary without specifying the characters.

    Args:
        vocab (Dict): A dictionary of characters and their corresponding indices.
    Nvocabr   blankr   r   c                 C   s"   || _ || _|| _|| _|| _d S N)r   r   r   r   r   )selfr   r   r   r   r   r   r   r   __init__/   s
   
zBaseVocabulary.__init__returnc                 C      | j r	| | j S t| jS )zReturn the index of the padding character. If the padding character is not specified, return the length
        of the vocabulary.r   
char_to_idlenr   r   r   r   r   pad_id6      zBaseVocabulary.pad_idc                 C   r   )z~Return the index of the blank character. If the blank character is not specified, return the length of
        the vocabulary.r   r   r   r   r    r   r   r   blank_id<   r"   zBaseVocabulary.blank_idc                 C   r   )zzReturn the index of the bos character. If the bos character is not specified, return the length of the
        vocabulary.r   r   r   r   r    r   r   r   bos_idB   r"   zBaseVocabulary.bos_idc                 C   r   )zzReturn the index of the eos character. If the eos character is not specified, return the length of the
        vocabulary.r   r   r   r   r    r   r   r   eos_idH   r"   zBaseVocabulary.eos_idc                 C      | j S )z!Return the vocabulary dictionary._vocabr    r   r   r   r   N   s   zBaseVocabulary.vocabc                 C   sR   d\| _ | _| _|dur'|| _ dd t| j D | _dd t| j D | _dS dS )zASet the vocabulary dictionary and character mapping dictionaries.)NNNNc                 S      i | ]\}}||qS r   r   .0idxcharr   r   r   
<dictcomp>Y       z(BaseVocabulary.vocab.<locals>.<dictcomp>c                 S      i | ]\}}||qS r   r   r-   r   r   r   r1   Z       
)r+   _char_to_id_id_to_char	enumerater   r   r   r   r   r   S   s   c                 K   sV   | j dur"d| j v r"| j jr"t| j j| j j| j j| j j| j j| fS tdi || fS )z!Initialize from the given config.N
vocab_dictr   )r   r9   r   r   r   r   r   )configkwargsr   r   r   init_from_config^   s   
zBaseVocabulary.init_from_configr   c              	   C   s    t | j| j| j| j| jdddS )NF)r9   r   r   r   r   	is_unique	is_sorted)r   r+   r   r   r   r   r    r   r   r   	to_confign   s   zBaseVocabulary.to_configc                 C   
   t | jS )z*Return number of tokens in the vocabulary.r   r+   r    r   r   r   	num_charsy   s   
zBaseVocabulary.num_charsr0   c              
   C   s<   z| j | W S  ty } ztdt| d|d}~ww )zMap a character to an token ID. [!]  is not in the vocabulary.Nr5   KeyErrorreprr   r0   er   r   r   r   ~   s   zBaseVocabulary.char_to_idr/   c                 C   
   | j | S )zMap an token ID to a character.r6   r   r/   r   r   r   
id_to_char      
zBaseVocabulary.id_to_char)NNNNr   r   )__name__
__module____qualname____doc__r   strr   propertyintr!   r$   r&   r(   r   setterstaticmethodr<   r?   rB   r   rM   r   r   r   r   r   &   s,     





r   c                   @   s  e Zd ZdZ								dBdedededed	ed
edededdfddZedefddZ	edefddZ
edefddZedefddZedd Zejdd Zedd Zejdd Zedd Zejd d Zed!d" Zejd#d" Zed$d% Zejd&d% Zed'd( Zejd)d( Zed*d+ Zejd,d+ Zed-d. Zd/d0 Zd1edefd2d3Zd4edefd5d6ZdCd8efd9d:ZedDd=d>ZdEd@dAZdS )FBaseCharactersu  🐸BaseCharacters class

        Every new character class should inherit from this.

        Characters are oredered as follows ```[PAD, EOS, BOS, BLANK, CHARACTERS, PUNCTUATIONS]```.

        If you need a custom order, you need to define inherit from this class and override the ```_create_vocab``` method.

        Args:
            characters (str):
                Main set of characters to be used in the vocabulary.

            punctuations (str):
                Characters to be treated as punctuation.

            pad (str):
                Special padding character that would be ignored by the model.

            eos (str):
                End of the sentence character.

            bos (str):
                Beginning of the sentence character.

            blank (str):
                Optional character used between characters by some models for better prosody.

            is_unique (bool):
                Remove duplicates from the provided characters. Defaults to True.
    el
            is_sorted (bool):
                Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True.
    NFTr   r	   r   r   r   r   r=   r>   r   c	           	      C   s<   || _ || _|| _|| _|| _|| _|| _|| _|   d S r   )	r   r   r   r   r   _blankr=   r>   _create_vocab	r   r   r	   r   r   r   r   r=   r>   r   r   r   r      s   zBaseCharacters.__init__c                 C   r   r   r   r    r   r   r   r!         zBaseCharacters.pad_idc                 C   r   r   r#   r    r   r   r   r$      r]   zBaseCharacters.blank_idc                 C   r   r   r'   r    r   r   r   r(      r]   zBaseCharacters.eos_idc                 C   r   r   r%   r    r   r   r   r&      r]   zBaseCharacters.bos_idc                 C   r)   r   )r   r    r   r   r   r         zBaseCharacters.charactersc                 C      || _ |   d S r   )r   r[   )r   r   r   r   r   r         c                 C   r)   r   )r   r    r   r   r   r	      r^   zBaseCharacters.punctuationsc                 C   r_   r   )r   r[   )r   r	   r   r   r   r	      r`   c                 C   r)   r   )r   r    r   r   r   r      r^   zBaseCharacters.padc                 C   r_   r   )r   r[   )r   r   r   r   r   r      r`   c                 C   r)   r   )r   r    r   r   r   r      r^   zBaseCharacters.eosc                 C   r_   r   )r   r[   )r   r   r   r   r   r      r`   c                 C   r)   r   )r   r    r   r   r   r      r^   zBaseCharacters.bosc                 C   r_   r   )r   r[   )r   r   r   r   r   r      r`   c                 C   r)   r   )rZ   r    r   r   r   r      r^   zBaseCharacters.blankc                 C   r_   r   )rZ   r[   )r   r   r   r   r   r     r`   c                 C   r)   r   r*   r    r   r   r   r     r^   zBaseCharacters.vocabc                 C   s6   || _ dd t| jD | _dd t| jD | _d S )Nc                 S   r,   r   r   r-   r   r   r   r1     r2   z(BaseCharacters.vocab.<locals>.<dictcomp>c                 S   r3   r   r   r-   r   r   r   r1     r4   )r+   r7   r   r5   r6   r8   r   r   r   r     s
   c                 C   r@   r   rA   r    r   r   r   rB     rN   zBaseCharacters.num_charsc                    s6   j } jrtt|} jrt|}t|} jd ur)t jdkr) jg| n|} jd ur=t jdkr= jg| n|} j	d urQt j	dkrQ j	g| n|} j
d uret j
dkre j
g| n|}|t j  _ jr fdd jD }t jt j  krt jksn J d| d S d S )Nr   c                    s    h | ]} j |d kr|qS )   )r   count)r.   xr    r   r   	<setcomp>%  s     z/BaseCharacters._create_vocab.<locals>.<setcomp>z: [!] There are duplicate characters in the character set. )r   r=   listsetr>   sortedrZ   r   r   r   r   r   r   r5   r6   )r   r+   
duplicatesr   r    r   r[     s$   ((((*zBaseCharacters._create_vocabr0   c              
   C   s<   z| j | W S  ty } ztdt| d|d }~ww )NrC   rD   rE   rH   r   r   r   r   *  s   zBaseCharacters.char_to_idr/   c                 C   rJ   r   rK   rL   r   r   r   rM   0  s   
zBaseCharacters.id_to_charr   levelc                 C   s   d| }t | d| j  t | d| j  t | d| j  t | d| j  t | d| j  t | d| j  t | d| j  t | d	| j  d
S )z9
        Prints the vocabulary in a nice format.
        	z| > Characters: z| > Punctuations: z	| > Pad: z	| > EOS: z	| > BOS: z| > Blank: z| > Vocab: z| > Num chars: N)	printr   r   r   r   r   rZ   r   rB   )r   ri   indentr   r   r   	print_log3  s   zBaseCharacters.print_logr:   Coqpitc                 C   s<   | j durtdi | j | fS t }t| | d}||fS )zcInit your character class from a config.

        Implement this method for your subclass.
        Nr   r   )r   rY   r   r?   r:   r   
new_configr   r   r   r<   A  s
   
zBaseCharacters.init_from_configr   c              
   C   s(   t | j| j| j| j| j| j| j| jdS )Nr   r	   r   r   r   r   r=   r>   )	r   r   r   r   r   r   rZ   r=   r>   r    r   r   r   r?   O  s   zBaseCharacters.to_config)NNNNNNFT)r   r:   rn   rO   )rP   rQ   rR   rS   rT   boolr   rU   rV   r!   r$   r(   r&   r   rW   r	   r   r   r   r   r   rB   r[   r   rM   rm   rX   r<   r?   r   r   r   r   rY      s    $	
















rY   c                       f   e Zd ZdZeeeeee	ddfde
de
de
de
de
d	e
d
ededdf fddZedddZ  ZS )IPAPhonemesu-  🐸IPAPhonemes class to manage `TTS.tts` model vocabulary

    Intended to be used with models using IPAPhonemes as input.
    It uses system defaults for the undefined class arguments.

    Args:
        characters (str):
            Main set of case-sensitive characters to be used in the vocabulary. Defaults to `_phonemes`.

        punctuations (str):
            Characters to be treated as punctuation. Defaults to `_punctuations`.

        pad (str):
            Special padding character that would be ignored by the model. Defaults to `_pad`.

        eos (str):
            End of the sentence character. Defaults to `_eos`.

        bos (str):
            Beginning of the sentence character. Defaults to `_bos`.

        blank (str):
            Optional character used between characters by some models for better prosody. Defaults to `_blank`.

        is_unique (bool):
            Remove duplicates from the provided characters. Defaults to True.

        is_sorted (bool):
            Sort the characters in alphabetical order. Defaults to True.
    FTr   r	   r   r   r   r   r=   r>   r   Nc	           	   
         t  |||||||| d S r   superr   r\   	__class__r   r   r   |     zIPAPhonemes.__init__r:   rn   c              
   C   s   d| v rB| j durBd| j v r| j jdur| j d | j d< t| j d | j d | j d | j d | j d | j d | j d	 | j d
 d| fS | j durQtdi | j | fS t }t| | d}||fS )zInit a IPAPhonemes object from a model config

        If characters are not defined in the config, it will be set to the default characters and the config
        will be updated.
        r   Nr
   r	   r   r   r   r   r=   r>   rr   ro   r   )r   r
   rv   r   r?   rp   r   r   r   r<     s(   

zIPAPhonemes.init_from_configrs   )rP   rQ   rR   rS   r   r   r   r   r   rZ   rT   rt   r   rX   r<   __classcell__r   r   rz   r   rv   \  s>    !	
rv   c                       ru   )	Graphemesu  🐸Graphemes class to manage `TTS.tts` model vocabulary

    Intended to be used with models using graphemes as input.
    It uses system defaults for the undefined class arguments.

    Args:
        characters (str):
            Main set of case-sensitive characters to be used in the vocabulary. Defaults to `_characters`.

        punctuations (str):
            Characters to be treated as punctuation. Defaults to `_punctuations`.

        pad (str):
            Special padding character that would be ignored by the model. Defaults to `_pad`.

        eos (str):
            End of the sentence character. Defaults to `_eos`.

        bos (str):
            Beginning of the sentence character. Defaults to `_bos`.

        is_unique (bool):
            Remove duplicates from the provided characters. Defaults to True.

        is_sorted (bool):
            Sort the characters in alphabetical order. Defaults to True.
    FTr   r	   r   r   r   r   r=   r>   r   Nc	           	   
      rw   r   rx   r\   rz   r   r   r     r|   zGraphemes.__init__r:   rn   c              
   C   s   | j dur:d| j v r0t| j d | j d | j d | j d | j d | j d | j d	 | j d
 d| fS tdi | j | fS t }t| | d}||fS )zInit a Graphemes object from a model config

        If characters are not defined in the config, it will be set to the default characters and the config
        will be updated.
        Nr
   r   r	   r   r   r   r   r=   r>   rr   ro   r   )r   r~   r   r?   rp   r   r   r   r<     s$   


zGraphemes.init_from_configrs   )rP   rQ   rR   rS   r   r   r   r   r   rZ   rT   rt   r   rX   r<   r}   r   r   rz   r   r~     s>    	
r~   __main__N)dataclassesr   typingr   TTS.tts.configs.shared_configsr   r   r   r   r   rZ   r   r   _vowels_non_pulmonic_consonants_pulmonic_consonants_suprasegmentals_other_symbols_diacrilicsr   r   rY   rv   r~   rP   grphrm   r   r   r   r   <module>   s8    d SNG