o
    
jw                     @   sX   d dl Z d dlZd dlmZ d dlmZ e je je j	e
dZG dd dZdS )    N)	Tokenizerenglish_cleanersz*../../utils/assets/tortoise/tokenizer.jsonc                   @   s2   e Zd ZedfddZdd Zdd Zdd	 ZdS )
VoiceBpeTokenizerNc                 C   s6   d | _ |d urt|| _ |d urt|| _ d S d S N)	tokenizerr   	from_filefrom_str)self
vocab_file	vocab_str r   T/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/tortoise/tokenizer.py__init__   s   zVoiceBpeTokenizer.__init__c                 C   s   t |}|S r   r   r
   txtr   r   r   preprocess_text   s   z!VoiceBpeTokenizer.preprocess_textc                 C   s$   |  |}|dd}| j|jS )N [SPACE])r   replacer   encodeidsr   r   r   r   r      s   
zVoiceBpeTokenizer.encodec                 C   sX   t |tjr|  }| jj|dddd}|dd}|dd}|dd}|S )NF)skip_special_tokensr    r   z[STOP]z[UNK])
isinstancetorchTensorcpunumpyr   decoder   )r
   seqr   r   r   r   r      s   zVoiceBpeTokenizer.decode)__name__
__module____qualname__DEFAULT_VOCAB_FILEr   r   r   r   r   r   r   r   r      s
    r   )osr   
tokenizersr   TTS.tts.utils.text.cleanersr   pathjoindirnamerealpath__file__r$   r   r   r   r   r   <module>   s    