o
    
jp                     @   sb  d dl Z d dlZd dlZd dl mZ d dlZd dlZd dlmZ d dl	m	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ edkr/e jdedZejdeddd ejdeddd ejdedddd ejdedddd ejdeddd ejdeddd ejd d!ed"d# e Z ee j!Z"ed<i e"j#Z$d$e"% v red<i e"j&\ZZe"j're(ene(eZ)ee"Z*ee*e j+e j,d\Z*Z-e.d%Z/e0e/e j1Z/e/e j2e j3Z4ee*j5j6e"j7de$e4d$e"% v re"j&ndd&e"% v re"d& nde"j'e"j8e"j9e"j:d'Z1e1;e"j<d(dd) ee1e j=d*e1j>ddd+Z?g Z@eA  e	e?D ]ZBeBd  ZCeBd, ZDeBd- ZEeBd* ZFeBd. ZGeBd/ ZHeBd0 ZIe j,reeCJ ZCeDJ ZDeFJ ZFeGJ ZGe*KeCeDeFZLeLd1 M ZNeOeND ]g\ZPZQeIeP ZRejSjTjUeQVd d,Wd de*j5j6d2ddd3Xd Vd d,ZQeQdeGeP deDeP f Y  ZQejZ[eRZ\ejZ]e\d  d4 Z^eR_e\e^Z`ejZaeRZbejZae`Zce@debecg eee`eQ qvq3ejZfe j2d5Zgehegd6d7d8Zie@D ]Zjeikejd   d9ejd,  d: qW d   n	1 sw   Y  eld;eg  W d   dS 1 s(w   Y  dS dS )=    N)RawTextHelpFormatter)
DataLoader)tqdm)load_config)
TTSDataset)setup_model)make_symbolsphonemessymbols)AudioProcessor)load_checkpoint__main__a  Extract attention masks from trained Tacotron/Tacotron2 models.
These masks can be used for different purposes including training a TTS model with a Duration Predictor.

Each attention mask is written to the same path as the input wav file with ".npy" file extension.
(e.g. path/bla.wav (wav file) --> path/bla.npy (attention mask))

Example run:
    CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py
        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
        --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
        --dataset_metafile metadata.csv
        --data_path /root/LJSpeech-1.1/
        --batch_size 32
        --dataset ljspeech
        --use_cuda True
)descriptionformatter_classz--model_pathTz&Path to Tacotron/Tacotron2 model file )typerequiredhelpz--config_pathz'Path to Tacotron/Tacotron2 config file.z	--dataset z>Target dataset processor name from TTS.tts.dataset.preprocess.)r   defaultr   r   z--dataset_metafilez7Dataset metafile inclusing file paths with transcripts.z--data_pathz1Defines the data path. It overwrites config.json.)r   r   r   z
--use_cudaFzenable/disable cuda.z--batch_size   z?Batch size for the model. Use batch_size=1 if you have no CUDA.)r   r   r   
characterszTTS.tts.datasets.formatters	add_blank)	compute_linear_specap	meta_datar   r   use_phonemesphoneme_cache_pathphoneme_languageenable_eos_bossort_by_audio_len)r      )
batch_sizenum_workers
collate_fnshuffle	drop_last               
alignmentsnearest)sizescale_factormodealign_cornersrecompute_scale_factorz	_attn.npyzmetadata_attn_mask.txtwzutf-8)encoding|
z >> Metafile created:  )margparse	importlibosr   numpynptorchtorch.utils.datar   r   
TTS.configr   TTS.tts.datasets.TTSDatasetr   TTS.tts.modelsr   TTS.tts.utils.text.charactersr   r	   r
   TTS.utils.audior   TTS.utils.ior   __name__ArgumentParserparseradd_argumentstrboolint
parse_argsargsconfig_pathCaudior   keysr   r   len	num_charsmodel
model_pathuse_cuda_import_modulepreprocessorgetattrdataset	data_pathdataset_metafiler   decoderrtext_cleanerr   r   enable_eos_bos_charssort_and_filter_itemsgetr!   r#   loader
file_pathsno_graddata
text_inputtext_lengthslinear_input	mel_inputmel_lengthsstop_targets	item_idxscudaforwardmodel_outputsdetachr+   	enumerateidx	alignmentitem_idxnn
functionalinterpolate	transpose	unsqueezesqueezecpupathbasenamewav_file_namesplitextalign_file_namereplace	file_pathabspathwav_file_abs_pathfile_abs_pathappendsavejoinmetafileopenfpwriteprintr6   r6   r6   R/home/kuhnn/.local/lib/python3.10/site-packages/TTS/bin/compute_attention_masks.py<module>   s    




$"$