o
    
jr                     @   s`   d dl mZmZmZ d dlmZmZ d dlmZm	Z	 d dl
mZmZmZ eG dd deZdS )    )asdict	dataclassfield)DictList)Coqpitcheck_argument)BaseAudioConfigBaseDatasetConfigBaseTrainingConfigc                   @   s  e Zd ZU dZeedZeed< dZe	ed< dZ
eed< dZe	ed	< ed
Ze	ed< dZe	ed< ed
Ze	ed< dZeed< dZeed< dZeed< dZe	ed< dZeed< dZeed< dZeed< dZeed< edd dZee ed< dZeed< dZeed< dZ eed< edd dZ!eed< ed d dZ"ee ed!< dZ#e	ed"< d#Z$eed$< dZ%eed%< d&Z&eed'< dZ'eed(< d&Z(eed)< dZ)eed*< d&Z*eed+< dS ),BaseVCConfiga6  Shared parameters among all the tts models.

    Args:

        audio (BaseAudioConfig):
            Audio processor config object instance.

        batch_group_size (int):
            Size of the batch groups used for bucketing. By default, the dataloader orders samples by the sequence
            length for a more efficient and stable training. If `batch_group_size > 1` then it performs bucketing to
            prevent using the same batches for each epoch.

        loss_masking (bool):
            enable / disable masking loss values against padded segments of samples in a batch.

        min_text_len (int):
            Minimum length of input text to be used. All shorter samples will be ignored. Defaults to 0.

        max_text_len (int):
            Maximum length of input text to be used. All longer samples will be ignored. Defaults to float("inf").

        min_audio_len (int):
            Minimum length of input audio to be used. All shorter samples will be ignored. Defaults to 0.

        max_audio_len (int):
            Maximum length of input audio to be used. All longer samples will be ignored. The maximum length in the
            dataset defines the VRAM used in the training. Hence, pay attention to this value if you encounter an
            OOM error in training. Defaults to float("inf").

        compute_f0 (int):
            (Not in use yet).

        compute_energy (int):
            (Not in use yet).

        compute_linear_spec (bool):
            If True data loader computes and returns linear spectrograms alongside the other data.

        precompute_num_workers (int):
            Number of workers to precompute features. Defaults to 0.

        use_noise_augment (bool):
            Augment the input audio with random noise.

        start_by_longest (bool):
            If True, the data loader will start loading the longest batch first. It is useful for checking OOM issues.
            Defaults to False.

        shuffle (bool):
            If True, the data loader will shuffle the dataset when there is not sampler defined. Defaults to True.

        drop_last (bool):
            If True, the data loader will drop the last batch if it is not complete. It helps to prevent
            issues that emerge from the partial batch statistics. Defaults to True.

        add_blank (bool):
            Add blank characters between each other two characters. It improves performance for some models at expense
            of slower run-time due to the longer input sequence.

        datasets (List[BaseDatasetConfig]):
            List of datasets used for training. If multiple datasets are provided, they are merged and used together
            for training.

        optimizer (str):
            Optimizer used for the training. Set one from `torch.optim.Optimizer` or `TTS.utils.training`.
            Defaults to ``.

        optimizer_params (dict):
            Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`

        lr_scheduler (str):
            Learning rate scheduler for the training. Use one from `torch.optim.Scheduler` schedulers or
            `TTS.utils.training`. Defaults to ``.

        lr_scheduler_params (dict):
            Parameters for the generator learning rate scheduler. Defaults to `{"warmup": 4000}`.

        test_sentences (List[str]):
            List of sentences to be used at testing. Defaults to '[]'

        eval_split_max_size (int):
            Number maximum of samples to be used for evaluation in proportion split. Defaults to None (Disabled).

        eval_split_size (float):
            If between 0.0 and 1.0 represents the proportion of the dataset to include in the evaluation set.
            If > 1, represents the absolute number of evaluation samples. Defaults to 0.01 (1%).

        use_speaker_weighted_sampler (bool):
            Enable / Disable the batch balancer by speaker. Defaults to ```False```.

        speaker_weighted_sampler_alpha (float):
            Number that control the influence of the speaker sampler weights. Defaults to ```1.0```.

        use_language_weighted_sampler (bool):
            Enable / Disable the batch balancer by language. Defaults to ```False```.

        language_weighted_sampler_alpha (float):
            Number that control the influence of the language sampler weights. Defaults to ```1.0```.

        use_length_weighted_sampler (bool):
            Enable / Disable the batch balancer by audio length. If enabled the dataset will be divided
            into 10 buckets considering the min and max audio of the dataset. The sampler weights will be
            computed forcing to have the same quantity of data for each bucket in each training batch. Defaults to ```False```.

        length_weighted_sampler_alpha (float):
            Number that control the influence of the length sampler weights. Defaults to ```1.0```.
    )default_factoryaudior   batch_group_sizeNloss_masking   min_audio_leninfmax_audio_lenmin_text_lenmax_text_lenF
compute_f0compute_energycompute_linear_specprecompute_num_workersuse_noise_augmentstart_by_longestshuffle	drop_lastc                   C   s   t  gS N)r
    r    r    P/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vc/configs/shared_configs.py<lambda>   s    zBaseVCConfig.<lambda>datasetsradam	optimizeroptimizer_paramslr_schedulerc                   C   s   i S r   r    r    r    r    r!   r"          lr_scheduler_paramsc                   C   s   g S r   r    r    r    r    r!   r"      r(   test_sentenceseval_split_max_sizeg{Gz?eval_split_sizeuse_speaker_weighted_samplerg      ?speaker_weighted_sampler_alphause_language_weighted_samplerlanguage_weighted_sampler_alphause_length_weighted_samplerlength_weighted_sampler_alpha)+__name__
__module____qualname____doc__r   r	   r   __annotations__r   intr   boolr   floatr   r   r   r   r   r   r   r   r   r   r   r#   r   r
   r%   strr&   dictr'   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r    r    r    r!   r   	   s>   
 lr   N)dataclassesr   r   r   typingr   r   coqpitr   r   
TTS.configr	   r
   r   r   r    r    r    r!   <module>   s    