o
    ´‹
jê  ã                   @   s@   d dl mZmZ d dlmZ d dlmZ eG dd„ deƒƒZdS )é    )Ú	dataclassÚfield)ÚList)ÚBaseTTSConfigc                   @   s  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zeed#< d$Zeed%< dZeed&< d'Zeed(< dZeed)< d*Z eed+< e!d,d-„ d.Z"e#e ed/< e!d0d-„ d.Z$e%ed1< d2Z&eed3< d4Z'eed5< e!d6d-„ d.Z(e%ed7< d8Z)eed9< d2Z*eed:< dZ+eed;< d<Z,eed=< dZ-eed>< dZ.eed?< e!d@d-„ d.Z/e#e edA< dZ0eedB< d
Z1eedC< d
Z2eedD< dEdF„ Z3dS )GÚNeuralhmmTTSConfigu|  
    Define parameters for Neural HMM TTS model.

    Example:

        >>> from TTS.tts.configs.overflow_config import OverflowConfig
        >>> config = OverflowConfig()

    Args:
        model (str):
            Model name used to select the right model class to initilize. Defaults to `Overflow`.
        run_eval_steps (int):
            Run evalulation epoch after N steps. If None, waits until training epoch is completed. Defaults to None.
        save_step (int):
            Save local checkpoint every save_step steps. Defaults to 500.
        plot_step (int):
            Plot training stats on the logger every plot_step steps. Defaults to 1.
        model_param_stats (bool):
            Log model parameters stats on the logger dashboard. Defaults to False.
        force_generate_statistics (bool):
            Force generate mel normalization statistics. Defaults to False.
        mel_statistics_parameter_path (str):
            Path to the mel normalization statistics.If the model doesn't finds a file there it will generate statistics.
            Defaults to None.
        num_chars (int):
            Number of characters used by the model. It must be defined before initializing the model. Defaults to None.
        state_per_phone (int):
            Generates N states per phone. Similar, to `add_blank` parameter in GlowTTS but in Overflow it is upsampled by model's encoder. Defaults to 2.
        encoder_in_out_features (int):
            Channels of encoder input and character embedding tensors. Defaults to 512.
        encoder_n_convolutions (int):
            Number of convolution layers in the encoder. Defaults to 3.
        out_channels (int):
            Channels of the final model output. It must match the spectragram size. Defaults to 80.
        ar_order (int):
            Autoregressive order of the model. Defaults to 1. In ablations of Neural HMM it was found that more autoregression while giving more variation hurts naturalness of the synthesised audio.
        sampling_temp (float):
            Variation added to the sample from the latent space of neural HMM. Defaults to 0.334.
        deterministic_transition (bool):
            deterministic duration generation based on duration quantiles as defiend in "S. Ronanki, O. Watts, S. King, and G. E. Henter, â€œMedianbased generation of synthetic speech durations using a nonparametric approach,â€ in Proc. SLT, 2016.". Defaults to True.
        duration_threshold (float):
            Threshold for duration quantiles. Defaults to 0.55. Tune this to change the speaking rate of the synthesis, where lower values defines a slower speaking rate and higher values defines a faster speaking rate.
        use_grad_checkpointing (bool):
            Use gradient checkpointing to save memory. In a multi-GPU setting currently pytorch does not supports gradient checkpoint inside a loop so we will have to turn it off then.Adjust depending on whatever get more batch size either by using a single GPU or multi-GPU. Defaults to True.
        max_sampling_time (int):
            Maximum sampling time while synthesising latents from neural HMM. Defaults to 1000.
        prenet_type (str):
            `original` or `bn`. `original` sets the default Prenet and `bn` uses Batch Normalization version of the
            Prenet. Defaults to `original`.
        prenet_dim (int):
            Dimension of the Prenet. Defaults to 256.
        prenet_n_layers (int):
            Number of layers in the Prenet. Defaults to 2.
        prenet_dropout (float):
            Dropout rate of the Prenet. Defaults to 0.5.
        prenet_dropout_at_inference (bool):
            Use dropout at inference time. Defaults to False.
        memory_rnn_dim (int):
            Dimension of the memory LSTM to process the prenet output. Defaults to 1024.
        outputnet_size (list[int]):
            Size of the output network inside the neural HMM. Defaults to [1024].
        flat_start_params (dict):
            Parameters for the flat start initialization of the neural HMM. Defaults to `{"mean": 0.0, "std": 1.0, "transition_p": 0.14}`.
            It will be recomputed when you pass the dataset.
        std_floor (float):
            Floor value for the standard deviation of the neural HMM. Prevents model cheating by putting point mass and getting infinite likelihood at any datapoint. Defaults to 0.01.
            It is called `variance flooring` in standard HMM literature.
        optimizer (str):
            Optimizer to use for training. Defaults to `adam`.
        optimizer_params (dict):
            Parameters for the optimizer. Defaults to `{"weight_decay": 1e-6}`.
        grad_clip (float):
            Gradient clipping threshold. Defaults to 40_000.
        lr (float):
            Learning rate. Defaults to 1e-3.
        lr_scheduler (str):
            Learning rate scheduler for the training. Use one from `torch.optim.Scheduler` schedulers or
            `TTS.utils.training`. Defaults to `None`.
        min_seq_len (int):
            Minimum input sequence length to be used at training.
        max_seq_len (int):
            Maximum input sequence length to be used at training. Larger values result in more VRAM usage.
    ÚNeuralHMM_TTSÚmodeléd   Úrun_eval_stepsiô  Ú	save_stepé   Ú	plot_stepFÚmodel_param_statsÚforce_generate_statisticsNÚmel_statistics_parameter_pathÚ	num_charsé   Ústate_per_phonei   Úencoder_in_out_featuresé   Úencoder_n_convolutionséP   Úout_channelsÚar_orderr   Úsampling_tempTÚdeterministic_transitiong…ëQ¸…Û?Úduration_thresholdÚuse_grad_checkpointingiè  Úmax_sampling_timeÚoriginalÚprenet_typeé   Ú
prenet_dimÚprenet_n_layersg      à?Úprenet_dropoutÚprenet_dropout_at_inferenceé   Úmemory_rnn_dimc                   C   ó   dgS )Nr&   © r)   r)   r)   úW/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/configs/neuralhmm_tts_config.pyÚ<lambda>   s    zNeuralhmmTTSConfig.<lambda>)Údefault_factoryÚoutputnet_sizec                   C   s   ddddœS )Ng        g      ð?gìQ¸…ëÁ?)ÚmeanÚstdÚtransition_pr)   r)   r)   r)   r*   r+   ‚   s    Úflat_start_paramsgü©ñÒMbP?Ú	std_floorÚAdamÚ	optimizerc                   C   s   ddiS )NÚweight_decaygíµ ÷Æ°>r)   r)   r)   r)   r*   r+   ‡   s    Úoptimizer_paramsg     ˆã@Ú	grad_clipÚlrÚlr_scheduleré
   Úmin_text_lenÚmax_text_lenÚmin_audio_lenc                   C   r(   )NzBe a voice, not an echo.r)   r)   r)   r)   r*   r+   “   s   ÿÚtest_sentencesÚrÚuse_d_vector_fileÚuse_speaker_embeddingc                 C   sd   | j dks	J dƒ‚t| jƒdksJ d| j› ƒ‚d| jd   k r%dk s0n J d| jd › ƒ‚dS )zÈValidate the hyperparameters.

        Raises:
            AssertionError: when the parameters network is not defined
            AssertionError: transition probability is not between 0 and 1
        r   z>AR order must be greater than 0 it is an autoregressive model.r   zeParameter Network must have atleast one layer check the config file for parameter network. Provided: r0   z:Transition probability must be between 0 and 1. Provided: N)r   Úlenr-   Úparameternetworkr1   )Úselfr)   r)   r*   Úcheck_values   s   
ÿÿzNeuralhmmTTSConfig.check_values)4Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   ÚstrÚ__annotations__r
   Úintr   r   r   Úboolr   r   r   r   r   r   r   r   r   Úfloatr   r   r   r   r    r"   r#   r$   r%   r'   r   r-   r   r1   Údictr2   r4   r6   r7   r8   r9   r;   r<   r=   r>   r?   r@   rA   rE   r)   r)   r)   r*   r      sX   
 Tÿr   N)Údataclassesr   r   Útypingr   ÚTTS.tts.configs.shared_configsr   r   r)   r)   r)   r*   Ú<module>   s
    