o
    
j!                     @   sL   d dl mZmZ d dlmZmZ eG dd deZeG dd deZdS )    )	dataclassfield)BaseAudioConfigBaseTrainingConfigc                   @   s   e Zd ZU dZeedZeed< dZe	ed< dZ
eed< dZeed	< d
Zeed< dZeed< dZeed< dZeed< dZe	ed< dZeed< dZeed< dZeed< edd dZeed< d
S )BaseVocoderConfiga"  Shared parameters among all the vocoder models.
    Args:
        audio (BaseAudioConfig):
            Audio processor config instance. Defaultsto `BaseAudioConfig()`.
        use_noise_augment (bool):
            Augment the input audio with random noise. Defaults to False/
        eval_split_size (int):
            Number of instances used for evaluation. Defaults to 10.
        data_path (str):
            Root path of the training data. All the audio files found recursively from this root path are used for
            training. Defaults to `""`.
        feature_path (str):
            Root path to the precomputed feature files. Defaults to None.
        seq_len (int):
            Length of the waveform segments used for training. Defaults to 1000.
        pad_short (int):
            Extra padding for the waveforms shorter than `seq_len`. Defaults to 0.
        conv_path (int):
            Extra padding for the feature frames against convolution of the edge frames. Defaults to MISSING.
            Defaults to 0.
        use_cache (bool):
            enable / disable in memory caching of the computed features. If the RAM is not enough, if may cause OOM.
            Defaults to False.
        epochs (int):
            Number of training epochs to. Defaults to 10000.
        wd (float):
            Weight decay.
         optimizer (torch.optim.Optimizer):
            Optimizer used for the training. Defaults to `AdamW`.
        optimizer_params (dict):
            Optimizer kwargs. Defaults to `{"betas": [0.8, 0.99], "weight_decay": 0.0}`
    default_factoryaudioFuse_noise_augment
   eval_split_size 	data_pathNfeature_pathi  seq_lenr   	pad_shortconv_pad	use_cachei'  epochs        wdAdamW	optimizerc                   C   s   ddgddS )Ng?gGz?r   )betasweight_decay r   r   r   U/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vocoder/configs/shared_configs.py<lambda>8   s    zBaseVocoderConfig.<lambda>optimizer_params)__name__
__module____qualname____doc__r   r   r	   __annotations__r
   boolr   intr   strr   r   r   r   r   r   r   floatr   r   dictr   r   r   r   r      s   
 !r   c                   @   s  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed< dZeed< dZeed	< dZeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< edd dZeed< edd dZeed< dZeed< edd dZeed< dZeed < dZeed!< d"Zeed#< ed$d dZeed%< d"Zeed&< ed'd dZ eed(< dZ!eed)< d*Z"eed+< dZ#d*Z$eed,< d-S ).BaseGANVocoderConfiga  Base config class used among all the GAN based vocoders.
    Args:
        use_stft_loss (bool):
            enable / disable the use of STFT loss. Defaults to True.
        use_subband_stft_loss (bool):
            enable / disable the use of Subband STFT loss. Defaults to True.
        use_mse_gan_loss (bool):
            enable / disable the use of Mean Squared Error based GAN loss. Defaults to True.
        use_hinge_gan_loss (bool):
            enable / disable the use of Hinge GAN loss. Defaults to True.
        use_feat_match_loss (bool):
            enable / disable feature matching loss. Defaults to True.
        use_l1_spec_loss (bool):
            enable / disable L1 spectrogram loss. Defaults to True.
        stft_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 0.
        subband_stft_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 0.
        mse_G_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 1.
        hinge_G_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 0.
        feat_match_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 100.
        l1_spec_loss_weight (float):
            Loss weight that multiplies the computed loss value. Defaults to 45.
        stft_loss_params (dict):
            Parameters for the STFT loss. Defaults to `{"n_ffts": [1024, 2048, 512], "hop_lengths": [120, 240, 50], "win_lengths": [600, 1200, 240]}`.
        l1_spec_loss_params (dict):
            Parameters for the L1 spectrogram loss. Defaults to
            `{
                "use_mel": True,
                "sample_rate": 22050,
                "n_fft": 1024,
                "hop_length": 256,
                "win_length": 1024,
                "n_mels": 80,
                "mel_fmin": 0.0,
                "mel_fmax": None,
            }`
        target_loss (str):
            Target loss name that defines the quality of the model. Defaults to `G_avg_loss`.
        grad_clip (list):
            A list of gradient clipping theresholds for each optimizer. Any value less than 0 disables clipping.
            Defaults to [5, 5].
        lr_gen (float):
            Generator model initial learning rate. Defaults to 0.0002.
        lr_disc (float):
            Discriminator model initial learning rate. Defaults to 0.0002.
        lr_scheduler_gen (torch.optim.Scheduler):
            Learning rate scheduler for the generator. Defaults to `ExponentialLR`.
        lr_scheduler_gen_params (dict):
            Parameters for the generator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`.
        lr_scheduler_disc (torch.optim.Scheduler):
            Learning rate scheduler for the discriminator. Defaults to `ExponentialLR`.
        lr_scheduler_disc_params (dict):
            Parameters for the discriminator learning rate scheduler. Defaults to `{"gamma": 0.999, "last_epoch": -1}`.
        scheduler_after_epoch (bool):
            Whether to update the learning rate schedulers after each epoch. Defaults to True.
        use_pqmf (bool):
            enable / disable PQMF for subband approximation at training. Defaults to False.
        steps_to_start_discriminator (int):
            Number of steps required to start training the discriminator. Defaults to 0.
        diff_samples_for_G_and_D (bool):
            enable / disable use of different training samples for the generator and the discriminator iterations.
            Enabling it results in slower iterations but faster convergance in some cases. Defaults to False.
    ganmodelTuse_stft_lossuse_subband_stft_lossuse_mse_gan_lossuse_hinge_gan_lossuse_feat_match_lossuse_l1_spec_lossr   stft_loss_weightsubband_stft_loss_weight   mse_G_loss_weighthinge_G_loss_weightd   feat_match_loss_weight-   l1_spec_loss_weightc                   C   s   g dg dg ddS )N)   i   i   )x      2   )iX  i  r=   )n_fftshop_lengthswin_lengthsr   r   r   r   r   r      s   zBaseGANVocoderConfig.<lambda>r   stft_loss_paramsc                	   C   s   dddddddd dS )NTi"V  r;      P   r   )use_melsample_raten_fft
hop_length
win_lengthn_melsmel_fminmel_fmaxr   r   r   r   r   r      s   l1_spec_loss_paramsloss_0target_lossc                   C   s   ddgS )N   r   r   r   r   r   r      s    	grad_clipg-C6*?lr_genlr_discExponentialLRlr_scheduler_genc                   C   
   dddS Ng+?)gamma
last_epochr   r   r   r   r   r         
 lr_scheduler_gen_paramslr_scheduler_discc                   C   rV   rW   r   r   r   r   r   r      r[   lr_scheduler_disc_paramsscheduler_after_epochFuse_pqmfdiff_samples_for_G_and_DN)%r   r    r!   r"   r+   r&   r#   r,   r$   r-   r.   r/   r0   r1   r2   r'   r3   r5   r6   r8   r:   r   rB   r(   rM   rO   rQ   rR   rS   rU   r\   r]   r^   r_   r`   steps_to_start_discriminatorra   r   r   r   r   r)   ;   sB   
 Dr)   N)dataclassesr   r   
TTS.configr   r   r   r)   r   r   r   r   <module>   s    4