o
    
j8                     @   s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlm Z  d dl!m"Z" G dd de Z#dS )    )	signature)DictListTupleN)Coqpit)nn)
DataLoader)DistributedSampler)get_optimizerget_scheduler)AudioProcessor)load_fsspec)
GANDataset)DiscriminatorLossGeneratorLoss)setup_discriminatorsetup_generator)BaseVocoder)plot_resultsc                       s  e Zd Zd;dedef fddZdejdejfdd	Zdejdejfd
dZ	de
de
dedee
e
f fddZdedede
de
dee
e
f f
ddZde
de
ddde
dedee
ejf fddZe de
dejdedee
e
f fddZde
de
ddde
dedee
ejf fddZ		d<ded ed!ed"eddf
d#d$Zd=d%d&Zdefd'd(Zdefd)d*Zdefd+d,Zedede
fd-d.Z 	d;dede
d/d0d1ed2ed3ed4efd5d6Z!d7d8 Z"ed>dedd fd9d:Z#  Z$S )?GANNconfigapc                    s<   t  | || _t|| _t|| _d| _d| _|| _	dS )u  Wrap a generator and a discriminator network. It provides a compatible interface for the trainer.
        It also helps mixing and matching different generator and disciminator networks easily.

        To implement a new GAN models, you just need to define the generator and the discriminator networks, the rest
        is handled by the `GAN` class.

        Args:
            config (Coqpit): Model configuration.
            ap (AudioProcessor): 🐸TTS AudioProcessor instance. Defaults to None.

        Examples:
            Initializing the GAN model with HifiGAN generator and discriminator.
            >>> from TTS.vocoder.configs import HifiganConfig
            >>> config = HifiganConfig()
            >>> model = GAN(config)
        FN)
super__init__r   r   model_gr   model_d
train_discy_hat_gr   )selfr   r   	__class__ I/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vocoder/models/gan.pyr      s   


zGAN.__init__xreturnc                 C      | j |S )zRun the generator's forward pass.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: output of the GAN generator network.
        )r   forwardr   r#   r!   r!   r"   r&   /   s   	zGAN.forwardc                 C   r%   )zRun the generator's inference pass.

        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: output of the GAN generator network.
        )r   	inferencer'   r!   r!   r"   r(   :   s   zGAN.inferencebatch	criterionoptimizer_idxc                 C   s  i }i }|d }|d }|dvrt d|dkr| |ddddd|df }|| _d| _d| _|jd dkrN|| _| j|}|| _| j|| _d	\}	}
}| j	r| j
jr|d
 }|d }t  | |}W d   n1 svw   Y  |jd dkr| j|}n| }| }| j}tt| jjjdkr| |  |}| ||}n| | }| |}t|tr|\}	}
|du rd\}}n	|\}}n|}	|}|| |	|}d|i}|dkr\d	\}	}
}| j	rHtt| jjjdkr| | j|}n| | j}d}| j
jr+t  | |}W d   n	1 s&w   Y  t|trB|\}	}
|du r=d}n|\}}n|}	d\}
}|| | j||	|
|| j| j}d| ji}||fS )a0  Compute model outputs and the loss values. `optimizer_idx` selects the generator or the discriminator for
        network on the current pass.

        Args:
            batch (Dict): Batch of samples returned by the dataloader.
            criterion (Dict): Criterion used to compute the losses.
            optimizer_idx (int): ID of the optimizer in use on the current pass.

        Raises:
            ValueError: `optimizer_idx` is an unexpected value.

        Returns:
            Tuple[Dict, Dict]: model outputs and the computed loss values.
        inputwaveform)r      z  [!] Unexpected `optimizer_idx`.r   N   r.   )NNN
input_discwaveform_disc)NNmodel_outputs)
ValueErrorr   sizer   	y_hat_suby_sub_gshapepqmf_synthesispqmf_analysisr   r   diff_samples_for_G_and_Dtorchno_gradclonelenr   r   r&   
parametersdetach
isinstancetupleuse_feat_match_loss)r   r)   r*   r+   outputs	loss_dictr#   yy_hatscores_fake
feats_fake
feats_realx_dy_d
D_out_fake
D_out_realscores_real_r!   r!   r"   
train_stepD   s   &












zGAN.train_stepnamerD   c           
      C   sd   | j r	|d d n|d d }|d }t||||}|d d   }| d|i}	||	fS )a  Logging shared by the training and evaluation.

        Args:
            name (str): Name of the run. `train` or `eval`,
            ap (AudioProcessor): Audio processor used in training.
            batch (Dict): Batch used in the last train/eval step.
            outputs (Dict): Model outputs from the last train/eval step.

        Returns:
            Tuple[Dict, Dict]: log figures and audio samples.
        r   r2   r.   r-   z/audio)r   r   squeezer@   cpunumpy)
r   rR   r   r)   rD   rG   rF   figuressample_voiceaudiosr!   r!   r"   _log   s   zGAN._logloggerLoggerassetsstepsc                 C   8   |  d| j||\}}||| |||| jj dS )zCall `_log()` for training.evalNrY   r   eval_figureseval_audiossample_rater   r)   rD   rZ   r\   r]   rV   rX   r!   r!   r"   	train_log      zGAN.train_logc                 C   s   d| _ | |||S )z$Call `train_step()` with `no_grad()`T)r   rQ   )r   r)   r*   r+   r!   r!   r"   	eval_step   s   zGAN.eval_stepc                 C   r^   )zCall `_log()` for evaluation.r_   Nr`   rd   r!   r!   r"   eval_log   rf   zGAN.eval_logFcheckpoint_pathr_   cachec                 C   sj   t |td|d}d|v r| j||| dS | |d  |r1d| _t| jdr3| j  dS dS dS )a  Load a GAN checkpoint and initialize model parameters.

        Args:
            config (Coqpit): Model config.
            checkpoint_path (str): Checkpoint file path.
            eval (bool, optional): If true, load the model for inference. If falseDefaults to False.
        rT   )map_locationrj   
model_discmodelNremove_weight_norm)	r   r;   devicer   load_checkpointload_state_dictr   hasattrrn   )r   r   ri   r_   rj   stater!   r!   r"   rp      s   zGAN.load_checkpointc                 C   s   |j | jjk| _dS )zEnable the discriminator training based on `steps_to_start_discriminator`

        Args:
            trainer (Trainer): Trainer object.
        N)total_steps_doner   steps_to_start_discriminatorr   )r   trainerr!   r!   r"   on_train_step_start   s   zGAN.on_train_step_startc                 C   s@   t | jj| jj| jj| j}t | jj| jj| jj| j}||gS )zInitiate and return the GAN optimizers based on the config parameters.

        It returnes 2 optimizers in a list. First one is for the generator and the second one is for the discriminator.

        Returns:
            List: optimizers.
        )r
   r   	optimizeroptimizer_paramslr_genr   lr_discr   )r   
optimizer1
optimizer2r!   r!   r"   r
     s   zGAN.get_optimizerc                 C   s   | j j| j jgS )zSet the initial learning rates for each optimizer.

        Returns:
            List: learning rates for each optimizer.
        )r   r{   rz   r   r!   r!   r"   get_lr  s   z
GAN.get_lrc                 C   s8   t | jj| jj|d }t | jj| jj|d }||gS )zSet the schedulers for each optimizer.

        Args:
            optimizer (List[`torch.optim.Optimizer`]): List of optimizers.

        Returns:
            List: Schedulers, one for each optimizer.
        r   r.   )r   r   lr_scheduler_genlr_scheduler_gen_paramslr_scheduler_disclr_scheduler_disc_params)r   rx   
scheduler1
scheduler2r!   r!   r"   r     s   	zGAN.get_schedulerc                 C   sF   t | d tr| d \}}| d \}}||||dS | \}}||dS )zFormat the batch for training.

        Args:
            batch (List): Batch out of the dataloader.

        Returns:
            Dict: formatted model inputs.
        r   r.   )r,   r-   r0   r1   )r,   r-   )rA   list)r)   x_Gy_Gx_Dy_Dr#   rF   r!   r!   r"   format_batch+  s   

zGAN.format_batchis_evalTsamplesverbosenum_gpusrankc                 C   s   t | j||j| jj|j|jd|v r|jnd| | |j|j|d}|	  |dkr/t
|ddnd}	t||r7dn|j|dkd|	|rC|jn|jdd	}
|
S )
a  Initiate and return the GAN dataloader.

        Args:
            config (Coqpit): Model config.
            ap (AudioProcessor): Audio processor.
            is_eval (True): Set the dataloader for evaluation if true.
            samples (List): Data samples.
            verbose (bool): Log information if true.
            num_gpus (int): Number of GPUs in use.
            rank (int): Rank of the current GPU. Defaults to None.

        Returns:
            DataLoader: Torch dataloader.
        r:   F)r   itemsseq_lenhop_len	pad_shortconv_padreturn_pairsis_trainingreturn_segmentsuse_noise_augment	use_cacher   r.   T)shuffleNr   )
batch_sizer   	drop_lastsamplernum_workers
pin_memory)r   r   r   
hop_lengthr   r   r:   r   r   shuffle_mappingr	   r   r   num_eval_loader_workersnum_loader_workers)r   r   r\   r   r   r   r   r   datasetr   loaderr!   r!   r"   get_data_loader<  s4   	zGAN.get_data_loaderc                 C   s   t | jt| jgS )z$Return criterions for the optimizers)r   r   r   r~   r!   r!   r"   get_criteriono  s   zGAN.get_criterionc                 C   s   t j| |d}t| |dS )N)r   )r   )r   init_from_configr   )r   r   r   r!   r!   r"   r   s  s   zGAN.init_from_config)N)FF)r$   N)T)%__name__
__module____qualname__r   r   r   r;   Tensorr&   r(   r   intr   rQ   strrY   npndarrayre   r<   r   Modulerg   rh   boolrp   rw   r   r
   r   r   staticmethodr   r   r   r   __classcell__r!   r!   r   r"   r      s    "
&x
&



3r   )$inspectr   typingr   r   r   rU   r   r;   coqpitr   r   torch.utils.datar   torch.utils.data.distributedr	   trainer.trainer_utilsr
   r   TTS.utils.audior   TTS.utils.ior    TTS.vocoder.datasets.gan_datasetr   TTS.vocoder.layers.lossesr   r   TTS.vocoder.modelsr   r   TTS.vocoder.models.base_vocoderr   TTS.vocoder.utils.generic_utilsr   r   r!   r!   r!   r"   <module>   s"    