o
    
jQ                     @   s@  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZ zddlmZmZ eedZW n ey@   dZY nw g dZdd	 Zd
d Zdd Zdd Zdd Zd'ddZG dd dejZG dd dejZG dd dejZG dd dZG dd deZdd  ZG d!d" d"ZG d#d$ d$Z d%d& Z!dS )(a  
This is an almost carbon copy of gaussian_diffusion.py from OpenAI's ImprovedDiffusion repo, which itself:

This code started out as a PyTorch port of Ho et al's diffusion models:
https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py

Docstrings have been added, as well as DDIM sampling and a new collection of beta schedules.
    Ntqdm)
DPM_SolverNoiseScheduleVPmodel_wrapper)sample_dpmpp_2msample_euler_ancestral)	k_euler_adpm++2m)r
   pddimc                    s   d | |||fD ]}t |tjr|  nq dusJ d fdd||fD \}}dd| | t||  | | d t|    S )z
    Compute the KL divergence between two gaussians.

    Shapes are automatically broadcasted, so batches can be compared to
    scalars, among other use cases.
    Nz&at least one argument must be a Tensorc                    s,   g | ]}t |tjr|nt| qS  )
isinstancethTensortensorto.0xr   r   T/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/tortoise/diffusion.py
<listcomp>/   s   , znormal_kl.<locals>.<listcomp>      ?g         )r   r   r   exp)mean1logvar1mean2logvar2objr   r   r   	normal_kl   s   6r!   c                 C   s2   ddt tdtj | dt | d     S )zb
    A fast approximation of the cumulative distribution function of the
    standard normal.
    r         ?       @gHm?   )r   tanhnpsqrtpipowr   r   r   r   approx_standard_normal_cdf4   s   2r+   c                C   s   | j |j   kr|j ksJ  J | | }t| }||d  }t|}||d  }t|}t|jdd}	td| jdd}
|| }t| dk |	t| dk|
t|jdd}|j | j ksfJ |S )a{  
    Compute the log-likelihood of a Gaussian distribution discretizing to a
    given image.

    :param x: the target images. It is assumed that this was uint8 values,
              rescaled to the range [-1, 1].
    :param means: the Gaussian mean Tensor.
    :param log_scales: the Gaussian log stddev Tensor.
    :return: a tensor like x of log probabilities (in nats).
    gp?g-q=)minr"   g++?)shaper   r   r+   logclampwhere)r   means
log_scales
centered_xinv_stdvplus_incdf_plusmin_incdf_minlog_cdf_pluslog_one_minus_cdf_min	cdf_delta	log_probsr   r   r   #discretized_gaussian_log_likelihood<   s"   "r>   c                 C   s   | j ttdt| jdS )z6
    Take the mean over all non-batch dimensions.
       dim)meanlistrangelenr.   r   r   r   r   	mean_flatZ   s   rF   c                 C   sX   | dkrd| }|d }|d }t j|||t jdS | dkr%t|dd S td	|  )
a@  
    Get a pre-defined beta schedule for the given name.

    The beta schedule library consists of beta schedules which remain similar
    in the limit of num_diffusion_timesteps.
    Beta schedules may be added, but should not be removed or changed once
    they are committed to maintain backwards compatibility.
    linear  g-C6?g{Gz?dtypecosinec                 S   s    t | d d t j d d S )NgMb?gT㥛 ?r   )mathcosr(   )tr   r   r   <lambda>t   s     z)get_named_beta_schedule.<locals>.<lambda>zunknown beta schedule: )r&   linspacefloat64betas_for_alpha_barNotImplementedError)schedule_namenum_diffusion_timestepsscale
beta_startbeta_endr   r   r   get_named_beta_schedulea   s   	rY   r-   c                 C   sP   g }t | D ]}||  }|d |  }|td||||  | qt|S )a$  
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    r?   )rD   appendr,   r&   array)rU   	alpha_barmax_betabetasit1t2r   r   r   rR   z   s   "
rR   c                   @   s   e Zd ZdZdZdZdZdS )ModelMeanTypez2
    Which type of output the model predicts.
    
previous_xstart_xepsilonN)__name__
__module____qualname____doc__
PREVIOUS_XSTART_XEPSILONr   r   r   r   rb      s
    rb   c                   @   s    e Zd ZdZdZdZdZdZdS )ModelVarTypez
    What is used as the model's output variance.

    The LEARNED_RANGE option has been added to allow the model to predict
    values between FIXED_SMALL and FIXED_LARGE, making its job easier.
    learnedfixed_smallfixed_largelearned_rangeN)rf   rg   rh   ri   LEARNEDFIXED_SMALLFIXED_LARGELEARNED_RANGEr   r   r   r   rm      s    rm   c                   @   s$   e Zd ZdZdZdZdZdd ZdS )LossTypemserescaled_mseklrescaled_klc                 C   s   | t jkp	| t jkS N)rv   KLRESCALED_KL)selfr   r   r   is_vb   s   zLossType.is_vbN)rf   rg   rh   MSERESCALED_MSEr|   r}   r   r   r   r   r   rv      s    rv   c                   @   sl  e Zd ZdZddddddddZd	d
 Zd;ddZdd Zd<ddZdd Z	dd Z
dd Zdd Zd;ddZd;ddZ							d=ddZd d! Z				d>d"d#Z							d=d$d%Z							d=d&d'Z					(d?d)d*Z				(d@d+d,Z								(dAd-d.Z								(dAd/d0ZdBd1d2ZdCd3d4Z	dCd5d6Zd7d8 ZdBd9d:ZdS )DGaussianDiffusionaO  
    Utilities for training and sampling diffusion models.

    Ported directly from here, and then adapted over time to further experimentation.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42

    :param betas: a 1-D numpy array of betas for each diffusion timestep,
                  starting at T and going to 1.
    :param model_mean_type: a ModelMeanType determining what the model outputs.
    :param model_var_type: a ModelVarType determining how variance is output.
    :param loss_type: a LossType determining the loss function to use.
    :param rescale_timesteps: if True, pass floating point timesteps into the
                              model so that they are always scaled like in the
                              original paper (0 to 1000).
    Fr?   Tr   )rescale_timestepsconditioning_freeconditioning_free_kramp_conditioning_freesamplerc       	         C   s  |	| _ t|| _t|| _t|| _|| _|| _|| _	|| _
tj|tjd}|| _t|jdks4J d|dk r@|dk sBJ t|jd | _d| }
tj|
dd| _td| jd d | _t| jdd  d| _| jj| jfksxJ t| j| _td| j | _td| j | _td| j | _td| j d | _|d| j  d| j  | _ tt| j d | j dd  | _!|t| j d| j  | _"d| j t|
 d| j  | _#d S )	NrI   r?   zbetas must be 1-Dr   r"   )axis        )$r   rb   model_mean_typerm   model_var_typerv   	loss_typer   r   r   r   r&   r[   rQ   r^   rE   r.   allintnum_timestepscumprodalphas_cumprodrZ   alphas_cumprod_prevalphas_cumprod_nextr'   sqrt_alphas_cumprodsqrt_one_minus_alphas_cumprodr/   log_one_minus_alphas_cumprodsqrt_recip_alphas_cumprodsqrt_recipm1_alphas_cumprodposterior_varianceposterior_log_variance_clippedposterior_mean_coef1posterior_mean_coef2)r~   r^   r   r   r   r   r   r   r   r   alphasr   r   r   __init__   s6   


$$zGaussianDiffusion.__init__c                 C   sB   t | j||j| }t d| j ||j}t | j||j}|||fS )a  
        Get the distribution q(x_t | x_0).

        :param x_start: the [N x C x ...] tensor of noiseless inputs.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :return: A tuple (mean, variance, log_variance), all of x_start's shape.
        r"   )_extract_into_tensorr   r.   r   r   )r~   x_startrN   rB   variancelog_variancer   r   r   q_mean_variance   s   
z!GaussianDiffusion.q_mean_varianceNc                 C   sJ   |du r	t |}|j|jksJ t| j||j| t| j||j|  S )am  
        Diffuse the data for a given number of diffusion steps.

        In other words, sample from q(x_t | x_0).

        :param x_start: the initial data batch.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :param noise: if specified, the split-out normal noise.
        :return: A noisy version of x_start.
        N)r   
randn_liker.   r   r   r   )r~   r   rN   noiser   r   r   q_sample  s   
zGaussianDiffusion.q_samplec                 C   s   |j |j ksJ t| j||j | t| j||j |  }t| j||j }t| j||j }|j d |j d   krH|j d   krH|j d ksKJ  J |||fS )zo
        Compute the mean and variance of the diffusion posterior:

            q(x_{t-1} | x_t, x_0)

        r   )r.   r   r   r   r   r   )r~   r   x_trN   posterior_meanr   r   r   r   r   q_posterior_mean_variance  s   
z+GaussianDiffusion.q_posterior_mean_variancec              
      s  |du ri }|j dd \}}|j |fksJ ||| |fi |}	| jr4||| |fddi|}
| jtjtjfv r|	j ||d g|j dd R ksPJ tj|	|dd\}	}| jrgtj|
|dd\}
}| jtjkru|}t	|}net
| j||j }t
t| j||j }|d d }|| d| |  }t	|}n<tjt| jd | jdd tt| jd | jdd ftj| j| jfi| j \}}t
|||j }t
|||j }| jr	| jr|j d dksJ | jd| |d  | j   }n| j}d| |	 ||
  }	 fdd	}| jtjkr$|| j|||	d
}|	}n1| jtjtjfv rP| jtjkr:||	}n
|| j|||	d}| j|||d\}}}nt| j|j |j   krl|j   krl|j ksoJ  J ||||dS )a  
        Apply the model to get p(x_{t-1} | x_t), as well as a prediction of
        the initial x, x_0.

        :param model: the model, which takes a signal and a batch of timesteps
                      as input.
        :param x: the [N x C x ...] tensor at time t.
        :param t: a 1-D Tensor of timesteps.
        :param clip_denoised: if True, clip the denoised signal into [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample. Applies before
            clip_denoised.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :return: a dict with the following keys:
                 - 'mean': the model mean output.
                 - 'variance': the model variance output.
                 - 'log_variance': the log of 'variance'.
                 - 'pred_xstart': the prediction for x_0.
        Nr   r   Tr?   r@   r   c                    s$   d ur| }  r|  ddS | S )Nr   r?   )r0   r*   clip_denoiseddenoised_fnr   r   process_xstarto  s
   z9GaussianDiffusion.p_mean_variance.<locals>.process_xstart)r   rN   xprev)r   rN   epsr   r   rN   )rB   r   r   pred_xstart) r.   _scale_timestepsr   r   rm   rr   ru   r   splitr   r   r   r&   r/   r^   rt   rZ   r   rs   r   r   itemr   r   rb   rj   _predict_xstart_from_xprevrk   rl   _predict_xstart_from_epsr   rS   )r~   modelr   rN   r   r   model_kwargsBCmodel_outputmodel_output_no_conditioningmodel_var_values_model_log_variancemodel_variancemin_logmax_logfraccfkr   r   
model_meanr   r   r   p_mean_variance*  sn   & $

4z!GaussianDiffusion.p_mean_variancec                 C   s8   |j |j ksJ t| j||j | t| j||j |  S r{   )r.   r   r   r   )r~   r   rN   r   r   r   r   r     s
   z*GaussianDiffusion._predict_xstart_from_epsc                 C   sB   |j |j ksJ td| j ||j | t| j| j ||j |  S )Nr"   )r.   r   r   r   )r~   r   rN   r   r   r   r   r     s
   z,GaussianDiffusion._predict_xstart_from_xprevc                 C   s(   t | j||j| | t | j||j S r{   )r   r   r.   r   )r~   r   rN   r   r   r   r   _predict_eps_from_xstart  s   z*GaussianDiffusion._predict_eps_from_xstartc                 C   s   | j r| d| j  S |S )N     @@)r   floatr   r~   rN   r   r   r   r     s   z"GaussianDiffusion._scale_timestepsc                 C   s8   |||  |fi |}|d  |d |   }|S )a[  
        Compute the mean for the previous step, given a function cond_fn that
        computes the gradient of a conditional log probability with respect to
        x. In particular, cond_fn computes grad(log(p(y|x))), and we want to
        condition on y.

        This uses the conditioning strategy from Sohl-Dickstein et al. (2015).
        rB   r   )r   r   )r~   cond_fn
p_mean_varr   rN   r   gradientnew_meanr   r   r   condition_mean  s   	z GaussianDiffusion.condition_meanc           
      C   s   t | j||j}| |||d }|d|  ||| |fi |  }| }| ||||d< | j|d ||d\|d< }	}	|S )a3  
        Compute what the p_mean_variance output would have been, should the
        model's score function be conditioned by cond_fn.

        See condition_mean() for details on cond_fn.

        Unlike condition_mean(), this instead uses the conditioning strategy
        from Song et al (2020).
        r   r?   r   rB   )	r   r   r.   r   r'   r   copyr   r   )
r~   r   r   r   rN   r   r\   r   outr   r   r   r   condition_score  s   
(z!GaussianDiffusion.condition_scorec              
      s   t tsJ |	d u rt  j}	||jd g} fdd	 tdddd}fdd	}t||d
dt	
dt	
d| jd}t||dd}|j|| jdddd}|S )Nr   c                     s4    | i |}t j||jd d dd\}}||fS )Nr?   r   r@   )r   r   r.   )argskwargsr   model_epsilon	model_var)r   r   r   model_split  s   z>GaussianDiffusion.k_diffusion_sample_loop.<locals>.model_splitrG   g?g      @)schedulecontinuous_beta_0continuous_beta_1c                    sf   	 |  d\} }|d  d\}}t| |fddi d | |fi  d g}d |S )aT  
                x_in = torch.cat([x] * 2)
                t_in = torch.cat([t_continuous] * 2)
                c_in = torch.cat([unconditional_condition, condition])
                noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2)
            print(t)
            print(self.timestep_map)
            exit()
            r   rH   r   Tr   r?   )chunktorchcatupdate)r   rN   r   r   r   res)r   r   pbarr   r   model_fn_prewrap  s   

zCGaussianDiffusion.k_diffusion_sample_loop.<locals>.model_fn_prewrapr   zclassifier-freer?   )
model_typer   guidance_type	conditionunconditional_conditionguidance_scalezdpmsolver++)algorithm_typer   time_uniform	multistep)stepsorder	skip_typemethod)r   dictnext
parametersdevicenew_onesr.   r   r   r   r   r   r   sampler   )r~   	k_samplerr   r   r.   r   r   r   r   r   r   progresss_innoise_scheduler   model_fn
dpm_solverx_sampler   )r   r   r   r   r   k_diffusion_sample_loop  s6   
z)GaussianDiffusion.k_diffusion_sample_loopc                 O   s   | j }|dkr| j|i |S |dkr| j|i |S |dkrW| jdur(tdt| jd}td u r7td| j	t| |g|R i |W  d    S 1 sPw   Y  d S td)	Nr   r   r
   Tzcond_free must be true)totalz2Install k_diffusion for using k_diffusion samplerszsampler not impl)
r   p_sample_loopddim_sample_loopr   RuntimeErrorr   r   K_DIFFUSION_SAMPLERSModuleNotFoundErrorr   )r~   r   r   sr   r   r   r   sample_loop  s   
$zGaussianDiffusion.sample_loopc                 C   s   | j ||||||d}t|}	|dk jdgdgt|jd  R  }
|dur5| j|||||d|d< |d |
td|d	   |	  }||d
 dS )a  
        Sample x_{t-1} from the model at the given timestep.

        :param model: the model to sample from.
        :param x: the current tensor at x_{t-1}.
        :param t: the value of t, starting at 0 for the first diffusion step.
        :param clip_denoised: if True, clip the x_start prediction to [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample.
        :param cond_fn: if not None, this is a gradient function that acts
                        similarly to the model.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :return: a dict containing the following keys:
                 - 'sample': a random sample from the model.
                 - 'pred_xstart': a prediction of x_0.
        r   r   r   r   r   r?   Nr   rB   r   r   r   r   r   )	r   r   r   r   viewrE   r.   r   r   )r~   r   r   rN   r   r   r   r   r   r   nonzero_maskr   r   r   r   p_sample"  s   
*"zGaussianDiffusion.p_samplec
                 C   s2   d}
| j |||||||||	d	D ]}|}
q|
d S )a  
        Generate samples from the model.

        :param model: the model module.
        :param shape: the shape of the samples, (N, C, H, W).
        :param noise: if specified, the noise from the encoder to sample.
                      Should be of the same shape as `shape`.
        :param clip_denoised: if True, clip x_start predictions to [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample.
        :param cond_fn: if not None, this is a gradient function that acts
                        similarly to the model.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :param device: if specified, the device to create the samples on.
                       If not specified, use a model parameter's device.
        :param progress: if True, show a tqdm progress bar.
        :return: a non-differentiable batch of samples.
        N)r   r   r   r   r   r   r   r   )p_sample_loop_progressive)r~   r   r.   r   r   r   r   r   r   r   finalr   r   r   r   r   L  s   
zGaussianDiffusion.p_sample_loopc
                 c   s    |du rt | j}t|ttfsJ |dur|}
ntj|d|i}
tt| j	ddd }t
||	 dD ]5}tj|g|d  |d}t  | j||
|||||d}|V  |d }
W d   n1 sgw   Y  q7dS )	a  
        Generate samples from the model and yield intermediate samples from
        each timestep of diffusion.

        Arguments are the same as p_sample_loop().
        Returns a generator over dicts, where each dict is the return value of
        p_sample().
        Nr   r   disabler   r   )r   r   r   r   r   )r   r   r   r   tuplerC   r   randnrD   r   r   r   no_gradr
  )r~   r   r.   r   r   r   r   r   r   r   imgindicesr_   rN   r   r   r   r   r  z  s2   
	
z+GaussianDiffusion.p_sample_loop_progressiver   c	                 C   s  | j ||||||d}	|dur| j||	|||d}	| |||	d }
t| j||j}t| j||j}|td| d|   td||   }t	|}|	d t| td| |d  |
  }|dk
 jdgdgt|jd  R  }||| |  }||	d d	S )
z^
        Sample x_{t-1} from the model using DDIM.

        Same usage as p_sample().
        r  Nr  r   r?   r   r   r   r  )r   r   r   r   r   r.   r   r   r'   r   r   r  rE   )r~   r   r   rN   r   r   r   r   etar   r   r\   alpha_bar_prevsigmar   	mean_predr	  r   r   r   r   ddim_sample  s&   ,
,*zGaussianDiffusion.ddim_samplec                 C   s   |dksJ d| j ||||||d}t| j||j| |d  t| j||j }	t| j||j}
|d t|
 td|
 |	  }||d dS )zG
        Sample x_{t+1} from the model using DDIM reverse ODE.
        r   z'Reverse ODE only for deterministic pathr  r   r?   r  )r   r   r   r.   r   r   r   r'   )r~   r   r   rN   r   r   r   r  r   r   alpha_bar_nextr  r   r   r   ddim_reverse_sample  s   $z%GaussianDiffusion.ddim_reverse_samplec                 C   s4   d}| j |||||||||	|
d
D ]}|}q|d S )ze
        Generate samples from the model using DDIM.

        Same usage as p_sample_loop().
        N)r   r   r   r   r   r   r   r  r   )ddim_sample_loop_progressive)r~   r   r.   r   r   r   r   r   r   r   r  r  r   r   r   r   r     s   
z"GaussianDiffusion.ddim_sample_loopc                 c   s    |du rt | j}t|ttfsJ |dur|}ntj|d|i}tt| j	ddd }|	r?ddl
m} |||	 d}|D ]6}tj|g|d  |d}t  | j||||||||
d}|V  |d	 }W d   n1 srw   Y  qAdS )
z
        Use DDIM to sample from the model and yield intermediate samples from
        each timestep of DDIM.

        Same usage as p_sample_loop_progressive().
        Nr   r   r   r   r  r  )r   r   r   r   r  r   )r   r   r   r   r  rC   r   r  rD   r   	tqdm.autor   r   r  r  )r~   r   r.   r   r   r   r   r   r   r   r  r  r  r   r_   rN   r   r   r   r   r    s:   


z.GaussianDiffusion.ddim_sample_loop_progressivec                 C   s   | j |||d\}}}	| j|||||d}
t||	|
d |
d }t|td }t||
d d|
d  d }|j|jks?J t|td }t	|dk||}||
d	 d
S )ai  
        Get a term for the variational lower-bound.

        The resulting units are bits (rather than nats, as one might expect).
        This allows for comparison to other papers.

        :return: a dict with the following keys:
                 - 'output': a shape [N] tensor of NLLs or KLs.
                 - 'pred_xstart': the x_0 predictions.
        r   )r   r   rB   r   r#   r   )r2   r3   r   r   )outputr   )
r   r   r!   rF   r&   r/   r>   r.   r   r1   )r~   r   r   r   rN   r   r   	true_meanr   true_log_variance_clippedr   ry   decoder_nllr  r   r   r   _vb_terms_bpdE  s   zGaussianDiffusion._vb_terms_bpdc                 C   s  |du ri }|du rt |}| j|||d}i }| jtjks%| jtjkrE| j||||d|dd |d< | jtjkrC|d  | j9  < |S | jtj	ksR| jtj
krA||| |fi |}t|trp|d }	|dd |d	< n|}	| jtjtjfv r|jdd
 \}
}|	j|
|d
 g|jd
d R ksJ t j|	|dd\}	}t j|	 |gdd}| j|ddd|||ddd |d< | jtj
kr|d  | jd 9  < | jtjkr| j|||dd }t|}n!| jtjkr|}|	}n| jtjkr|}| |||	}nt| j|	j|j  kr|jksJ  J t ||	 d
 |d< ||d< d|v r9|d |d  |d< |S |d |d< |S t| j)\  
        Compute training losses for a single timestep.

        :param model: the model to evaluate loss on.
        :param x_start: the [N x C x ...] tensor of inputs.
        :param t: a batch of timestep indices.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :param noise: if specified, the specific Gaussian noise to try to remove.
        :return: a dict with the key "loss" containing a tensor of shape [N].
                 Some mean or variance settings may also have other keys.
        Nr   F)r   r   r   rN   r   r   r  lossr   r?   extra_outputsr   r@   rc                 W      | S r{   r   r(  r   r   r   r   rO         z3GaussianDiffusion.training_losses.<locals>.<lambda>r   r   r   rN   r   vbr   r   rw   x_start_predicted)!r   r   r   r   rv   r|   r}   r"  r   r   r   r   r   r  r   rm   rr   ru   r.   r   r   detachr   rb   rj   r   r   zerosrk   rl   r   rS   rF   )r~   r   r   rN   r   r   r   termsmodel_outputsr   r   r   r   
frozen_outtargetx_start_predr   r   r   training_losses`  s   
4
&
&

z!GaussianDiffusion.training_lossesc                 C   sb  |du ri }|du rt |}| j|||d}i }	| jtjks%| jtjkr'J | jtjks4| jtjkr,|||| 	|fi |}
|	
dd t||
D  |	| }| jtjtjfv r|jdd \}}|j||dg|jdd R ksvJ |dddddf |dddddf }}t j| |gdd	}| j|d
dd|||ddd |	d< | jtjkr|	d  | jd 9  < | jtjkr| j|||dd }t|}n | jtjkr|}|}n| jtjkr|}| |||}nt| j|j|j  kr|jksJ  J t|| d |	d< ||	d< d|	v r$|	d |	d  |	d< |	S |	d |	d< |	S t| j)r#  Nr$  Fc                 S   s   i | ]\}}||qS r   r   )r   kor   r   r   
<dictcomp>  s    zDGaussianDiffusion.autoregressive_training_losses.<locals>.<dictcomp>r   r   r?   r@   r'  c                 W   r)  r{   r   r*  r   r   r   rO     r+  zBGaussianDiffusion.autoregressive_training_losses.<locals>.<lambda>r,  r  r-  r   r   rw   r.  r%  ) r   r   r   r   rv   r|   r}   r   r   r   r   zipr   rm   rr   ru   r.   r   r/  r"  r   r   rb   rj   r   r   r0  rk   rl   r   rS   rF   )r~   r   r   rN   model_output_keys
gd_out_keyr   r   r   r1  r2  r   r   r   r   r3  r4  r5  r   r   r   autoregressive_training_losses  sd   
$.
&

z0GaussianDiffusion.autoregressive_training_lossesc                 C   sZ   |j d }tj| jd g| |jd}| ||\}}}t||ddd}t|t	d S )a=  
        Get the prior KL term for the variational lower-bound, measured in
        bits-per-dim.

        This term can't be optimized, as it only depends on the encoder.

        :param x_start: the [N x C x ...] tensor of inputs.
        :return: a batch of [N] KL values (in bits), one per batch element.
        r   r?   r  r   )r   r   r   r   r#   )
r.   r   r   r   r   r   r!   rF   r&   r/   )r~   r   
batch_sizerN   qt_meanr   qt_log_variancekl_priorr   r   r   
_prior_bpd  s
   

zGaussianDiffusion._prior_bpdc              
   C   sJ  |j }|jd }g }g }g }	tt| jddd D ]`}
tj|
g| |d}t|}| j|||d}t	  | j
||||||d}W d   n1 sMw   Y  ||d  |t|d | d	  | |||d }|	t|| d	  qtj|d
d}tj|d
d}tj|	d
d}	| |}|jd
d| }|||||	dS )au  
        Compute the entire variational lower-bound, measured in bits-per-dim,
        as well as other related quantities.

        :param model: the model to evaluate loss on.
        :param x_start: the [N x C x ...] tensor of inputs.
        :param clip_denoised: if True, clip denoised samples.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.

        :return: a dict containing the following keys:
                 - total_bpd: the total variational lower-bound, per batch element.
                 - prior_bpd: the prior term in the lower-bound.
                 - vb: an [N x T] tensor of terms in the lower-bound.
                 - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep.
                 - mse: an [N x T] tensor of epsilon MSEs for each timestep.
        r   Nr   r  )r   rN   r   )r   r   rN   r   r   r  r   r   r?   r@   )	total_bpd	prior_bpdr-  
xstart_mserw   )r   r.   rC   rD   r   r   r   r   r   r  r"  rZ   rF   r   stackrB  sum)r~   r   r   r   r   r   r>  r-  rE  rw   rN   t_batchr   r   r   r   rD  rC  r   r   r   calc_bpd_loop  sD   


	
zGaussianDiffusion.calc_bpd_loopr{   )TNN)NTNNNNF)TNNN)TNNNr   )TNNr   )NTNNNNFr   )TN)NN)rf   rg   rh   ri   r   r   r   r   r   r   r   r   r   r   r   r   r  r
  r   r  r  r  r   r  r"  r6  r=  rB  rI  r   r   r   r   r      s    3

`


O
.
2
1
.
&
&

2
W
Hr   c                       sn   e Zd ZdZ fddZ fddZ fddZ fdd	Z fd
dZ fddZ	dddZ
dd Z  ZS )SpacedDiffusiona#  
    A diffusion process which can skip steps in a base diffusion process.

    :param use_timesteps: a collection (sequence or set) of timesteps from the
                          original diffusion process to retain.
    :param kwargs: the kwargs to create the base diffusion process.
    c                    s   t || _g | _t|d | _tdi |}d}g }t|jD ]\}}|| jv r9|d||   |}| j| qt	
||d< t jdi | d S )Nr^   r"   r?   r   )setuse_timestepstimestep_maprE   original_num_stepsr   	enumerater   rZ   r&   r[   superr   )r~   rL  r   base_diffusionlast_alpha_cumprod	new_betasr_   alpha_cumprod	__class__r   r   r   Q  s   

zSpacedDiffusion.__init__c                        t  j| |g|R i |S r{   )rP  r   _wrap_modelr~   r   r   r   rU  r   r   r   `      zSpacedDiffusion.p_mean_variancec                    rW  r{   )rP  r6  rX  rY  rU  r   r   r6  c  rZ  zSpacedDiffusion.training_lossesc                    s"   t  j| |dg|R i |S )NT)rP  r=  rX  rY  rU  r   r   r=  f  s   "z.SpacedDiffusion.autoregressive_training_lossesc                    rW  r{   )rP  r   rX  r~   r   r   r   rU  r   r   r   i  rZ  zSpacedDiffusion.condition_meanc                    rW  r{   )rP  r   rX  r[  rU  r   r   r   l  rZ  zSpacedDiffusion.condition_scoreFc                 C   s8   t |ts
t |tr|S |rtnt}||| j| j| jS r{   )r   _WrappedModel_WrappedAutoregressiveModelrM  r   rN  )r~   r   autoregressivemodr   r   r   rX  o  s   zSpacedDiffusion._wrap_modelc                 C   s   |S r{   r   r   r   r   r   r   u  s   z SpacedDiffusion._scale_timesteps)F)rf   rg   rh   ri   r   r   r6  r=  r   r   rX  r   __classcell__r   r   rU  r   rJ  H  s    
rJ  c                 C   sL  t |trB|dr8t|tdd }td| D ]}ttd| ||kr/ttd| |  S qtd|  ddd |d	D }| t| }| t| }d}g }t	|D ]K\}}|||k radnd }	|	|k rrtd
|	 d| |dkryd}
n|	d |d  }
d}g }t|D ]}|
|t|  ||
7 }q||7 }||	7 }qVt|S )aT  
    Create a list of timesteps to use from an original diffusion process,
    given the number of timesteps we want to take from equally-sized portions
    of the original process.

    For example, if there's 300 timesteps and the section counts are [10,15,20]
    then the first 100 timesteps are strided to be 10 timesteps, the second 100
    are strided to be 15 timesteps, and the final 100 are strided to be 20.

    If the stride is a string starting with "ddim", then the fixed striding
    from the DDIM paper is used, and only one section is allowed.

    :param num_timesteps: the number of diffusion steps in the original
                          process to divide up.
    :param section_counts: either a list of numbers, or a string containing
                           comma-separated numbers, indicating the step count
                           per section. As a special case, use "ddimN" where N
                           is a number of steps to use the striding from the
                           DDIM paper.
    :return: a set of diffusion steps from the original process to use.
    r   Nr?   r   zcannot create exactly z steps with an integer stridec                 S   s   g | ]}t |qS r   )r   r   r   r   r   r     s    z#space_timesteps.<locals>.<listcomp>,zcannot divide section of z steps into r   )r   str
startswithr   rE   rD   rK  
ValueErrorr   rO  rZ   round)r   section_countsdesired_countr_   size_perextra	start_idx	all_stepssection_countsizefrac_stridecur_idxtaken_stepsr   r   r   r   space_timestepsz  s8   



rq  c                   @      e Zd Zdd Zdd ZdS )r\  c                 C      || _ || _|| _|| _d S r{   r   rM  r   rN  r~   r   rM  r   rN  r   r   r   r        
z_WrappedModel.__init__c                 K   sN   t j| j|j|jd}|| }| jr| d| j  }| j||fi |}|S N)r   rJ   r   	r   r   rM  r   rJ   r   r   rN  r   )r~   r   tsr   
map_tensornew_tsr   r   r   r   __call__  s   z_WrappedModel.__call__Nrf   rg   rh   r   r|  r   r   r   r   r\        r\  c                   @   rr  )r]  c                 C   rs  r{   rt  ru  r   r   r   r     rv  z$_WrappedAutoregressiveModel.__init__c                 K   sL   t j| j|j|jd}|| }| jr| d| j  }| j|||fi |S rw  rx  )r~   r   x0ry  r   rz  r{  r   r   r   r|    s
   z$_WrappedAutoregressiveModel.__call__Nr}  r   r   r   r   r]    r~  r]  c                 C   sR   t | j|jd|  }t|jt|k r$|d }t|jt|k s||S )a  
    Extract values from a 1-D numpy array for a batch of indices.

    :param arr: the 1-D numpy array.
    :param timesteps: a tensor of indices into the array to extract.
    :param broadcast_shape: a larger shape of K dimensions with the batch
                            dimension equal to the length of timesteps.
    :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims.
    r  ).N)r   
from_numpyr   r   r   rE   r.   expand)arr	timestepsbroadcast_shaper   r   r   r   r     s
   

r   )r-   )"ri   enumrL   numpyr&   r   r   r   "TTS.tts.layers.tortoise.dpm_solverr   r   r   k_diffusion.samplingr   r   r  ImportErrorSAMPLERSr!   r+   r>   rF   rY   rR   Enumrb   rm   rv   r   rJ  rq  r\  r]  r   r   r   r   r   <module>   sJ    	


       24