o
    ´‹
j:  ã                   @   sÞ   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm  m	Z
 d dlZd dlmZ dd„ Zdd„ Zdde d	¡fd
d„ZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdS )é    N)Úsqrt©Ú	rearrangec                 C   s   | d ur| S |S ©N© )ÚvalÚdr   r   úK/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/xtts/dvae.pyÚdefault   s   r
   c                    s   ‡ fdd„}|S )Nc                    s2   | j }|  ¡  ˆ | g|¢R i |¤Ž}|  |¡ |S r   )ÚtrainingÚevalÚtrain)ÚmodelÚargsÚkwargsÚwas_trainingÚout©Úfnr   r	   Úinner   s
   
zeval_decorator.<locals>.innerr   )r   r   r   r   r	   Úeval_decorator   s   r   z"../experiments/clips_mel_norms.pthÚcpuc                 C   sz   t jjdddddddddd	d

 |¡}|  |¡} || ƒ}t tj|dd¡}|d u r1tj||d}|| d¡ d¡ }|S )Ni   é   é   Fi"V  r   i@  éP   Úslaney)
Ún_fftÚ
hop_lengthÚ
win_lengthÚpowerÚ
normalizedÚsample_rateÚf_minÚf_maxÚn_melsÚnormçñhãˆµøä>)Úmin)Úmap_locationéÿÿÿÿ)	Ú
torchaudioÚ
transformsÚMelSpectrogramÚtoÚtorchÚlogÚclampÚloadÚ	unsqueeze)ÚwavÚmel_norms_fileÚ	mel_normsÚdeviceÚmel_stftÚmelr   r   r	   Údvae_wav_to_mel   s*   öõ
r9   c                       s0   e Zd Zd
‡ fdd„	Zddd„Zdd	„ Z‡  ZS )ÚQuantizeç®Gáz®ï?r&   Fc                    s~   t ƒ  ¡  || _|| _|| _|| _|| _d | _d| _d| _	|| _
t ||¡}|  d|¡ |  dt |¡¡ |  d| ¡ ¡ d S )Ni ú  FÚembedÚcluster_sizeÚ	embed_avg)ÚsuperÚ__init__ÚdimÚn_embedÚdecayÚepsÚbalancing_heuristicÚcodesÚ	max_codesÚ
codes_fullÚnew_return_orderr.   ÚrandnÚregister_bufferÚzerosÚclone)ÚselfrA   rB   rC   rD   rE   rI   r<   ©Ú	__class__r   r	   r@   4   s   
zQuantize.__init__c                 C   s  | j rp| jrptj| j| jd| jdt| jƒ }t |dk|dk ¡ d¡}| j	 
dd¡}| j 
dd¡}t |¡| }||  |  
dd¡| _	||  |  
dd¡| _| j| ¡   | _t |¡rptdt |¡› dƒ d | _d| _| d	| j¡}| d
¡jdddd
| | j	  | j	 d
¡jddd }	|	 }
|
 d¡\}}t || j¡ |j¡}|j|jd d	… Ž }|  |¡}| j rå| jd u rÅ| ¡ | _n t | j| ¡ g¡| _t| jƒ| jkrå| j| j d … | _d| _| j rP| d¡}| !dd¡| }t" #¡ rt" $¡ dkrt" %|¡ t" %|¡ | jj& '| j(¡j)|d| j( d | jj& '| j(¡j)|d| j( d | j ¡ }| j| j* || j| j*   | }| j| d¡ }| j	j& +|¡ | ,¡ |  d
¡ -¡ }|||  ,¡  }|rv||||
 |jd d	… d ¡fS | j.r|||fS |||fS )Nr   )Úbinsr'   ÚmaxgÍÌÌÌÌÌì?g{®Gáz„?é   zReset z embedding codes.Fr)   r   T)Úkeepdim)Úalpha)r)   )/rE   rH   r.   ÚhistcrF   rB   ÚlenÚ
logical_orr2   r<   Úpermuter>   Ú
randn_liker=   ÚsqueezeÚanyÚprintÚsumÚreshaperA   ÚpowrR   ÚFÚone_hotÚtypeÚdtypeÚviewÚshapeÚ
embed_codeÚflattenÚcatrG   r   Ú	transposeÚdistributedÚis_initializedÚget_world_sizeÚ
all_reduceÚdataÚmul_rC   Úadd_rD   Úcopy_ÚdetachÚmeanrI   )rN   ÚinputÚreturn_soft_codesÚhÚmaskÚepÚeaÚ
rand_embedrh   ÚdistÚ
soft_codesÚ_Ú	embed_indÚembed_onehotÚquantizeÚembed_onehot_sumÚ	embed_sumÚnr=   Úembed_normalizedÚdiffr   r   r	   ÚforwardG   s\   "
8




  
  

zQuantize.forwardc                 C   s   t  || j dd¡¡S )Nr   rS   )ra   Ú	embeddingr<   rj   )rN   Úembed_idr   r   r	   rg   €   s   zQuantize.embed_code)r;   r&   FF)F)Ú__name__Ú
__module__Ú__qualname__r@   r‡   rg   Ú__classcell__r   r   rO   r	   r:   3   s    
9r:   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚDiscretizationLossr   c                    sŒ   t ƒ  ¡  || _|| _tjjd|d| _|dkrAd| _|  	dtj
dtjdd¡ |  	dtj
dtjdd¡ |  	d	t 
||¡¡ d S d
| _d S )Nr   )ÚscaleTÚaccumulator_indexrS   r   )rd   r6   Úaccumulator_filledÚaccumulatorF)r?   r@   Údiscrete_binsrA   r.   ÚdistributionsÚNormalr|   Úrecord_pastrK   rL   Úlong)rN   r“   rA   Úexpected_varianceÚ
store_pastrO   r   r	   r@   ˆ   s   

zDiscretizationLoss.__init__c                 C   sì   t tt|jƒƒƒt | jgƒ }|jt|ƒd| ¡  }|| ¡  }| jrl| j	jd }| 
¡  ¡ }| jdkrGtj| j	dd|d  | ||  }|| j	| j< |  jd7  _| j|krl|  jd9  _| jdkrl|  jd7  _t | j |¡ ¡S )N)rA   r   rS   )ÚsetÚrangerW   rf   rA   r^   Útuplert   r–   r’   rs   rM   r‘   r.   r   r|   Úlog_prob)rN   ÚxÚ
other_dimsÚaveragedÚ	acc_countÚavgr   r   r	   r‡   •   s   
$

zDiscretizationLoss.forward)r   ©rŠ   r‹   rŒ   r@   r‡   r   r   r   rO   r	   rŽ   ‡   s    rŽ   c                       ó$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚResBlockc              
      sF   t ƒ  ¡  t |||ddd|ƒ |||ddd|ƒ |||dƒ¡| _d S )Né   rS   )Úpadding)r?   r@   ÚnnÚ
SequentialÚnet)rN   ÚchanÚconvÚ
activationrO   r   r	   r@   ¬   s   


ûzResBlock.__init__c                 C   s   |   |¡| S r   )rª   )rN   rž   r   r   r	   r‡   ¶   s   zResBlock.forwardr£   r   r   rO   r	   r¥   «   s    
r¥   c                       r¤   )ÚUpsampledConvc                    s>   t ƒ  ¡  d| ¡ v sJ ‚|d | _|d= ||i |¤Ž| _d S )NÚstride)r?   r@   Úkeysr¯   r¬   )rN   r¬   r   r   rO   r   r	   r@   »   s
   

zUpsampledConv.__init__c                 C   s   t jj|| jdd}|  |¡S )NÚnearest)Úscale_factorÚmode)r¨   Ú
functionalÚinterpolater¯   r¬   )rN   rž   Úupr   r   r	   r‡   Â   s   
zUpsampledConv.forwardr£   r   r   rO   r	   r®   º   s    r®   c                       s†   e Zd Zdddddddddddd	ddd
ddi f‡ fdd„	Zdd„ Zdd„ Ze ¡ edd„ ƒƒZ	dd„ Z
dd„ Zdd„ Zdd„ Z‡  ZS )ÚDiscreteVAEr   i   r¦   r   é@   é   TFÚreluNéd   c           &         sÂ  t ƒ  ¡  |dk}|| _|| _|| _|| _t|dd|d  |ƒ| _|dkr)|dk s+J ‚|dkr6tj	}tj
}ntj}tj}|
sDt t|¡}|dkrLtj}n|dkrTtj}ntƒ sYJ ‚g }g }|dkrÙ‡ fdd„t|ƒD ƒ}tt|ƒƒ}|g|¢}|s{|n|d }|g|¢}td	d
„ ||fƒ\}}|	d d }t||ƒD ]5\\}} \}!}"| t ||| |	||d|ƒ ¡¡ |r¾| t d| ¡¡ | t ||!|"|	||d|ƒ ¡¡ qš|d }#|d }$n| t ||ˆ dƒ|ƒ ¡¡ ˆ }#ˆ }$t|ƒD ]}%| dt|$||ƒ¡ | t|$||ƒ¡ qï|dkr| d|||$dƒ¡ | ||$|dƒ¡ | ||#|dƒ¡ tj|Ž | _tj|Ž | _|r8tj ntj!| _"t#||dd| _$|| _%|| _&|r\t'j(dt'j)d| _*d| _+d| _,d| _-d S )Nr   r   rS   r¦   rº   Úsiluc                    s   g | ]}ˆ d |  ‘qS )r   r   )Ú.0Úi©Ú
hidden_dimr   r	   Ú
<listcomp>ÿ   s    z(DiscreteVAE.__init__.<locals>.<listcomp>c                 S   s   t t| d d… | dd … ƒƒS )Nr)   rS   )ÚlistÚzip©Útr   r   r	   Ú<lambda>  s    z&DiscreteVAE.__init__.<locals>.<lambda>)r¯   r§   é   r)   T)rI   )i À )rd   ).r?   r@   Ú
num_tokensÚ
num_layersÚstraight_throughÚpositional_dimsrŽ   Údiscrete_lossr¨   ÚConv2dÚConvTranspose2dÚConv1dÚConvTranspose1dÚ	functoolsÚpartialr®   ÚReLUÚSiLUÚNotImplementedErrorr›   rÂ   ÚreversedÚmaprÃ   Úappendr©   Ú	GroupNormÚinsertr¥   ÚencoderÚdecoderra   Úsmooth_l1_lossÚmse_lossÚloss_fnr:   ÚcodebookÚnormalizationÚrecord_codesr.   rL   r—   rF   Úcode_indÚtotal_codesÚinternal_step)&rN   rË   rÈ   Úcodebook_dimrÉ   Únum_resnet_blocksrÀ   Úchannelsr¯   Úkernel_sizeÚuse_transposed_convsÚencoder_normr­   rÝ   rÊ   rá   râ   Ú#discretization_loss_averaging_stepsÚlr_quantizer_argsÚhas_resblocksr¬   Úconv_transposeÚactÚ
enc_layersÚ
dec_layersÚ	enc_chansÚ	dec_chansÚdec_init_chanÚenc_chans_ioÚdec_chans_ioÚpadÚenc_inÚenc_outÚdec_inÚdec_outÚdec_out_chansÚinnermost_dimr~   rO   r¿   r	   r@   Ê   s|   
ÿ


"ÿ


zDiscreteVAE.__init__c                    sn   | j d u rˆS t‡fdd„| j ƒ\}}| jdkrdnd‰ t‡ fdd„||fƒ\}}ˆ ¡ ‰ˆ |¡ |¡ ˆS )Nc                    s   t  | ¡ ˆ ¡S r   )r.   Ú	as_tensorr-   rÄ   )Úimagesr   r	   rÆ   5  s    z"DiscreteVAE.norm.<locals>.<lambda>r   zc -> () c () ()zc -> () c ()c                    s
   t | ˆ ƒS r   r   rÄ   )Úarranger   r	   rÆ   7  s   
 )rá   r×   rË   rM   Úsub_Údiv_)rN   r   ÚmeansÚstdsr   )r  r   r	   r%   1  s   
zDiscreteVAE.normc                 C   s(   | j r| jdkrd| jd | j… iS i S )Nr   Úhistogram_codes)râ   rä   rF   )rN   ÚstepÚ__r   r   r	   Úget_debug_values<  s   zDiscreteVAE.get_debug_valuesc                 C   sJ   |   |¡}|  |¡ t|jƒdkrdnd¡}|  |¡\}}}|  |¡ |S ©Nr¹   ©r   r   r¦   rS   ©r   r   rS   )r%   rÛ   rY   rW   rf   rà   Ú	log_codes)rN   r   ÚimgÚlogitsÚsampledrF   r~   r   r   r	   Úget_codebook_indicesC  s
   
"
z DiscreteVAE.get_codebook_indicesc                 C   s¸   |   |¡ t| jdƒr| j |¡}nt || jj¡}|j\}}}i }| jdkr*d}ntt	|ƒƒ }}	d}||	dœ}t
||fi |¤Ž}|g}
| jD ]}|
 ||
d ƒ¡ qH|
d |
d fS )Nrg   rS   zb n d -> b d nzb (h w) d -> b d h w)rw   Úwr)   éþÿÿÿ)r  Úhasattrrà   rg   ra   rˆ   rf   rË   Úintr   r   rÜ   rØ   )rN   Úimg_seqÚimage_embedsÚbr„   r   r   r  rw   r  r   Úlayerr   r   r	   ÚdecodeL  s    



zDiscreteVAE.decodec                 C   sF   |   |¡}|  |¡ t|jƒdkrdnd¡}|  |¡\}}}|  |¡S r
  )r%   rÛ   rY   rW   rf   rà   r  )rN   r  r  r  rF   Úcommitment_lossr   r   r	   Úinfera  s   
"
zDiscreteVAE.inferc           
      C   sª   |   |¡}|  |¡ t|jƒdkrdnd¡}|  |¡\}}}| t|jƒdkr)dnd¡}| jrA|}| jD ]}||ƒ}q4|  |¡ n|  	|¡\}}| j
||dd}	|	||fS )Nr¹   r  r  )r   r¦   rS   r   Únone)Ú	reduction)r%   rÛ   rY   rW   rf   rà   r   rÜ   r  r  rß   )
rN   r  r  r  rF   r  r   r   r~   Ú
recon_lossr   r   r	   r‡   j  s   
"


zDiscreteVAE.forwardc                 C   s¬   | j rM| jd dkrM| ¡ }|jd }| jjd | j |kr!| jn| jjd | }| ¡ | j||| …< | j| | _| j| jjd krFd| _|  jd7  _|  jd7  _d S )Né
   r   rS   )râ   rå   rh   rf   rF   rã   r   rä   )rN   rF   Úlr¾   r   r   r	   r  ~  s   
,zDiscreteVAE.log_codes)rŠ   r‹   rŒ   r@   r%   r	  r.   Úno_gradr   r  r  r  r‡   r  r   r   r   rO   r	   r·   É   s:    íg	r·   )rÑ   Úmathr   r.   Útorch.distributedrk   Útorch.nnr¨   Útorch.nn.functionalr´   ra   r*   Úeinopsr   r
   r   r6   r9   ÚModuler:   rŽ   r¥   r®   r·   r   r   r   r	   Ú<module>   s"    
ÿT$