o
    ´‹
j_x  ã                   @   s,  d dl Z d dlZd dlmZmZmZ d dlZd dlm  m	Z
 d dlmZmZ d dlmZ G dd„ dejƒZG dd„ dejƒZG d	d
„ d
ejƒZG dd„ dejjƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdd„ Zdejdejfdd„Zdefdd„Zdd„ Zdd„ ZG d d!„ d!ejƒZ dS )"é    N)ÚDictÚOptionalÚTuple)ÚTensorÚnn)Ú	Parameterc                       ó&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚTransposeLastNc                    s   t ƒ  ¡  || _d S ©N)ÚsuperÚ__init__Údeconstruct_idx)Úselfr   ©Ú	__class__© úV/home/kuhnn/.local/lib/python3.10/site-packages/TTS/vc/modules/freevc/wavlm/modules.pyr      s   

zTransposeLast.__init__c                 C   s    | j d ur
|| j  }| dd¡S )Néþÿÿÿéÿÿÿÿ)r   Ú	transpose©r   Úxr   r   r   Úforward   s   

zTransposeLast.forwardr
   ©Ú__name__Ú
__module__Ú__qualname__r   r   Ú__classcell__r   r   r   r   r	      s    r	   c                       ó$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚFp32LayerNormc                    ó   t ƒ j|i |¤Ž d S r
   ©r   r   ©r   ÚargsÚkwargsr   r   r   r       ó   zFp32LayerNorm.__init__c                 C   óL   t  | ¡ | j| jd ur| j ¡ nd | jd ur| j ¡ nd | j¡}| |¡S r
   )ÚFÚ
layer_normÚfloatÚnormalized_shapeÚweightÚbiasÚepsÚtype_as©r   ÚinputÚoutputr   r   r   r   #   ó   û
zFp32LayerNorm.forwardr   r   r   r   r   r      ó    r   c                       r   )ÚFp32GroupNormc                    r    r
   r!   r"   r   r   r   r   /   r%   zFp32GroupNorm.__init__c                 C   r&   r
   )r'   Ú
group_normr)   Ú
num_groupsr+   r,   r-   r.   r/   r   r   r   r   2   r2   zFp32GroupNorm.forwardr   r   r   r   r   r4   .   r3   r4   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )ÚGradMultiplyc                 C   s   || _ | |¡}|S r
   )ÚscaleÚnew)Úctxr   r8   Úresr   r   r   r   >   s   
zGradMultiply.forwardc                 C   s   || j  d fS r
   )r8   )r:   Úgradr   r   r   ÚbackwardD   s   zGradMultiply.backwardN)r   r   r   Ústaticmethodr   r=   r   r   r   r   r7   =   s
    
r7   c                       r   )ÚSamePadFc                    s6   t ƒ  ¡  |r|d | _d S |d dkrdnd| _d S )Né   é   r   )r   r   Úremove)r   Úkernel_sizeÚcausalr   r   r   r   J   s   
zSamePad.__init__c                 C   s,   | j dkr|d d …d d …d | j  …f }|S )Nr   )rB   r   r   r   r   r   Q   s   
zSamePad.forward)Fr   r   r   r   r   r?   I   s    r?   c                       s(   e Zd ZdZ‡ fdd„Zdd„ Z‡  ZS )ÚSwishzSwish functionc                    s   t t| ƒ ¡  tj ¡ | _dS )z)Construct an MultiHeadedAttention object.N)r   rE   r   Útorchr   ÚSigmoidÚact©r   r   r   r   r   Z   s   zSwish.__init__c                 C   s   ||   |¡ S r
   )rH   r   r   r   r   r   _   s   zSwish.forward)r   r   r   Ú__doc__r   r   r   r   r   r   r   rE   W   s    rE   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )Ú
GLU_LinearÚsigmoidTc                    s    t t| ƒ ¡  || _|| _|dkrtj ¡ | _n|dkr!t	ƒ | _n|dkr,tj 
¡ | _n
|dkr6tj ¡ | _|rDt ||d d¡| _d S t ||d d¡| _d S )NrL   ÚswishÚreluÚgelurA   TF)r   rK   r   Úglu_typeÚ
output_dimrF   r   rG   Úglu_actrE   ÚReLUÚGELUÚLinearÚlinear)r   Ú	input_dimrQ   rP   Úbias_in_glur   r   r   r   d   s   
zGLU_Linear.__init__c              	   C   sž   |   |¡}| jdkr+|d d …d d …d| j…f |d d …d d …| j| jd …f  }|S |d d …d d …d| j…f |  |d d …d d …| j| jd …f ¡ }|S )NÚbilinearr   rA   )rV   rP   rQ   rR   r   r   r   r   r   x   s   

>DþzGLU_Linear.forward)rL   Tr   r   r   r   r   rK   c   s    rK   c                 C   sH   t tdƒst dtj ¡t_d|  dt tj| dt | d¡   ¡  S )NÚ_arA   g      à?r@   g÷Hmâä¦?é   )	ÚhasattrÚgelu_accurateÚmathÚsqrtÚpirZ   rF   ÚtanhÚpow©r   r   r   r   r]   „   s   
,r]   r   Úreturnc                 C   s   t jj |  ¡ ¡ | ¡S r
   )rF   r   Ú
functionalrO   r)   r.   rc   r   r   r   rO   Š   s   rO   Ú
activationc                 C   sx   | dkrt jS | dkrtS | dkrt d¡ tS | dkrtS | dkr%tjS | dkr-dd	„ S | d
kr5dd	„ S td 	| ¡ƒ‚)z=Returns the activation function corresponding to `activation`rN   rO   Ú	gelu_fastz;--activation-fn=gelu_fast has been renamed to gelu_accurater]   ra   rV   c                 S   ó   | S r
   r   rc   r   r   r   Ú<lambda>   ó    z#get_activation_fn.<locals>.<lambda>Úgluc                 S   rh   r
   r   rc   r   r   r   ri   Ÿ   rj   z --activation-fn {} not supported)
r'   rN   rO   ÚwarningsÚwarnr]   rF   ra   ÚRuntimeErrorÚformat)rf   r   r   r   Úget_activation_fnŽ   s    
rp   c                 C   s¦   dd„ }t | tjƒr|| jjƒ | jdur| jj ¡  t | tjƒr5|| jjƒ | jdur5| jj| j  ¡  t | t	ƒrQ|| j
jjƒ || jjjƒ || jjjƒ dS dS )aš  
    Initialize the weights specific to the BERT Model.
    This overrides the default initializations depending on the specified arguments.
        1. If normal_init_linear_weights is set then weights of linear
           layer will be initialized using the normal distribution and
           bais will be set to the specified value.
        2. If normal_init_embed_weights is set then weights of embedding
           layer will be initialized using the normal distribution.
        3. If normal_init_proj_weights is set then weights of
           in_project_weight for MultiHeadAttention initialized using
           the normal distribution (to be validated).
    c                 S   s$   |   |  ¡ jddd | j¡¡ d S )Nç        g{®Gáz”?)ÚmeanÚstd)Úcopy_ÚcpuÚnormal_ÚtoÚdevice)Údatar   r   r   rv   ²   s   $z!init_bert_params.<locals>.normal_N)Ú
isinstancer   rU   r+   ry   r,   Úzero_Ú	EmbeddingÚpadding_idxÚMultiheadAttentionÚq_projÚk_projÚv_proj)Úmodulerv   r   r   r   Úinit_bert_params¤   s   


ýrƒ   c                    s¼   ˆdkr| S t | tjtjtjfƒsJ ‚| jjdk‰ˆs*| j d¡ˆ  dks)J dƒ‚n%| jdkr;| j	ˆ  dks:J dƒ‚n| jd | jd  }|ˆ  dksOJ dƒ‚‡ ‡‡fdd	„}|  
|¡ | S )
aN  
    Wraps modules and applies quantization noise to the weights for
    subsequent quantization with Iterative Product Quantization as
    described in "Training with Quantization Noise for Extreme Model Compression"

    Args:
        - module: nn.Module
        - p: amount of Quantization Noise
        - block_size: size of the blocks for subsequent quantization with iPQ

    Remarks:
        - Module weights must have the right sizes wrt the block size
        - Only Linear, Embedding and Conv2d modules are supported for the moment
        - For more detail on how to quantize by blocks with convolutional weights,
          see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks"
        - We implement the simplest form of noise here as stated in the paper
          which consists in randomly dropping blocks
    r   é   r@   z0Input features must be a multiple of block sizes©r@   r@   z0Input channels must be a multiple of block sizesz,Kernel size must be a multiple of block sizec           	         s8  | j ršˆs.| j}| d¡}| d¡}tj|ˆ  | |jd}| ˆ¡ | ˆ d¡ d|¡}nT| j}| j	}| j
}| jdkrZtjt|ˆ  | ƒ|jd}| ˆ¡ | ˆ d¡ d|¡}n(tj| d¡| d¡|jd}| ˆ¡ | d¡ d¡ dd| jd | jd ¡}| tj¡}ddˆ  }|| |d¡ | j_d S d S )Nr@   r   ©rx   r   r…   rA   r[   )Útrainingr+   ÚsizerF   Úzerosrx   Ú
bernoulli_Úrepeat_interleaveÚviewÚin_channelsÚout_channelsrC   ÚintÚ	unsqueezeÚrepeatrw   ÚboolÚmasked_fillry   )	Úmodr0   r+   Úin_featuresÚout_featuresÚmaskr   rŽ   Ús©Ú
block_sizeÚis_convÚpr   r   Ú_forward_pre_hookñ   s2   



þ

(Þz&quant_noise.<locals>._forward_pre_hook)rz   r   rU   r|   ÚConv2dr+   Úndimrˆ   rC   r   Úregister_forward_pre_hook)r‚   rœ   rš   Úkr   r   r™   r   Úquant_noiseÅ   s   

&r¢   c                       s  e Zd ZdZ															d,‡ fd	d
„	Zdd„ Zd-dd„Zdd„ Z								d.dee	 dee	 dee	 dee
ee
eee	 f f  dededee	 dededee	 dee	ee	 ee	 f fdd„Zedee	 dee	 ded ededee	 fd!d"„ƒZdee
ee
eee	 f f  de
eee	 f fd#d$„Zde
ee
eee	 f f d%e
eee	 f fd&d'„Zd(ed ed)efd*d+„Z‡  ZS )/r~   zSMulti-headed attention.

    See "Attention Is All You Need" for more details.
    Nrq   TFé   é    é€   c                    sà  t ƒ  ¡  || _|d ur|n|| _|d ur|n|| _| j|ko#| j|k| _|| _t |¡| _	|| _
|| _|| _| j
rAt ||¡| _|| | _| j| _| j| _| j| | jksZJ dƒ‚| jd | _|	| _|
| _| jrp| jspJ dƒ‚d}|rvd}|}|}ttj| j||d||ƒ| _ttj| j||d||ƒ| _ttj|||d||ƒ| _ttj|||d||ƒ| _|rÇtt dd|¡ƒ| _tt dd|¡ƒ| _nd  | _| _|| _ || _!| j!rêt | jd¡| _"t t #d|dd¡¡| _$|  %¡  d S )	Nz(embed_dim must be divisible by num_headsg      à¿zCSelf-attention requires query, key and value to be of the same sizeTF)r,   r@   r£   )&r   r   Ú	embed_dimÚkdimÚvdimÚqkv_same_dimÚ	num_headsr   ÚDropoutÚdropout_moduleÚhas_relative_attention_biasÚnum_bucketsÚmax_distancer|   Úrelative_attention_biasÚhead_dimÚ
q_head_dimÚ
k_head_dimÚscalingÚself_attentionÚencoder_decoder_attentionr¢   rU   r€   r   r   Úout_projr   rF   r   Úbias_kÚbias_vÚadd_zero_attnÚgru_rel_posÚgrep_linearÚonesÚgrep_aÚreset_parameters)r   r¦   rª   r§   r¨   Údropoutr,   Úadd_bias_kvrº   rµ   r¶   Úq_noiseÚqn_block_sizer­   r®   r¯   r»   Úrescale_initÚk_biasÚk_embed_dimÚq_embed_dimr   r   r   r   !  sR   

ÿzMultiheadAttention.__init__c                 C   s  | j r1tjj| jjdt d¡ d tjj| jjdt d¡ d tjj| j	jdt d¡ d ntj | jj¡ tj | jj¡ tj | j	j¡ tj | j
j¡ | j
jd ur`tj | j
jd¡ | jd urltj | j¡ | jd urxtj | j¡ | jr…tj | jj¡ d S d S )Nr@   rA   )Úgainrq   )r©   r   ÚinitÚxavier_uniform_r€   r+   r^   r_   r   r   r·   r,   Ú	constant_r¸   Úxavier_normal_r¹   r­   r°   rI   r   r   r   r¿   m  s"    

ÿz#MultiheadAttention.reset_parametersc           	      C   sÆ   | j }| j}d}|r |d }||dk tj¡| 7 }t |¡}n
t |t |¡¡ }|d }||k }|t | 	¡ | ¡t
 || ¡ ||   tj¡ }t |t ||d ¡¡}|t |||¡7 }|S )Nr   rA   r@   )r®   r¯   rw   rF   ÚlongÚabsÚminÚ
zeros_likeÚlogr)   r^   Ú	full_likeÚwhere)	r   Úrelative_positionsÚbidirectionalr®   r¯   Úrelative_bucketsÚ	max_exactÚis_smallÚrelative_postion_if_larger   r   r   Ú_relative_positions_bucketƒ  s.   ÿþüÿz-MultiheadAttention._relative_positions_bucketc                 C   sz   t j|t jdd d …d f }t j|t jdd d d …f }|| }| j|dd}| | jjj¡}|  |¡}| g d¢¡}|S )N)ÚdtypeT)rÕ   )rA   r   r@   )	rF   ÚarangerÍ   rÚ   rw   r°   r+   rx   Úpermute)r   Úquery_lengthÚ
key_lengthÚcontext_positionÚmemory_positionÚrelative_positionÚrelative_position_bucketÚvaluesr   r   r   Úcompute_biasž  s   
zMultiheadAttention.compute_biasÚkeyÚvalueÚkey_padding_maskÚincremental_stateÚneed_weightsÚ	static_kvÚ	attn_maskÚbefore_softmaxÚneed_head_weightsÚposition_biasrd   c           ,      C   sþ	  |
rd}|j jdk}| ¡ \}}}|}|| jksJ ‚t| ¡ ƒ|||gks'J ‚|durP| ¡ \}}}tj ¡ sP||ks=J ‚|dusCJ ‚|sPJ ||jdd… kƒ‚| j	rp|du rp|  
||¡}| d¡ |ddd¡ || j ||¡}|sY|du rY|sYtj ¡ sY| j| jkrY|dur|dus’J ‚|du s˜J ‚d}|dur|}| jrþ| dd¡}| ¡ dd… | jdf }|j|Ž }| dddd¡}| ¡ \}}}}t |  |¡ |||dd	¡jdd
d¡jddd\}}||| j d  d }| || j dd¡| }| d||f¡}| jj}|du rt | jj¡}tj|||| j| jt dg¡t  | jj| jj| j!jf¡| j"| j#| j$| j%j&| j'j(| j'j| j)|||d| jj(| jj(| j!j(d\}}|||fS |dur|  *|¡} | dur~d| v r~|r~| j+rx| j,rzJ ‚d }}nd} | j,r•|  |¡}!|  |¡}"|  !|¡}#n@| j+rº|  |¡}!|du r¯|du sªJ ‚d }"}#n&|  |¡}"|  !|¡}#n|durÄ|dusÆJ ‚|  |¡}!|  |¡}"|  !|¡}#|!| j-9 }!| j"dur,| j#dusèJ ‚t  |"| j" d|d¡g¡}"t  |#| j# d|d¡g¡}#|durtj || .| d¡d¡gdd}|dur,tj || .| d¡d¡gdd}|! /¡  ||| j | j¡ dd¡}!|"durS|" /¡  d|| j | j0¡ dd¡}"|#duri|# /¡  d|| j | j¡ dd¡}#| dur"d| v r¤| d }$|$dus~J ‚|$ || j d| j¡}%|r|%}"n|"dus–J ‚tj |%|"gdd}"|" d¡}d| v rÕ| d }&|&dus´J ‚|& || j d| j¡}'|rÅ|'}#n|#dusÌJ ‚tj |'|#gdd}#d}(d| v rà| d }(|"durê|#dusìJ ‚t1j2||(||" d¡|d}|" || jd| j¡| d< |# || jd| j¡| d< || d< |dusJ ‚|  3|| ¡}|"dus)J ‚|" d¡|ks3J ‚|durA| 4¡ dkrAd}|durZ| d¡|ksPJ ‚| d¡|ksZJ ‚| j$rÈ|#duseJ ‚|d7 }tj |"|" .|" d¡df|" ¡ dd…  ¡gdd}"tj |#|# .|# d¡df|# ¡ dd…  ¡gdd}#|dur°tj || .| d¡d¡gdd}|durÈtj |t 5| d¡d¡ 6|¡gdd}t 7|!|" dd¡¡})|  8|)|||¡})t|) ¡ ƒ|| j ||gksëJ ‚|durù| d¡}|)|7 })|dur;|) || j||¡})|s|) 9| d¡ d¡ :tj;¡t<dƒ¡})n|) dd¡})|) 9|t<dƒ¡})|) dd¡})|) || j ||¡})|	rC|)|#|fS |dur| jdkr’|! || j|| j¡}| ¡ \}}}}t |  |¡ |||dd	¡jdd
d¡jddd\}}||| j d  d }| || j dd¡| }| |) ¡ ¡}|)| })tj=|)dd}*|* 6|)¡})|  %|)¡}+|#dusµJ ‚t 7|+|#¡}t| ¡ ƒ|| j || jgksÍJ ‚| dd¡ /¡  |||¡}|  '|¡}d})|rú|* || j||¡ dd¡})|
sú|)j>dd})||)|fS )a¥  Input shape: Time x Batch x Channel

        Args:
            key_padding_mask (ByteTensor, optional): mask to exclude
                keys that are pads, of shape `(batch, src_len)`, where
                padding elements are indicated by 1s.
            need_weights (bool, optional): return the attention weights,
                averaged over heads (default: False).
            attn_mask (ByteTensor, optional): typically used to
                implement causal attention, where the mask prevents the
                attention from looking forward in time (default: None).
            before_softmax (bool, optional): return the raw attention
                weights and values before the attention softmax.
            need_head_weights (bool, optional): return the attention
                weights for each head. Implies *need_weights*. Default:
                return the average attention weights over all heads.
        TÚxlaNrA   r   r@   r   r[   r„   F)Úkeepdim©Údimg      ð?g       @)Úuse_separate_proj_weightÚq_proj_weightÚk_proj_weightÚv_proj_weightÚprev_keyÚ
prev_valueÚprev_key_padding_mask)rè   rú   Ú
batch_sizeÚsrc_lenrë   z-inf)?rx   Útyperˆ   r¦   ÚlistrF   ÚjitÚis_scriptingÚshaper­   rå   r   r‘   rŒ   rª   r²   r±   r»   r   rÝ   rL   r¼   ÚsumÚchunkr¾   r€   r,   rÐ   r   r'   Úmulti_head_attention_forwardÚemptyÚcatr   r¸   r¹   rº   r¬   rœ   r·   r+   r‡   Ú_get_input_bufferr¶   rµ   r´   Ú	new_zerosÚ
contiguousr³   r~   Ú_append_prev_key_padding_maskÚ_set_input_bufferró   r‰   r.   ÚbmmÚapply_sparse_maskr“   rw   r’   r)   Úsoftmaxrr   ),r   Úqueryræ   rç   rè   ré   rê   rë   rì   rí   rî   rï   Úis_tpuÚtgt_lenÚbszr¦   rü   Úkey_bszÚ_Úattn_mask_rel_posÚquery_layerÚnew_x_shapeÚ_BÚ_HÚ_LÚ__Úgate_aÚgate_bÚgate_a_1Úk_proj_biasr   ÚattnÚsaved_stateÚqr¡   ÚvÚ	_prev_keyrø   Ú_prev_valuerù   rú   Úattn_weightsÚattn_weights_floatÚ
attn_probsr   r   r   r   ¨  sª  
&ÿ
ÿü

 ÿ
þ


ê


€










 
þû"
"
"




û
22
 
þû"


þ

 ÿ
þ

$

zMultiheadAttention.forwardrú   rû   rü   c                 C   s  |d ur
|r
|}|S |d ur!| d ur!t j| ¡ |  ¡ gdd}|S |d urP|| d¡krJt j||| d¡ f|jd}t j| ¡ | ¡ gdd}|S | ¡ }|S | d ur||  d¡kryt j|||  d¡ f| jd}t j| ¡ |  ¡ gdd}|S |  ¡ }|S |}|S )Nr@   rò   r†   )rF   r  r)   rˆ   r‰   rx   )rè   rú   rû   rü   rë   Únew_key_padding_maskÚfillerr   r   r   r
  È  s8   	çìþôõþýÿz0MultiheadAttention._append_prev_key_padding_maskc                 C   s    |   |d¡}|d ur|S i }|S ©NÚ
attn_state)Úget_incremental_state)r   ré   ÚresultÚempty_resultr   r   r   r  î  s
   z$MultiheadAttention._get_input_bufferÚbufferc                 C   s   |   |d|¡S r+  )Úset_incremental_state)r   ré   r0  r   r   r   r  ø  s   z$MultiheadAttention._set_input_bufferr  r  c                 C   s   |S r
   r   )r   r&  r  rü   r  r   r   r   r  ÿ  s   z$MultiheadAttention.apply_sparse_mask)NNrq   TFFFFrq   r£   Fr¤   r¥   FF)T)NNTFNFFN)r   r   r   rJ   r   r¿   rÚ   rå   r   r   r   Ústrr’   r   r   r>   r   r
  r  r  r  r   r   r   r   r   r~     sž    	îL
ôýüûúùø	÷
öõô
ó  "ÿþýüûú%ÿ
þ
þ
ýr~   )!r^   rl   Útypingr   r   r   rF   Útorch.nn.functionalr   re   r'   r   Útorch.nnr   ÚModuler	   Ú	LayerNormr   Ú	GroupNormr4   ÚautogradÚFunctionr7   r?   rE   rK   r]   rO   r2  rp   rƒ   r¢   r~   r   r   r   r   Ú<module>   s(   	!!V