o
    
j                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZ d dlm  m	Z
 d dlmZ dee dejfddZded	ed
ejdejfddZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdS )    N)Tuple)ConvNormshapereturnc                 C   s0   t | dks
J dt| td| d   S )N   z.Can only initialize 2-D embedding matrices ...   )lentorchrandnnpsqrt)r    r   Y/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/delightful_tts/networks.pyinitialize_embeddings   s   r   d_modellengthdevicec                 C   s   t j|| |d}t jd|t j|dd}t t jd| d|d td|    }t || |d d dd df< t 	|| |d d dd df< |d}|S )N)r   r   )dtyper   r   r   g     @)
r	   zerosarangefloat	unsqueezeexpmathlogsincos)r   r   r   pepositiondiv_termr   r   r   positional_encoding   s   ,  
r    c                       s2   e Zd ZdZ				d
 fdd	Zdd	 Z  ZS )BottleneckLayera  
    Bottleneck layer for reducing the dimensionality of a tensor.

    Args:
        in_dim: The number of input dimensions.
        reduction_factor: The factor by which to reduce the number of dimensions.
        norm: The normalization method to use. Can be "weightnorm" or "instancenorm".
        non_linearity: The non-linearity to use. Can be "relu" or "leakyrelu".
        kernel_size: The size of the convolutional kernel.
        use_partial_padding: Whether to use partial padding with the convolutional kernel.

    Shape:
        - Input: :math:`[N, in_dim]` where `N` is the batch size and `in_dim` is the number of input dimensions.

        - Output: :math:`[N, out_dim]` where `out_dim` is the number of output dimensions.
    
weightnormrelu   Fc           	         s   t t|   || _t|| }|| _| jdkrDt||||dkd}|dkr1t|tj	|dd}|| _
t | _|dkrFt | _d S d S d S )Nr   r"   )kernel_sizeuse_weight_norminstancenormT)affine	leakyrelu)superr!   __init__reduction_factorintout_dimr   nn
SequentialInstanceNorm1dprojection_fnReLUnon_linearity	LeakyReLU)	selfin_dimr,   normr4   r%   use_partial_paddingreduced_dimfn	__class__r   r   r+   .   s   	

zBottleneckLayer.__init__c                 C   s"   | j dkr| |}| |}|S )Nr   )r,   r2   r4   r6   xr   r   r   forwardF   s   


zBottleneckLayer.forward)r"   r#   r$   F)__name__
__module____qualname____doc__r+   r@   __classcell__r   r   r<   r   r!      s    r!   c                       s<   e Zd ZdZdef fddZdejdejfddZ  Z	S )	GLUActivationzClass that implements the Gated Linear Unit (GLU) activation function.

    The GLU activation function is a variant of the Leaky ReLU activation function,
    where the output of the activation function is gated by an input tensor.

    slopec                    s   t    t|| _d S N)r*   r+   r/   r5   lrelu)r6   rG   r<   r   r   r+   U   s   
zGLUActivation.__init__r?   r   c                 C   s$   |j ddd\}}|| | }|S )Nr   r   dim)chunkrI   )r6   r?   outgater   r   r   r@   Y   s   zGLUActivation.forward)
rA   rB   rC   rD   r   r+   r	   Tensorr@   rE   r   r   r<   r   rF   M   s    rF   c                       sJ   e Zd Zdedededef fddZdejdejd	ejfd
dZ  ZS )StyleEmbedAttention	query_dimkey_dim	num_units	num_headsc                    sV   t    || _|| _|| _tj||dd| _tj||dd| _tj||dd| _	d S )NF)in_featuresout_featuresbias)
r*   r+   rS   rT   rR   r/   LinearW_queryW_keyW_value)r6   rQ   rR   rS   rT   r<   r   r   r+   `   s   
zStyleEmbedAttention.__init__querykey_softr   c           	      C   s   |  |}| j| j }tjtj||dddd}d  }}| |}| |}tjtj||dddd}tjtj||dddd}t||	dd}|| j
d  }tj|dd}t||}tjtj|dddddd}|S )Nr   rJ   r   r$         ?r   )r[   rS   rT   r	   stacksplitrY   rZ   matmul	transposerR   Fsoftmaxcatsqueeze)	r6   r\   r]   values
split_sizeout_softscores_softqueryskeysr   r   r   r@   j   s   


 zStyleEmbedAttention.forward	rA   rB   rC   r-   r+   r	   rO   r@   rE   r   r   r<   r   rP   _   s    $
rP   c                       s@   e Zd Zdededef fddZdejdejfdd	Z  ZS )
EmbeddingPaddednum_embeddingsembedding_dimpadding_idxc                    sL   t    tj|dftjd}d||< | d| tjt	||f| _
d S )Nr   )r   r   padding_mult)r*   r+   r	   onesint64register_bufferr/   	parameter	Parameterr   
embeddings)r6   ro   rp   rq   rr   r<   r   r   r+      s
   
zEmbeddingPadded.__init__idxr   c                 C   s   | j | j }t||}|S rH   )rx   rr   rc   	embedding)r6   ry   embeddings_zeroedr?   r   r   r   r@      s   zEmbeddingPadded.forwardrm   r   r   r<   r   rn      s    rn   c                       s8   e Zd Zdef fddZdejdejfddZ  ZS )EmbeddingProjBlockrp   c                    s>   t    tt||tdt||tdg| _d S )Ng333333?)r*   r+   r/   
ModuleListrX   r5   layers)r6   rp   r<   r   r   r+      s   



zEmbeddingProjBlock.__init__r?   r   c                 C   s$   |}| j D ]}||}q|| }|S rH   )r~   )r6   r?   reslayerr   r   r   r@      s
   

zEmbeddingProjBlock.forwardrm   r   r   r<   r   r|      s    r|   c                       sB   e Zd Zddededef fddZdejdejfd	d
Z  Z	S )
LinearNormFrU   rV   rW   c                    sH   t    t|||| _tj| jj |r"tj| jj	d d S d S )Ng        )
r*   r+   r/   rX   linearinitxavier_uniform_weight	constant_rW   )r6   rU   rV   rW   r<   r   r   r+      s   
zLinearNorm.__init__r?   r   c                 C   s   |  |}|S rH   )r   r>   r   r   r   r@      s   
zLinearNorm.forward)F)
rA   rB   rC   r-   boolr+   r	   rO   r@   rE   r   r   r<   r   r      s    r   c                       s@   e Zd ZdZdedef fddZdejdejfdd	Z  Z	S )
STLa0  
    A PyTorch module for the Style Token Layer (STL) as described in
    "A Style-Based Generator Architecture for Generative Adversarial Networks"
    (https://arxiv.org/abs/1812.04948)

    The STL applies a multi-headed attention mechanism over the learned style tokens,
    using the text input as the query and the style tokens as the keys and values.
    The output of the attention mechanism is used as the text's style embedding.

    Args:
        token_num (int): The number of style tokens.
        n_hidden (int): Number of hidden dimensions.
    n_hidden	token_numc                    sr   t t|   d}|}|| _tt| j|| | _|d }|| }t	||||d| _
tjjj| jddd d S )Nr   r   )rQ   rR   rS   rT   r   r^   )meanstd)r*   r   r+   r   r/   rw   r	   FloatTensorembedrP   	attentionr   normal_)r6   r   r   rT   Ed_qd_kr<   r   r   r+      s   zSTL.__init__r?   r   c                 C   s@   | d}|d}t| jd|dd}| ||}|S )Nr   r   )sizer   r	   tanhr   expandr   )r6   r?   Nr\   	keys_softemotion_embed_softr   r   r   r@      s
   

zSTL.forward)
rA   rB   rC   rD   r-   r+   r	   rO   r@   rE   r   r   r<   r   r      s    r   )r   typingr   numpyr   r	   torch.nnr/   torch.nn.functional
functionalrc   )TTS.tts.layers.delightful_tts.conv_layersr   r-   rO   r   r   r    Moduler!   rF   rP   rn   r|   r   r   r   r   r   r   <module>   s     
1'