o
    
j$                     @   s   d Z ddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z
 G dd dejZG d	d
 d
ejZG dd dejZG dd dejZeG dd deZG dd dejZdS )zc
Much of this code is adapted from Andrej Karpathy's NanoGPT
(https://github.com/karpathy/nanoGPT)
    N)	dataclass)Coqpit)nn)
functionalc                       s(   e Zd ZdZ fddZdd Z  ZS )	LayerNormzNLayerNorm but with an optional bias. PyTorch doesn't support simply bias=Falsec                    s@   t    tt|| _|rtt|| _d S d | _d S N)	super__init__r   	Parametertorchonesweightzerosbias)selfndimr   	__class__ L/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/bark/model.pyr	      s   
$zLayerNorm.__init__c                 C   s   t || jj| j| jdS )Ngh㈵>)F
layer_normr   shaper   r   xr   r   r   forward   s   zLayerNorm.forward)__name__
__module____qualname____doc__r	   r   __classcell__r   r   r   r   r      s    r   c                       &   e Zd Z fddZdddZ  ZS )CausalSelfAttentionc              	      s   t    |j|j dksJ tj|jd|j |jd| _tj|j|j|jd| _t	|j
| _t	|j
| _|j| _|j| _|j
| _
ttjjd| _| jsg| dtt|j|jdd|j|j d S d S )Nr      r   scaled_dot_product_attentionr      )r   r	   n_embdn_headr   Linearr   c_attnc_projDropoutdropoutattn_dropoutresid_dropouthasattrr   r   flashregister_buffertrilr   
block_sizeviewr   configr   r   r   r	      s$   
zCausalSelfAttention.__init__NFc                 C   s  |  \}}}| |j| jdd\}}}	|||| j|| j dd}|||| j|| j dd}|	||| j|| j dd}	|d urc|d }
|d }tj|
|fdd}tj||	fdd}	|j	d }|du rq||	f}nd }| j
r|d ur}d}nd}tjjj|||	| j|d}n=||dd	 d
t| d	  }|| jd d d d || |d |f dktd}tj|d	d}| |}||	 }|dd |||}| | |}||fS )N   dimr&   r   TF)	dropout_p	is_causalg      ?z-inf)sizer*   splitr'   r5   r(   	transposer   catr   r1   r   r   r%   r-   mathsqrtmasked_fillr   floatr   softmaxr.   
contiguousr/   r+   )r   r   past_kv	use_cacheBTCqkvpast_key
past_valueFULL_Tpresentr=   yattr   r   r   r   4   s6      

$6
zCausalSelfAttention.forwardNFr   r   r   r	   r   r    r   r   r   r   r"      s    r"   c                       s$   e Zd Z fddZdd Z  ZS )MLPc                    s^   t    tj|jd|j |jd| _tjd|j |j|jd| _t|j	| _	t
 | _d S )N   r$   )r   r	   r   r)   r'   r   c_fcr+   r,   r-   GELUgelur6   r   r   r   r	   g   s
   
zMLP.__init__c                 C   s,   |  |}| |}| |}| |}|S r   )r[   r]   r+   r-   r   r   r   r   r   n   s
   



zMLP.forwardrX   r   r   r   r   rY   f   s    rY   c                       r!   )Blockc                    sL   t    t|j|jd| _t|| _t|j|jd| _t	|| _
|| _d S )Nr$   )r   r	   r   r'   r   ln_1r"   attnln_2rY   mlp	layer_idx)r   r7   rc   r   r   r   r	   w   s   



zBlock.__init__NFc                 C   s>   | j | |||d\}}|| }|| | | }||fS )NrI   rJ   )r`   r_   rb   ra   )r   r   rI   rJ   attn_outputprev_kvsr   r   r   r      s   zBlock.forwardrW   rX   r   r   r   r   r^   v   s    r^   c                   @   sn   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< d	Z
eed
< dZeed< dZeed< dS )	GPTConfigi   r4   i@'  input_vocab_sizeoutput_vocab_size   n_layerr(   i   r'   g        r-   Tr   N)r   r   r   r4   int__annotations__rh   ri   rk   r(   r'   r-   rF   r   boolr   r   r   r   rg      s   
 rg   c                       s0   e Zd Z fddZd
ddZddd	Z  ZS )GPTc                    s   t     jd usJ  jd usJ  jd usJ  | _ttt	 j j
t	 j j
t jt fddt jD t j
 jdd| _tj j
 jdd| _d S )Nc                    s   g | ]}t  |qS r   )r^   ).0idxr7   r   r   
<listcomp>   s    z GPT.__init__.<locals>.<listcomp>r$   )wtewpedrophln_fF)r   r	   rh   ri   r4   r7   r   
ModuleDictdict	Embeddingr'   r,   r-   
ModuleListrangerk   r   r   transformerr)   lm_headr6   r   rr   r   r	      s   

	zGPT.__init__Tc                 C   sB   t dd |  D }|r|| jjj 8 }|| jjj 8 }|S )a8  
        Return the number of parameters in the model.
        For non-embedding count (default), the position embeddings get subtracted.
        The token embeddings would too, except due to the parameter sharing these
        params are actually used as weights in the final layer, so we include them.
        c                 s   s    | ]}|  V  qd S r   )numel)rp   pr   r   r   	<genexpr>   s    z%GPT.get_num_params.<locals>.<genexpr>)sum
parametersr~   rt   r   r   ru   )r   non_embeddingn_paramsr   r   r   get_num_params   s
   zGPT.get_num_paramsFNc                 C   s  |j }| \}}|d ur|dksJ | j|}	n]|r-|jd dks%J |jd d }n|| jjks?J d| d| jj |rqtj| j|d d d df | j|d d ddf  | j|d d dd f gdd}	n| j|}	|d u rd}
t	d gt
| jj }n	|d d d	}
|d u rtj|
||
 tj|d
}|d}|jd|fksJ | j|}| j|	| }|rdnd }tt| jj|D ]\}\}}||||d\}}|r||f }q| j|}| |d d dgd d f }||fS )Nr&   i     z"Cannot forward sequence of length z, block size is only i   r9   r   r;   )dtypedevicer   rd   r>   )r   r?   r~   rt   r   r7   r4   r   rB   tuplelenrw   arangelong	unsqueezeru   rv   	enumerateziprx   r   )r   rq   merge_contextrI   position_idsrJ   r   _ttok_embpast_lengthpos_embr   new_kvblockpast_layer_kvkvlogitsr   r   r   r      sL   6

zGPT.forward)T)FNNF)r   r   r   r	   r   r   r    r   r   r   r   ro      s    
ro   )r   rC   dataclassesr   r   coqpitr   r   torch.nnr   r   Moduler   r"   rY   r^   rg   ro   r   r   r   r   <module>   s    L