o
    
j                     @   s   d Z ddlZddlmZ ddlZddlmZ ddlmZ ddl	m
Z
mZmZ G dd	 d	ejZG d
d dejZG dd de
ZeG dd deZdS )zc
Much of this code is adapted from Andrej Karpathy's NanoGPT
(https://github.com/karpathy/nanoGPT)
    N)	dataclass)nn)
functional   )GPTMLP	GPTConfigc                       $   e Zd Z fddZdd Z  ZS )NonCausalSelfAttentionc                    s   t    |j|j dksJ tj|jd|j |jd| _tj|j|j|jd| _t	|j
| _t	|j
| _|j| _|j| _|j
| _
ttjjdoN| j
dk| _d S )Nr      biasscaled_dot_product_attentiong        )super__init__n_embdn_headr   Linearr   c_attnc_projDropoutdropoutattn_dropoutresid_dropouthasattrtorchr   flashselfconfig	__class__ Q/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/bark/model_fine.pyr      s   
zNonCausalSelfAttention.__init__c           
      C   s   |  \}}}| |j| jdd\}}}|||| j|| j dd}|||| j|| j dd}|||| j|| j dd}| jrWtj	j
j|||d | jdd}n"||dd dt| d  }	tj|	dd}	| |	}	|	| }|dd |||}| | |}|S )	N   dimr   F)	attn_mask	dropout_p	is_causalg      ?)sizer   splitr   viewr   	transposer   r   r   r   r   r   mathsqrtFsoftmaxr   
contiguousr   r   )
r   xBTCqkvyattr"   r"   r#   forward    s       $
zNonCausalSelfAttention.forward__name__
__module____qualname__r   r>   __classcell__r"   r"   r    r#   r
      s    r
   c                       r	   )	FineBlockc                    s>   t    t|j| _t|| _t|j| _t	|| _
d S N)r   r   r   	LayerNormr   ln_1r
   attnln_2r   mlpr   r    r"   r#   r   =   s
   

zFineBlock.__init__c                 C   s,   ||  | | }|| | | }|S rE   )rH   rG   rJ   rI   )r   r5   r"   r"   r#   r>   D   s   zFineBlock.forwardr?   r"   r"   r    r#   rD   <   s    rD   c                       s.   e Zd Z fddZdd ZdddZ  ZS )	FineGPTc                    s   t    | ` | _ j| _ttt fddt	 jD t
 j jt jt fddt	 jD t jd| _t fddt	 j| jD | _t	| j j D ]}| j| j| jj|d  _q^d S )Nc                    s   g | ]
}t  j jqS r"   )r   	Embeddinginput_vocab_sizer   .0_r   r"   r#   
<listcomp>S   s    z$FineGPT.__init__.<locals>.<listcomp>c                    s   g | ]}t  qS r"   )rD   rN   rQ   r"   r#   rR   W   s    )wteswpedrophln_fc                    s    g | ]}t j j jd dqS )Fr   )r   r   r   output_vocab_sizerN   rQ   r"   r#   rR   \   s    r   )r   r   lm_headr   n_codes_totalr   
ModuleDictdict
ModuleListrangerL   
block_sizer   r   r   n_layerrF   transformern_codes_givenlm_headsweightrS   )r   r   ir    rQ   r#   r   K   s.   


zFineGPT.__init__c                    s&   j }  \}}}|| jjksJ d| d| jj |dks$J d|| jks0J |||ftjd|tj|dd} fddt	| j
jD }tj|dd	}	| j
|}
|	d d d d d d d |d
 f jdd	}| j
||
 }| j
jD ]}||}qy| j
|}| j|| jj  |}|S )Nz"Cannot forward sequence of length z, block size is only r   zcannot predict 0th codebook)dtypedevicec                    s0   g | ]\}}| d d d d |f  dqS )Nr+   )	unsqueeze)rO   re   wteidxr"   r#   rR   o   s    $z#FineGPT.forward.<locals>.<listcomp>r+   r%   r   )rg   r,   r   r_   rZ   r   arangelongrh   	enumeratera   rS   catrT   sumrU   rV   rW   rc   rb   )r   pred_idxrk   rg   btcodespostok_embstok_embpos_embr5   blocklogitsr"   rj   r#   r>   d   s(   

,
zFineGPT.forwardTc                 C   sL   t dd |  D }|r$| jjD ]	}||j 8 }q|| jjj 8 }|S )a8  
        Return the number of parameters in the model.
        For non-embedding count (default), the position embeddings get subtracted.
        The token embeddings would too, except due to the parameter sharing these
        params are actually used as weights in the final layer, so we include them.
        c                 s   s    | ]}|  V  qd S rE   )numel)rO   pr"   r"   r#   	<genexpr>   s    z)FineGPT.get_num_params.<locals>.<genexpr>)rp   
parametersra   rS   rd   r{   rT   )r   non_embeddingn_paramsri   r"   r"   r#   get_num_params|   s   zFineGPT.get_num_params)T)r@   rA   rB   r   r>   r   rC   r"   r"   r    r#   rK   J   s    rK   c                   @   s&   e Zd ZU dZeed< dZeed< dS )FineGPTConfig   rZ   r   rb   N)r@   rA   rB   rZ   int__annotations__rb   r"   r"   r"   r#   r      s   
 r   )__doc__r0   dataclassesr   r   r   torch.nnr   r2   modelr   r   r   Moduler
   rD   rK   r   r"   r"   r"   r#   <module>   s    -A