o
    ´‹
j€  ã                   @   st   d Z ddlZddlZddlmZ ddlZddlZddlmZm	Z	 G dd„ dej
ƒZG dd„ dƒZddefdd„ZdS )zN
Custom tokenizer model.
Author: https://www.github.com/gitmylo/
License: MIT
é    N)ÚZipFile)ÚnnÚoptimc                       s^   e Zd Zd‡ fdd„	Zdd„ Ze ¡ d	d
„ ƒZdd„ Zddd„Z	dd„ Z
eddd„ƒZ‡  ZS )ÚHubertTokenizeré   é   é'  r   c                    s¤   t ƒ  ¡  |}|dkrtj||ddd| _|}|dkr.tj||ddd| _t |d¡| _d}t ||¡| _tjdd| _	d | _
t ¡ | _|| _|| _|| _|| _d S )Nr   é   T)Úbatch_firsté   i   ©Údim)ÚsuperÚ__init__r   ÚLSTMÚlstmÚLinearÚintermediateÚfcÚ
LogSoftmaxÚsoftmaxÚ	optimizerÚCrossEntropyLossÚlossfuncÚ
input_sizeÚhidden_sizeÚoutput_sizeÚversion)Úselfr   r   r   r   Ú	next_size©Ú	__class__© úW/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/bark/hubert/tokenizer.pyr      s"   


zHubertTokenizer.__init__c                 C   s:   |   |¡\}}| jdkr|  |¡}|  |¡}|  |¡}|S )Nr   )r   r   r   r   r   )r   ÚxÚ_r"   r"   r#   Úforward%   s   



zHubertTokenizer.forwardc                 C   s   t j| |ƒddS )aº  
        Used to get the token for the first
        :param x: An array with shape (N, input_size) where N is a whole number greater or equal to 1, and input_size is the input size used when creating the model.
        :return: An array with shape (N,) where N is the same as N from the input. Every number in the array is a whole number in range 0...output_size - 1 where output_size is the output size used when creating the model.
        r   r   )ÚtorchÚargmax)r   r$   r"   r"   r#   Ú	get_token-   s   zHubertTokenizer.get_tokenc                 C   s   t  |  ¡ d¡| _d S )Ngü©ñÒMbP?)r   ÚAdamÚ
parametersr   )r   r"   r"   r#   Úprepare_training6   s   z HubertTokenizer.prepare_trainingFc                 C   sÒ   | j }| j}|  ¡  | |ƒ}t|ƒ}|jd }||kr&|| }	||	d … }n||k r9|| }	|d |	 …d d …f }t t|ƒ| j¡}
d|
tt|ƒƒ|f< |
 	d¡}
|||
ƒ}|r_t
d| ¡ ƒ | ¡  | ¡  d S )Nr   r   ÚcudaÚLoss)r   r   Ú	zero_gradÚlenÚshaper'   Úzerosr   ÚrangeÚtoÚprintÚitemÚbackwardÚstep)r   Úx_trainÚy_trainÚlog_lossr   r   Úy_predÚy_train_lenÚ
y_pred_lenÚdiffÚy_train_hotÚlossr"   r"   r#   Ú
train_step9   s(   


zHubertTokenizer.train_stepc                 C   s’   d  tj |¡ d¡d d… ¡d }t |  ¡ |¡ t| j	| j
| j| jƒ}t|dƒ}| || ¡ ¡ | ¡  W d   ƒ d S 1 sBw   Y  d S )NÚ.éÿÿÿÿú/.infoÚa)ÚjoinÚosÚpathÚbasenameÚsplitr'   ÚsaveÚ
state_dictÚDatar   r   r   r   r   ÚwritestrÚclose)r   rI   Ú	info_pathÚdata_from_modelÚ	model_zipr"   r"   r#   rL   `   s   $
"þzHubertTokenizer.saveNc                 C   s¼   d}t | ƒ,}dd„ | ¡ D ƒ}|r|d nd }|r'd}t | |¡ d¡¡}| ¡  W d   ƒ n1 s5w   Y  |r@tƒ }nt|j|j	|j
|jƒ}| tj| |d¡ |r\| |¡}|S )NTc                 S   s   g | ]	}|  d ¡r|‘qS )rE   )Úendswith)Ú.0Úfiler"   r"   r#   Ú
<listcomp>l   s    z8HubertTokenizer.load_from_checkpoint.<locals>.<listcomp>r   Fzutf-8)Úmap_location)r   ÚnamelistrN   ÚloadÚreadÚdecoderP   r   r   r   r   r   Úload_state_dictr'   r4   )rI   rX   ÚoldrS   Ú
filesMatchrV   rR   Úmodelr"   r"   r#   Úload_from_checkpointh   s*   

úü
z$HubertTokenizer.load_from_checkpoint)r   r   r   r   )F©N)Ú__name__Ú
__module__Ú__qualname__r   r&   r'   Úno_gradr)   r,   rB   rL   Ústaticmethodra   Ú__classcell__r"   r"   r    r#   r      s    

'r   c                   @   sL   e Zd ZU eed< eed< eed< eed< dd	d
„Zedd„ ƒZdd„ ZdS )rN   r   r   r   r   r   r   r   r   c                 C   s   || _ || _|| _|| _d S rb   ©r   r   r   r   )r   r   r   r   r   r"   r"   r#   r   ‡   s   
zData.__init__c                 C   s(   t  | ¡}t|d |d |d |d ƒS )Nr   r   r   r   )ÚjsonÚloadsrN   )ÚstringÚdatar"   r"   r#   rZ      s   
z	Data.loadc                 C   s    | j | j| j| jdœ}t |¡S )Nri   )r   r   r   r   rj   Údumps)r   rm   r"   r"   r#   rL   ’   s   ü
z	Data.saveN)r   r   r   r   )	rc   rd   re   ÚintÚ__annotations__r   rg   rZ   rL   r"   r"   r"   r#   rN      s   
 

rN   ú	model.pthr   Ú
load_modelc                 C   s†  g g }}|rt j |¡rtd|ƒ t |d¡}ntdƒ tdd d¡}t j | |¡}d | d¡d d… ¡}d}d	}	t j | d
¡}
t  	|
¡D ]$}t j |
|¡}| 
|¡r_| t |¡¡ qH| 
|	¡rl| t |¡¡ qH| ¡  d}	 t|ƒD ])}d}t||ƒD ]\}}| t |¡ d¡t |¡ d¡|d dk¡ |d7 }qqx|}|› d|› d}| |¡ | |¡ td|› dƒ |d7 }qt)NzLoading model fromr-   zCreating new model.r   )r   rC   rD   z_semantic.npyz_semantic_features.npyÚreadyr   é2   Ú_epoch_z.pthzEpoch z
 completed)rH   rI   Úisfiler5   r   ra   r4   rG   rK   ÚlistdirrT   ÚappendÚnumpyrZ   r,   r3   ÚziprB   r'   ÚtensorrL   )Ú	data_pathÚ	save_pathrr   Úsave_epochsÚdata_xÚdata_yÚmodel_trainingÚbase_save_pathÚ
sem_stringÚfeat_stringrs   Ú
input_fileÚ	full_pathÚepochr%   Újr$   ÚyÚsave_pÚsave_p_2r"   r"   r#   Ú
auto_trainœ   sH   



€&ÿ
ü

órŒ   )rq   Nr   )Ú__doc__rj   Úos.pathrH   Úzipfiler   ry   r'   r   r   ÚModuler   rN   ÚstrrŒ   r"   r"   r"   r#   Ú<module>   s    q