o
    
jX2                     @   s   d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	Z
d dlZd dlmZ d dlmZ d dlmZ defddZd	edefd
dZG dd dZG dd deZdS )    N)AnyDictListTupleUnion)load_config)setup_encoder_model)AudioProcessorpathc                 C   s   |  dr"t| d}t|W  d    S 1 sw   Y  d S |  drFt| d}tj|ddW  d    S 1 s?w   Y  d S td)N.jsonr.pthrbcpu)map_locationUnsupported file type)endswithfsspecopenjsonloadtorch
ValueError)r
   f r   I/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/utils/managers.py	load_file   s   
$
$r   objc                 C   s   | dr&t|d}tj| |dd W d    d S 1 sw   Y  d S | drJt|d}t| | W d    d S 1 sCw   Y  d S td)Nr   w   indentr   wbr   )r   r   r   r   dumpr   saver   )r   r
   r   r   r   r   	save_file   s   
"
"r%   c                   @   s   e Zd ZdZddefddZededefdd	Zeded
e	ddfddZ
dededdfddZdeddfddZdeddfddZdefddZedededee fddZdS )BaseIDManagerz~Base `ID` Manager class. Every new `ID` manager must inherit this.
    It defines common `ID` manager specific functions.
     id_file_pathc                 C   s   i | _ |r| | d S d S N)
name_to_idload_ids_from_file)selfr(   r   r   r   __init__)   s   zBaseIDManager.__init__json_file_pathreturnc                 C   s:   t | d}t|W  d    S 1 sw   Y  d S )Nr   )r   r   r   r   )r.   r   r   r   r   
_load_json/   s   $zBaseIDManager._load_jsondataNc                 C   sB   t | d}tj||dd W d    d S 1 sw   Y  d S )Nr   r   r    )r   r   r   r#   )r.   r1   r   r   r   r   
_save_json4   s   "zBaseIDManager._save_jsonitems	parse_keyc                 C   s   | j ||d| _dS )z{Set IDs from data samples.

        Args:
            items (List): Data sampled returned by `load_tts_samples()`.
        r4   N)parse_ids_from_datar*   )r,   r3   r4   r   r   r   set_ids_from_data9   s   zBaseIDManager.set_ids_from_data	file_pathc                 C   s   t || _dS )z[Set IDs from a file.

        Args:
            file_path (str): Path to the file.
        N)r   r*   r,   r8   r   r   r   r+   A   s   z BaseIDManager.load_ids_from_filec                 C      t | j| dS )zfSave IDs to a json file.

        Args:
            file_path (str): Path to the output file.
        N)r%   r*   r9   r   r   r   save_ids_to_fileI      zBaseIDManager.save_ids_to_filec                 C   s(   | j r| j tt| j  d  S dS )dGet a random embedding.

        Args:

        Returns:
            np.ndarray: embedding.
        r   N)r*   randomchoiceslistkeysr,   r   r   r   get_random_idQ   s   zBaseIDManager.get_random_idc                    s,   t  fdd| D }dd t|D }|S )a  Parse IDs from data samples retured by `load_tts_samples()`.

        Args:
            items (list): Data sampled returned by `load_tts_samples()`.
            parse_key (str): The key to being used to parse the data.
        Returns:
            Tuple[Dict]: speaker IDs.
        c                    s   h | ]}|  qS r   r   ).0itemr5   r   r   	<setcomp>h       z4BaseIDManager.parse_ids_from_data.<locals>.<setcomp>c                 S      i | ]\}}||qS r   r   rD   inamer   r   r   
<dictcomp>i       z5BaseIDManager.parse_ids_from_data.<locals>.<dictcomp>)sorted	enumerate)r3   r4   classesidsr   r5   r   r6   ^   s   
z!BaseIDManager.parse_ids_from_data)r'   )__name__
__module____qualname____doc__strr-   staticmethodr   r0   dictr2   r   r7   r+   r;   r   rC   r   r6   r   r   r   r   r&   $   s     r&   c                       s  e Zd ZdZ					d9deeee f dedededef
 fd	d
Ze	dd Z
e	dd Ze	dd Ze	dd ZdeddfddZedefddZdeddfddZdee ddfddZdedefd d!Zd"edee fd#d$Zdefd%d&Zd:d"ed'ed(edejfd)d*Zdefd+d,Zdefd-d.Zd;d/ed0eddfd1d2Zd3eeee f defd4d5Z d6ee!j"ejf defd7d8Z#  Z$S )<EmbeddingManagerau  Base `Embedding` Manager class. Every new `Embedding` manager must inherit this.
    It defines common `Embedding` manager specific functions.

    It expects embeddings files in the following format:

    ::

        {
            'audio_file_key':{
                'name': 'category_name',
                'embedding'[<embedding_values>]
            },
            ...
        }

    `audio_file_key` is a unique key to the audio file in the dataset. It can be the path to the file or any other unique key.
    `embedding` is the embedding vector of the audio file.
    `name` can be name of the speaker of the audio file.
    r'   Fembedding_file_pathr(   encoder_model_pathencoder_config_pathuse_cudac                    sx   t  j|d i | _i | _g | _d | _d | _|| _|r+t|t	r&| 
| n| | |r8|r:| ||| d S d S d S )N)r(   )superr-   
embeddingsembeddings_by_namesclip_idsencoder
encoder_apr]   
isinstancer@   "load_embeddings_from_list_of_filesload_embeddings_from_fileinit_encoder)r,   rZ   r(   r[   r\   r]   	__class__r   r   r-      s   

zEmbeddingManager.__init__c                 C   
   t | jS zGet number of embeddings.)lenr_   rB   r   r   r   num_embeddings      
zEmbeddingManager.num_embeddingsc                 C   rj   rk   )rl   r`   rB   r   r   r   	num_names   rn   zEmbeddingManager.num_namesc                 C   s*   | j rt| j t| j  d  d S dS )zIDimensionality of embeddings. If embeddings are not loaded, returns zero.r   	embedding)r_   rl   r@   rA   rB   r   r   r   embedding_dim   s    zEmbeddingManager.embedding_dimc                 C      t | j S )zGet embedding names.)r@   r`   rA   rB   r   r   r   embedding_names   s   z EmbeddingManager.embedding_namesr8   r/   Nc                 C   r:   )zmSave embeddings to a json file.

        Args:
            file_path (str): Path to the output file.
        N)r%   r_   r9   r   r   r   save_embeddings_to_file   r<   z(EmbeddingManager.save_embeddings_to_filec                 C   s   t | }tdd | D }dd t|D }tttdd | D }i }| D ]}|d | vrA|d g||d < q-||d  |d  q-||||fS )	zhLoad embeddings from a json file.

        Args:
            file_path (str): Path to the file.
        c                 S   s   h | ]}|d  qS )rK   r   )rD   xr   r   r   rF      rG   z=EmbeddingManager.read_embeddings_from_file.<locals>.<setcomp>c                 S   rH   r   r   rI   r   r   r   rL      rM   z>EmbeddingManager.read_embeddings_from_file.<locals>.<dictcomp>c                 s   s    | ]}|V  qd S r)   r   )rD   	clip_namer   r   r   	<genexpr>   s    z=EmbeddingManager.read_embeddings_from_file.<locals>.<genexpr>rK   rp   )r   rN   valuesrO   r@   setrA   append)r8   r_   speakersr*   ra   r`   ru   r   r   r   read_embeddings_from_file   s   z*EmbeddingManager.read_embeddings_from_filec                 C   s   |  |\| _| _| _| _dS )ztLoad embeddings from a json file.

        Args:
            file_path (str): Path to the target json file.
        N)r|   r*   ra   r_   r`   r9   r   r   r   rf      s   z*EmbeddingManager.load_embeddings_from_file
file_pathsc                 C   s   i | _ g | _i | _i | _|D ]<}| |\}}}}t| j t| @ }|r2td| d| | j | | j	| | j| | j| qdd t
| j D | _ dS )zLoad embeddings from a list of json files and don't allow duplicate keys.

        Args:
            file_paths (List[str]): List of paths to the target json files.
        z  [!] Duplicate embedding names <z> in c                 S   rH   r   r   rI   r   r   r   rL      rM   zGEmbeddingManager.load_embeddings_from_list_of_files.<locals>.<dictcomp>N)r*   ra   r`   r_   r|   ry   rA   r   updateextendrO   )r,   r}   r8   rQ   ra   r_   r`   
duplicatesr   r   r   re      s   z3EmbeddingManager.load_embeddings_from_list_of_filesclip_idxc                 C   s   | j | d S )zGet embedding by clip ID.

        Args:
            clip_idx (str): Target clip ID.

        Returns:
            List: embedding as a list.
        rp   )r_   )r,   r   r   r   r   get_embedding_by_clip   s   	z&EmbeddingManager.get_embedding_by_clipidxc                 C   s
   | j | S )zGet all embeddings of a speaker.

        Args:
            idx (str): Target name.

        Returns:
            List[List]: all the embeddings of the given speaker.
        )r`   )r,   r   r   r   r   get_embeddings_by_name   s   
	z'EmbeddingManager.get_embeddings_by_namec                 C   sR   i }| j  D ]}|d | vr|d g||d < q||d  |d  q|S )zmGet all embeddings by names.

        Returns:
            Dict: all the embeddings of each speaker.
        rK   rp   )r_   rx   rA   rz   )r,   r`   ru   r   r   r   get_embeddings_by_names  s   z(EmbeddingManager.get_embeddings_by_namesnum_samples	randomizec                 C   s   |  |}|du rt|d}|S t||ks#J d| d| |r4ttj||dd}|S t|d| d}|S )aS  Get mean embedding of a idx.

        Args:
            idx (str): Target name.
            num_samples (int, optional): Number of samples to be averaged. Defaults to None.
            randomize (bool, optional): Pick random `num_samples` of embeddings. Defaults to False.

        Returns:
            np.ndarray: Mean embedding.
        Nr   z [!] z has number of samples < )k)r   npstackmeanrl   r>   r?   )r,   r   r   r   r_   r   r   r   get_mean_embedding  s   
 z#EmbeddingManager.get_mean_embeddingc                 C   s,   | j r| j tt| j  d  d S dS )r=   r   rp   N)r_   r>   r?   r@   rA   rB   r   r   r   get_random_embedding+  s   "z%EmbeddingManager.get_random_embeddingc                 C   rr   r)   )rN   r_   rA   rB   r   r   r   	get_clips8  s   zEmbeddingManager.get_clips
model_pathconfig_pathc                 C   sN   || _ t|| _t| j| _| jj| j|d|dd| _tdi | jj| _	dS )zInitialize a speaker encoder model.

        Args:
            model_path (str): Model file path.
            config_path (str): Model config file path.
            use_cuda (bool, optional): Use CUDA. Defaults to False.
        T)evalr]   cacheNr   )
r]   r   encoder_configr   rb   load_checkpointencoder_criterionr	   audiorc   )r,   r   r   r]   r   r   r   rg   ;  s   
zEmbeddingManager.init_encoderwav_filec                    sp   dt f fdd}t|tr.d}|D ]}||}|du r|}q||7 }q|t| d  S ||}|d  S )zCompute a embedding from a given audio file.

        Args:
            wav_file (Union[str, List[str]]): Target file path.

        Returns:
            list: Computed embedding.
        r   c                    sn    j j|  j jd} jjdds j |}t|}nt|} j	r*|
 }|d} j|}|S )N)sruse_torch_specFr   )rc   load_wavsample_rater   model_paramsgetmelspectrogramr   
from_numpyr]   cuda	unsqueezerb   compute_embedding)r   waveformm_inputrp   rB   r   r   _computeU  s   

z>EmbeddingManager.compute_embedding_from_clip.<locals>._computeNr   )rV   rd   r@   rl   tolist)r,   r   r   r_   wfrp   r   rB   r   compute_embedding_from_clipK  s   


z,EmbeddingManager.compute_embedding_from_clipfeatsc                 C   sD   t |tjrt|}|jdkr|d}| jr| }| j	
|S )zCompute embedding from features.

        Args:
            feats (Union[torch.Tensor, np.ndarray]): Input features.

        Returns:
            List: computed embedding.
           r   )rd   r   ndarrayr   r   ndimr   r]   r   rb   r   )r,   r   r   r   r   compute_embeddingsp  s   	


z#EmbeddingManager.compute_embeddings)r'   r'   r'   r'   F)NF)F)%rR   rS   rT   rU   r   rV   r   boolr-   propertyrm   ro   rq   rs   rt   rW   r|   rf   re   r   r   r   r   intr   r   r   r   r   r   rg   r@   r   r   Tensorr   __classcell__r   r   rh   r   rY   m   sP    




&%rY   )r   r>   typingr   r   r   r   r   r   numpyr   r   
TTS.configr   TTS.encoder.utils.generic_utilsr   TTS.utils.audior	   rV   r   r%   r&   rY   r   r   r   r   <module>   s    I