o
    
j%                     @   s   d dl Z d dlZd dlmZmZmZmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ G dd deZdd	 Zd
d Zdd Zddededededef
ddZdefddZdS )    N)AnyDictListUnion)Coqpit)*get_from_config_or_model_args_with_default)EmbeddingManagerc                       s   e Zd ZdZ						ddeee  dededed	ed
ef fddZe	dd Z
e	dd ZdefddZeddddeee ee f dd fddZ  ZS )SpeakerManageruj  Manage the speakers for multi-speaker 🐸TTS models. Load a datafile and parse the information
    in a way that can be queried by speaker or clip.

    There are 3 different scenarios considered:

    1. Models using speaker embedding layers. The datafile only maps speaker names to ids used by the embedding layer.
    2. Models using d-vectors. The datafile includes a dictionary in the following format.

    ::

        {
            'clip_name.wav':{
                'name': 'speakerA',
                'embedding'[<d_vector_values>]
            },
            ...
        }


    3. Computing the d-vectors by the speaker encoder. It loads the speaker encoder model and
    computes the d-vectors for a given clip or speaker.

    Args:
        d_vectors_file_path (str, optional): Path to the metafile including x vectors. Defaults to "".
        speaker_id_file_path (str, optional): Path to the metafile that maps speaker names to ids used by
        TTS models. Defaults to "".
        encoder_model_path (str, optional): Path to the speaker encoder model file. Defaults to "".
        encoder_config_path (str, optional): Path to the spealer encoder config file. Defaults to "".

    Examples:
        >>> # load audio processor and speaker encoder
        >>> ap = AudioProcessor(**config.audio)
        >>> manager = SpeakerManager(encoder_model_path=encoder_model_path, encoder_config_path=encoder_config_path)
        >>> # load a sample audio and compute embedding
        >>> waveform = ap.load_wav(sample_wav_path)
        >>> mel = ap.melspectrogram(waveform)
        >>> d_vector = manager.compute_embeddings(mel.T)
    N F
data_itemsd_vectors_file_pathspeaker_id_file_pathencoder_model_pathencoder_config_pathuse_cudac                    s0   t  j|||||d |r| j|dd d S d S )N)embedding_file_pathid_file_pathr   r   r   speaker_name	parse_key)super__init__set_ids_from_data)selfr   r   r   r   r   r   	__class__ I/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/utils/speakers.pyr   6   s   	zSpeakerManager.__init__c                 C   s
   t | jS N)len
name_to_idr   r   r   r   num_speakersJ   s   
zSpeakerManager.num_speakersc                 C   s   t | j S r   )listr    keysr!   r   r   r   speaker_namesN   s   zSpeakerManager.speaker_namesreturnc                 C   s   | j S r   r    r!   r   r   r   get_speakersR   s   zSpeakerManager.get_speakersconfigr   samplesc                 C   s   d}t | ddr-|rt|d}t | ddrtt | ddd}t | ddr-tt | ddd}t | ddrEt }t | d	drEtt | d	dd
}|S )aH  Initialize a speaker manager from config

        Args:
            config (Coqpit): Config object.
            samples (Union[List[List], List[Dict]], optional): List of data samples to parse out the speaker names.
                Defaults to None.

        Returns:
            SpeakerEncoder: Speaker encoder object.
        Nuse_speaker_embeddingF)r   speaker_file)r   speakers_fileuse_d_vector_filed_vector_file)r   )r   r	   )r)   r*   speaker_managerr   r   r   init_from_configU   s&   



zSpeakerManager.init_from_config)Nr
   r
   r
   r
   Fr   )__name__
__module____qualname____doc__r   r   strboolr   propertyr"   r%   r(   staticmethodr   r   r1   __classcell__r   r   r   r   r	      s8    )


2r	   c                 C   sZ   t jt j| d}t j| d}t| j}||r|S ||r&|S td|  )zFind the speakers.json under the given path or the above it.
    Intended to band aid the different paths returned in restored and continued training.speakers.jsonz" [!] `speakers.json` not found in )	ospathjoindirnamefsspec
get_mapperfsexistsFileNotFoundError)r=   path_restorepath_continuerB   r   r   r   _set_file_pathw   s   

rG   c                 C   s\   t j| d dkr| }nt| }t|d}t|W  d   S 1 s'w   Y  dS )z)Loads speaker mapping if already present.   z.jsonrN)r<   r=   splitextrG   r@   openjsonload)out_path	json_filefr   r   r   load_speaker_mapping   s   $rQ   c                 C   sV   | dur)t | }t|d}tj||dd W d   dS 1 s"w   Y  dS dS )z)Saves speaker mapping if not yet present.Nw   )indent)rG   r@   rK   rL   dump)rN   speaker_mappingspeakers_json_pathrP   r   r   r   save_speaker_mapping   s   "rX   cdatarestore_pathrN   r&   c                    sd  t   | jr|dur j|dd |rVt|}| jr;tj|s5td tj| j	s/t
d | j	  | n@| jsU j} | t fdd|D sUJ dn%| jrc| j	rc | j	 n| jrk| j	skd	| jr{d
| v r{| jr{ | j  jdkrtd jd j |rtj|d}td| d | jr| j	r |  S  |  S )a	  Initiate a `SpeakerManager` instance by the provided config.

    Args:
        c (Coqpit): Model configuration.
        restore_path (str): Path to a previous training folder.
        data (List): Data samples used in training to infer speakers from. It must be provided if speaker embedding
            layers is used. Defaults to None.
        out_path (str, optional): Save the generated speaker IDs to a output path. Defaults to None.

    Returns:
        SpeakerManager: initialized and ready to use instance.
    Nr   r   zXWARNING: speakers.json was not found in restore_path, trying to use CONFIG.d_vector_filezaYou must copy the file speakers.json to restore_path, or set a valid file in CONFIG.d_vector_filec                 3   s    | ]}| j v V  qd S r   r'   ).0speakerr0   r   r   	<genexpr>   s    

z&get_speaker_manager.<locals>.<genexpr>z> [!] You cannot introduce new speakers to a pre-trained model.zNuse_d_vector_file is True, so you need pass a external speaker embedding file.r-   r   z1 > Speaker manager is loaded with {} speakers: {}z, r;   z > Saving `speakers.json` to .)r	   r+   r   rG   r.   r<   r=   rC   printr/   RuntimeErrorload_embeddings_from_filer    load_ids_from_fileallr-   r"   formatr>   save_embeddings_to_filesave_ids_to_file)rY   rZ   r[   rN   r-   speaker_ids_from_dataout_file_pathr   r^   r   get_speaker_manager   sX   




rk   itemsc                    s   t dd | D  t   fdd D }t  fddD }d| t fdd|D }|t j| }t| S )Nc                 S   s   g | ]}|d  qS )r   r   )r\   itemr   r   r   
<listcomp>       z0get_speaker_balancer_weights.<locals>.<listcomp>c                    s   g | ]}  |qS r   )indexr\   l)unique_speaker_namesr   r   rn      s    c                    s"   g | ]}t t |kd  qS )r   )r   npwhererq   )r%   r   r   rn      s   " g      ?c                    s   g | ]} | qS r   r   rq   )weight_speakerr   r   rn      ro   )	rt   arrayuniquetolistlinalgnormtorch
from_numpyfloat)rl   speaker_idsspeaker_countdataset_samples_weightr   )r%   rs   rv   r   get_speaker_balancer_weights   s   r   )NNN)rL   r<   typingr   r   r   r   r@   numpyrt   r|   coqpitr   
TTS.configr   TTS.tts.utils.managersr   r	   rG   rQ   rX   r6   rk   r#   r   r   r   r   r   <module>   s    i
 ?