o
    
j@&                     @   s|   d dl Z d dlZd dlZd dlZd dlm  mZ d dlZd dl	m
Z
 ed dd ZdddZG d	d
 d
ejjjZdS )    N)
load_audio   c                 C   sF   i }| D ]}|| }t |tsJ ||vrg ||< || | q|S )z2Returns a dictionary of samples keyed by language.)
isinstancestrappend)samplescolsamples_by_colsamplecol_val r   V/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/xtts/trainer/dataset.pykey_samples_by_col   s   r   Fc                 C   s   t | |}|rt|| d }nt||}|jd | }|dk r(|jd d }|jd | }|r4d}ntd|}|| }	|d d ||	f }tj|d||jd  fd}||	g}
||jd |
fS )N   r   )pad)r   intrandomrandintshapeFr   )gt_pathmax_sample_lengthmin_sample_lengthsample_rateis_evalrel_clipsample_lengthgap
rand_startrand_end	cond_idxsr   r   r   get_prompt_slice   s    
r"   c                   @   sF   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dS )XTTSDatasetFc                 C   s   || _ |j}t | _|j| _|j| _|j| _|| _|| _	|| _
|j| _|j| _|j| _| jd ur5| jd us7J || _|sYt|j t| j t| jd| _td| j  d S |   d S )Nlanguagez > Sampling by language:)config
model_argssetfailed_samplesdebug_loading_failuresdebug_failuresmax_conditioning_lengthmin_conditioning_lengthr   	tokenizerr   max_wav_lengthmax_wav_lenmax_text_lengthmax_text_len"gpt_use_masking_gt_prompt_approachuse_masking_gt_prompt_approachr   r   seedtraining_seedshuffler   printkeyscheck_eval_samples)selfr%   r   r-   r   r   r&   r   r   r   __init__4   s(   zXTTSDataset.__init__c                 C   s   t d g }| jD ]9}z| |\}}}}}}W n   Y q	|d u s<| jd ur/|jd | jks<| jd ur=|jd | jkr=q	|| q	|| _t dt| j d S )Nz# > Filtering invalid eval samples!!r   r   z& > Total eval samples after filtering:)r7   r   	load_itemr/   r   r1   r   len)r:   new_samplesr
   tseq_wavr   r   r   r9   O   s   
zXTTSDataset.check_eval_samplesc                 C   sb   | j ||}t|}t|dkr!J d| d| j | t|dkr/J d| |S )Nr   zUNK token found in z -> r   zStop token found in )r-   encodetorch	IntTensoranydecode)r:   textlangtokensr   r   r   get_textb   s
   
*zXTTSDataset.get_textc                 C   s   t |d }| ||d }|d }t|| j}|d u s$t| dkr&t|d u s4|jd d| j k r6t| jrLt	|| j
| j| j| j\}}}tj}	n"d|v rZ|d d urZ|d n|}
t	|
| j
| j| j| j\}}	}tj}|||||	|fS )NrG   r$   
audio_filer   r   g      ?reference_path)r   rJ   r   r   r=   strip
ValueErrorr   r3   r"   r+   r,   r   rC   nan)r:   r
   rG   r?   	audiopathrA   condr@   r!   cond_len
ref_sampler   r   r   r<   j   s,   

zXTTSDataset.load_itemc              
   C   s   | j r| j| }t|}n&tt| j }tdt| j| d }| j| | }|d t| }|| j	v rI| j
rEtd|d  d | d S z| |\}}}}}	}
W n    | j
rktd|d  dt   | j	| | d  Y S |d u s| jd ur|jd	 | jks| jd ur|jd | jkr| j
r|d ur|d urtd|d  d
|jd	  d|jd   | j	| | d S |tj|jd tjd|tj|jd	 tjd||d|	tjurtj|	tjdnt|	g|
tjurt|
nt|
gd}|S )Nr   r   r@   zIgnoring sample rK   z) because it was already ignored before !!zerror loading  r   z: ranges are out of bounds; z, )dtype)rG   text_lengthsrA   wav_lengths	filenamesconditioning	cond_lensr!   )r   r   r   r   choicelistr8   r   r=   r(   r*   r7   r<   sysexc_infoaddr/   r   r1   rC   tensorlong	unsqueezerO   )r:   indexr
   	sample_idrH   r?   rP   rA   rQ   rR   r!   resr   r   r   __getitem__   sN   


$

zXTTSDataset.__getitem__c                 C   s(   | j rt| jS tdd | j D S )Nc                 S   s   g | ]}t |qS r   )r=   ).0vr   r   r   
<listcomp>       z'XTTSDataset.__len__.<locals>.<listcomp>)r   r=   r   sumvalues)r:   r   r   r   __len__   s   
zXTTSDataset.__len__c           
         sj  t  } fdd d D  t d  d< t d  d< t d  d< t d  d< t d  d< t d  rId  d< t d  rVd  d<  d  } d  }t||}t|d	|}| }| }t	|D ]/} d
 | }t|||d  d | f<  d | }	t|	||d d d  d | f< q{| d< | d<  S )Nc                    s    i | ]   fd dD qS )c                    s   g | ]}|  qS r   r   )rg   dickr   r   ri      rj   z5XTTSDataset.collate_fn.<locals>.<dictcomp>.<listcomp>r   )rg   batchro   r   
<dictcomp>   s     z*XTTSDataset.collate_fn.<locals>.<dictcomp>r   rW   rV   rY   rZ   r!   r   rG   rA   padded_text)
r=   rC   stackrE   isnanmaxrD   FloatTensorzero_range)
r:   rr   Br1   r/   text_padded
wav_paddedirG   rA   r   rq   r   
collate_fn   s2   &zXTTSDataset.collate_fnNF)
__name__
__module____qualname__r;   r9   rJ   r<   rf   rm   r   r   r   r   r   r#   3   s    
 ;r#   r   )osr   r]   rC   torch.nn.functionalnn
functionalr   torch.utils.dataTTS.tts.models.xttsr   set_num_threadsr   r"   utilsdataDatasetr#   r   r   r   r   <module>   s    

