o
    
jJ                     @   s@   d dl Z d dlZd dlmZmZmZ dddZG dd dZdS )	    N)Wav2Vec2CTCTokenizerWav2Vec2FeatureExtractorWav2Vec2ForCTC~c           
      C   s^  |du ri }|| vsJ d| d|  t | dkrdS t |dkr(|t |  S | |kr.| S | d |d krI| d t| dd |dd || S t | t |d f}||v r^|| \}}nt| |dd ||}t ||d}||f||< t | d t |f}||v r|| \}}	nt| dd |||}t ||d}	||	f||< ||	kr|S || S )z
    A clever function that aligns s1 to s2 as best it can. Wherever a character from s1 is not found in s2, a '~' is
    used to replace that character.

    Finally got to use my DP skills!
    NzFound the skip character z in the provided string, r       )lenmax_alignmentreplace)
s1s2skip_characterrecordtake_s1_keytake_s1take_s1_scoretake_s2_keytake_s2take_s2_score r   \/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/tortoise/wav2vec_alignment.pyr	      s0   &r	   c                   @   s.   e Zd ZdZdddZdddZddd	Zd
S )Wav2VecAlignmentz:
    Uses wav2vec2 to perform audio<->text alignment.
    cudac                 C   s2   t d | _td| _td| _|| _d S )Nz3jbetker/wav2vec2-large-robust-ft-libritts-voxpopulizfacebook/wav2vec2-large-960hzjbetker/tacotron-symbols)	r   from_pretrainedcpumodelr   feature_extractorr   	tokenizerdevice)selfr   r   r   r   __init__1   s   
zWav2VecAlignment.__init__]  c                    sZ  |j d }t 9 | j| j| _|| j}tj||d}||	  t
| d  }| |j}| j | _W d    n1 sEw   Y  |d }| j|d }t| |}||j d  }	| j|t|tdkrzdgS d d dg  fdd}
|
 }t|D ]\}}| }||kr ||	  tdkr|
 }q nq|
  tdkrt t|kst||gd J d
 | tt D ]H} | dkr&t|d t D ]} | dkr|} nqt||D ]#} |  |d   }|| d | || d   |d    |< qqވ d d S )Ni>  gHz>r   r   c                     sf   t dkrd S d} d}|dkr1 d t dkr#d S d} d}|dks| S )Nr   r   r"   )r   popappend)poppedpopped_char
alignmentsexpected_charsexpected_tokensr   r   pop_till_you_winP   s   




z0Wav2VecAlignment.align.<locals>.pop_till_you_winzalignment_debug.pthFzSomething went wrong with the alignment algorithm. I've dumped a file, 'alignment_debug.pth' toyour current working directory. Please report this along with the file so it can get fixed.)shapetorchno_gradr   tor   
torchaudio
functionalresamplemeansqrtvarlogitsr   r   decodeargmaxtolistr	   lowerencodelistr   r#   	enumerater$   saverange)r   audioexpected_textaudio_sample_rateorig_len	clip_normr6   pred_stringfixed_expectationw2v_compressionr+   next_expected_tokenilogittopjnext_found_tokengapr   r'   r   align7   sb   




0zWav2VecAlignment.alignc                 C   s  d|vr|S | d}|d g}|dd  D ]}d|v s J d|| d qg }d}tt|D ]$}	|	d dkrOtd|t||	  d }
|||
f |t||	 7 }q3d|}| |||}g }|D ]}|\}}||d d || || f  qhtj	|dd	S )
N[r   r   ]z>Every "[" character must be paired with a "]" with no nesting.   r   r"   )dim)
splitextendr?   r   maxr$   joinrO   r-   cat)r   r@   rA   rB   splittedfully_splitsplnon_redacted_intervals
last_pointrI   end_interval	bare_textr(   output_audionristartstopr   r   r   redact}   s*   


$zWav2VecAlignment.redactN)r   )r!   )__name__
__module____qualname____doc__r    rO   rd   r   r   r   r   r   ,   s
    

Fr   )r   N)r-   r0   transformersr   r   r   r	   r   r   r   r   r   <module>   s
    
&