o
    i                     @  s   d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	m
Z
 er4d dlZd dlZd dlmZ d dlmZ G dd	 d	ed
dZG dd ded
dZG dd deZG dd dZ		
	d!d"dd ZdS )#    )annotations)TYPE_CHECKINGLiteral	TypedDict)	AudioData)TranscribeOutputBaseWhisperCompatibleRecognizerN)Unpack)Whisperc                   @  s&   e Zd ZU ded< ded< ded< dS )LoadModelOptionalParameterszstr | torch.devicedevicestrdownload_rootbool	in_memoryN__name__
__module____qualname____annotations__ r   r   g/home/kuhnn/.local/lib/python3.10/site-packages/speech_recognition/recognizers/whisper_local/whisper.pyr      s   
 r   F)totalc                   @  s2   e Zd ZU dZded< ded< ded< ded	< d
S )TranscribeOptionalParametersz<Transcribe optional parameters & DecodingOptions parameters.zfloat | tuple[float, ...]temperaturez"Literal['transcribe', 'translate']taskr   languager   fp16N)r   r   r   __doc__r   r   r   r   r   r      s   
 r   c                   @  s^   e Zd ZU ded< ded< ded< ded< ded< d	ed
< ded< ded< ded< ded< dS )Segmentintidseekfloatstartendr   textz	list[int]tokensr   avg_logprobcompression_rationo_speech_probNr   r   r   r   r   r   &   s   
 r   c                   @  s    e Zd ZdddZdd
dZdS )TranscribableAdaptermodelr
   returnNonec                 C  s
   || _ d S )N)r,   )selfr,   r   r   r   __init__4   s   
zTranscribableAdapter.__init__audio_array
np.ndarrayTranscribeOutputBase[Segment]c                 K  s2   d|vrdd l }|j |d< | jj|fi |S )Nr   r   )torchcudais_availabler,   
transcribe)r/   r1   kwargsr4   r   r   r   r7   7   s   zTranscribableAdapter.transcribeN)r,   r
   r-   r.   )r1   r2   r-   r3   )r   r   r   r0   r7   r   r   r   r   r+   3   s    
r+   base
audio_datar   r,   r   	show_dictr   load_options"LoadModelOptionalParameters | Nonetranscribe_options$Unpack[TranscribeOptionalParameters]r-   #str | TranscribeOutputBase[Segment]c           	      K  s@   ddl }|j|fi |pi }tt|}|j|fd|i|S )a  Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using Whisper.

    Pick ``model`` from output of :command:`python -c 'import whisper; print(whisper.available_models())'`.
    See also https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages.

    If ``show_dict`` is true, returns the full dict response from Whisper, including the detected language. Otherwise returns only the transcription.

    You can specify:

        * ``language``: recognition language, an uncapitalized full language name like "english" or "chinese". See the full language list at https://github.com/openai/whisper/blob/main/whisper/tokenizer.py

            * If not set, Whisper will automatically detect the language.

        * ``task``

            * If you want transcribe + **translate** to english, set ``task="translate"``.

    Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options.
    r   Nr;   )whisper
load_modelr   r+   	recognize)	
recognizerr:   r,   r;   r<   r>   rA   whisper_modelwhisper_recognizerr   r   r   rC   B   s   rC   )r9   FN)r:   r   r,   r   r;   r   r<   r=   r>   r?   r-   r@   )
__future__r   typingr   r   r   speech_recognition.audior   1speech_recognition.recognizers.whisper_local.baser   r   numpynpr4   typing_extensionsr	   rA   r
   r   r   r   r+   rC   r   r   r   r   <module>   s"    