o
    
jF                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZmZmZ dd ZG dd	 d	eZG d
d deZ dd Z!e"dkrd dlm#Z#m$Z$mZ e j%e_%e j&e_&e#j'dej(dZ)e$'dZ*e)+dZ)e), Z)dZ-e*e-dddj.Z.e.+dZ.e/ E e)j%e.dddddddd d	Z0e1e*j2e0dd e)j%e.dddddddd dd
Z3d Z4e3D ]Z5e*j2e5ddZ6e4e67 Z4qe1e4 W d   dS 1 sw   Y  dS dS )!    N)CallableListOptionalUnion)nn)	BeamSearchScorerConstrainedBeamSearchScorerDisjunctiveConstraintGenerationConfigGenerationMixinLogitsProcessorListPhrasalConstraintPreTrainedModelStoppingCriteriaList)GenerateOutputSampleOutputloggerc                 C   sP   | dkrd S t |  t j rt j|  tj|  t|  dt jj	_
d S )NT)torchmanual_seedcudais_availablemanual_seed_allnprandomseedbackendscudnndeterministic)r    r   W/home/kuhnn/.local/lib/python3.10/site-packages/TTS/tts/layers/xtts/stream_generator.py
setup_seed   s   


r!   c                       s   e Zd Z fddZ  ZS )StreamGenerationConfigc                    s$   t  jdi | |dd| _d S )N	do_streamFr   )super__init__popr#   )selfkwargs	__class__r   r    r%   '   s   zStreamGenerationConfig.__init__)__name__
__module____qualname__r%   __classcell__r   r   r)   r    r"   &   s    r"   c                   @   s"  e Zd Ze 							ddeej dee dee dee	 dee
eejgee f  d	ee d
eeejf fddZe 											ddejdee dee	 dee dee dee deeeee f  dee dee dee dee d	ee d
eeejf fddZdS )NewGenerationMixinNFr   inputsgeneration_configlogits_processorstopping_criteriaprefix_allowed_tokens_fnsynced_gpusreturnc           %         sr
  |     du r"| jjrt| j}	|	| jkrtd |	| _| j t	   j
dBi |}
|dur5|nt }|dur>|nt } jdu rp jdurp|
dddu rXtd  j}t|trd|d }td| d | _| | j|
\}}}
|jd } j|
d<  j|
d	<  j|
d
< dtt| jj v }d|
v}|
dddu r|r|r|  | j j|
d< | jj!sՈ jdurt"#|dddf  jkdkrtd | jj!rd|
vr| $||
|}
| jj!r| j%| j& j|
|j'd}n|}|jd }|ddu o	 j(du}|r j)du rtd j( dt* n|r/ j)dur/ j)|  _(n|s< j)dur<t+d j,durV j, j(krVt+d j, d j( d| j(kru| jj!rcdnd}td| d| d j( d  j-dup j.du} j/duo j/dko j0du o j1duo j1dk} j2dko j3dko j0du o| o| } j2dkoԈ j3dkoԈ j0du oԈ j4du o| o| } j2dko j3dko j4du o| o| } j2dko j3dko j0du o| o| } j2dko j3dko j0du o| o| } j2dko2 j3dko2| o2| } j3 j2kr>t+d|rK j0du rKt+d | j'j5|j'j5krjtd!|j'j5 d"| j'j5 d#| j'j5 d$t* | j6 ||||d%}| j7 |d&}|r j8dkrt+d' j8 d(| j9|f|| j j j: j;|d)|
S |r͈ j8dkrt+d' j8 d*| j<|f j/ j1|| j j j: j;|d+	|
S |r| = }| j>dB| j8| jj!d,|
\}}
| j?|f||| j j j: j;|d-|
S |r+| = }| j>dB| j8| jj!d,|
\}}
| j@|f||| j j j: j;|d-|
S |ry j8 j2kr9t+d.|j(du rCt+d/tA| j2|j' jB jC j8d0}| j>dB| j2| jj!d,|
\}}
| jD||f|| j j j: j;|d)|
S |r| = }|j(du rt+d/tA| j8  j2|j' jB jCd1}| j>dB| j2 j8 | jj!d,|
\}}
| jE||f||| j j j: j;|d-|
S |r9 j8 j2krt+d. j2 j3 dkrt+d2|j(du rt+d/|d3du o jFd4k}|st+d5tA| j2|j(|j' jB jC j8 j3d6}| j>dB| j2| jj!d,|
\}}
| jG||f|| j j j: j;|d)|
S |r7 j8 j2krGt+d.|j(du rQt+d/ j2dkr[t+d7 j0rct+d8 j3durs j3dkrst+d9g }  j-dur~ j-}  j.dur  fd:d;}!t j.trtH j.dkr|!   j.D ]`}"t|"d trt|"trtH|"dkr|!  tId<d= |"D r|!  tId>d= |"D r|!  tJ|"}#n!t|"trtH|"dkr|!  tId?d= |"D r|!  tK|"}#| L|# qtM| | j2|j' jB jC j8d@}$| j>dB| j2| jj!d,|
\}}
| jN|f|$|| j j j: j;|dA|
S dS )Ca  

        Generates sequences of token ids for models with a language modeling head.

        <Tip warning={true}>

        Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
        model's default generation configuration. You can override any `generation_config` by passing the corresponding
        parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`.

        For an overview of generation strategies and code examples, check out the [following
        guide](./generation_strategies).

        </Tip>

        Parameters:
            inputs (`torch.Tensor` of varying shape depending on the modality, *optional*):
                The sequence used as a prompt for the generation or as model inputs to the encoder. If `None` the
                method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs`
                should of in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of
                `input_ids`, `input_values`, `input_features`, or `pixel_values`.
            generation_config (`~generation.GenerationConfig`, *optional*):
                The generation configuration to be used as base parametrization for the generation call. `**kwargs`
                passed to generate matching the attributes of `generation_config` will override them. If
                `generation_config` is not provided, the default will be used, which had the following loading
                priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
                configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
                default values, whose documentation should be checked to parameterize generation.
            logits_processor (`LogitsProcessorList`, *optional*):
                Custom logits processors that complement the default logits processors built from arguments and
                generation config. If a logit processor is passed that is already created with the arguments or a
                generation config an error is thrown. This feature is intended for advanced users.
            stopping_criteria (`StoppingCriteriaList`, *optional*):
                Custom stopping criteria that complement the default stopping criteria built from arguments and a
                generation config. If a stopping criteria is passed that is already created with the arguments or a
                generation config an error is thrown. This feature is intended for advanced users.
            prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], List[int]]`, *optional*):
                If provided, this function constraints the beam search to allowed tokens only at each step. If not
                provided no constraint is applied. This function takes 2 arguments: the batch ID `batch_id` and
                `input_ids`. It has to return a list with the allowed tokens for the next generation step conditioned
                on the batch ID `batch_id` and the previously generated tokens `inputs_ids`. This argument is useful
                for constrained generation conditioned on the prefix, as described in [Autoregressive Entity
                Retrieval](https://arxiv.org/abs/2010.00904).
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
            kwargs:
                Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
                forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
                specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.

        Return:
            [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
            or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.

                If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
                [`~utils.ModelOutput`] types are:

                    - [`~generation.GreedySearchDecoderOnlyOutput`],
                    - [`~generation.SampleDecoderOnlyOutput`],
                    - [`~generation.BeamSearchDecoderOnlyOutput`],
                    - [`~generation.BeamSampleDecoderOnlyOutput`]

                If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
                [`~utils.ModelOutput`] types are:

                    - [`~generation.GreedySearchEncoderDecoderOutput`],
                    - [`~generation.SampleEncoderDecoderOutput`],
                    - [`~generation.BeamSearchEncoderDecoderOutput`],
                    - [`~generation.BeamSampleEncoderDecoderOutput`]
        Na%  You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)attention_maskzThe attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.r   z)Setting `pad_token_id` to `eos_token_id`:z for open-end generation.output_attentionsoutput_hidden_states	use_cacheencoder_outputsr   zA decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.)decoder_start_token_idbos_token_idmodel_kwargsdevice
max_lengthzUNeither `max_length` nor `max_new_tokens` has been set, `max_length` will default to z (`generation_config.max_length`). Controlling `max_length` via the config is deprecated and `max_length` will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.a.  Both `max_new_tokens` and `max_length` have been set but they serve the same purpose -- setting a limit to the generated output length. Remove one of those arguments. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)z3Unfeasible length constraints: the minimum length (z%) is larger than the maximum length ()decoder_input_ids	input_idszInput length of z is z, but `max_length` is set to zX. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.   FTz;`num_beam_groups` has to be smaller or equal to `num_beams`zbDiverse beam search cannot be used in sampling mode. Make sure that `do_sample` is set to `False`.z~You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on z, whereas the model is on z. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('z ') before running `.generate()`.)r1   input_ids_seq_lengthencoder_input_idsr4   r2   )r1   r3   z)num_return_sequences has to be 1, but is z when doing greedy search.)r2   r3   pad_token_ideos_token_idoutput_scoresreturn_dict_in_generater5   z when doing contrastive search.)	top_kpenalty_alphar2   r3   rG   rH   rI   rJ   r5   )rC   expand_sizeis_encoder_decoder)r2   logits_warperr3   rG   rH   rI   rJ   r5   zA`num_return_sequences` has to be smaller or equal to `num_beams`.z5`max_length` needs to be a stopping_criteria for now.)
batch_size	num_beamsr?   length_penaltydo_early_stoppingnum_beam_hyps_to_keep)rP   rQ   r?   rR   rS   zK`num_beams` should be divisible by `num_beam_groups` for group beam search.	typical_p      ?z?Decoder argument `typical_p` is not supported with beam groups.)rP   rQ   r@   r?   rR   rS   rT   num_beam_groupszB`num_beams` needs to be greater than 1 for constrained generation.z9`do_sample` needs to be false for constrained generation.z?`num_beam_groups` not supported yet for constrained generation.c                      s   t d j d)Nzn`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]`of positive integers, but is .)
ValueErrorforce_words_idsr   r1   r   r    	typeerrorB  s
   z.NewGenerationMixin.generate.<locals>.typeerrorc                 s   s    | ]	}t |t V  qd S N)
isinstancelist.0	token_idsr   r   r    	<genexpr>R  s    z.NewGenerationMixin.generate.<locals>.<genexpr>c                 s   s"    | ]}t d d |D V  qdS )c                 s   $    | ]}t |t p|d k V  qdS r   Nr^   intra   token_idr   r   r    rc   U     " z8NewGenerationMixin.generate.<locals>.<genexpr>.<genexpr>N)anyr`   r   r   r    rc   T  s
    
c                 s   rd   re   rf   rh   r   r   r    rc   ^  rj   )constraintsrP   rQ   r?   rR   rS   rT   )constrained_beam_scorerr2   r3   rG   rH   rI   rJ   r5   r   )O_validate_model_classr1   _from_model_configr"   from_model_configconfigwarningswarncopydeepcopyupdater   r   rG   rH   getr   warningr^   r_   _prepare_model_inputsr=   shaper8   r9   r:   setinspect	signatureforward
parameterskeys&_prepare_attention_mask_for_generationrN   r   sum._prepare_encoder_decoder_kwargs_for_generation)_prepare_decoder_input_ids_for_generationr<   r?   r@   max_new_tokensUserWarningrY   
min_lengthrl   rZ   rK   	do_samplerL   rQ   rW   r#   type_get_logits_processor_get_stopping_criterianum_return_sequencesgreedy_searchrI   rJ   contrastive_search_get_logits_warper_expand_inputs_for_generationsamplesample_streamr   rR   early_stoppingbeam_searchbeam_samplerU   group_beam_searchlenrk   r	   r   appendr   constrained_beam_search)%r'   r0   r1   r2   r3   r4   r5   r   r(   new_generation_configr>   rH   inputs_tensormodel_input_namerP   accepts_attention_maskrequires_attention_maskrC   rE   has_default_max_lengthinput_ids_stringis_constraint_gen_modeis_contrastive_search_gen_modeis_greedy_gen_modeis_sample_gen_modeis_sample_gen_stream_modeis_beam_gen_modeis_beam_sample_gen_modeis_group_beam_gen_moderO   beam_scorerhas_default_typical_pfinal_constraintsr\   word_ids
constraintrm   r   r[   r    generate-   sl  T








 	

		
		





	


	










zNewGenerationMixin.generaterC   rO   r@   rG   rH   r8   r9   rI   rJ   c                 +   sJ   |dur|nt  }|dur|nt }|dur"tdt t||}|dur(|nt  }|dur1|n| jj}|dur;|n| jj}t	|t
rG|g}|
durM|
n| jj}
|durW|n| jj}|	dura|	n| jj}	|durk|n| jj}|ru|
rudnd}|r}|r}dnd}|r|rdnd}|r|	rdnd}||jd d}d}	 |rt|rdnd	|j}tj|tjjd
 | dkrdS | j|fi |}| di |d||	d}|r|rq|jdddddf }|||}|||}|r'|
r||f7 }|r|| jjr|jfn|j f7 }| jjr||j!f7 }|	r'|| jjr"|j"fn|j#f7 }t$j%j&|dd}tj'|dd(d |durQ|du rGt)d | |d|     | *|j#d dddf fV  tj+| dddf gdd}| j,||| jjd}|dur|-t. fdd|D / }|0 dks|||r|sdS d}q)a  
        Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and
        can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        <Tip warning={true}>

        In most cases, you do not need to call [`~generation.GenerationMixin.sample`] directly. Use generate() instead.
        For an overview of generation strategies and code examples, check the [following
        guide](./generation_strategies).

        </Tip>

        Parameters:
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            logits_processor (`LogitsProcessorList`, *optional*):
                An instance of [`LogitsProcessorList`]. List of instances of class derived from [`LogitsProcessor`]
                used to modify the prediction scores of the language modeling head applied at each generation step.
            stopping_criteria (`StoppingCriteriaList`, *optional*):
                An instance of [`StoppingCriteriaList`]. List of instances of class derived from [`StoppingCriteria`]
                used to tell if the generation loop should stop.
            logits_warper (`LogitsProcessorList`, *optional*):
                An instance of [`LogitsProcessorList`]. List of instances of class derived from [`LogitsWarper`] used
                to warp the prediction score distribution of the language modeling head applied before multinomial
                sampling at each generation step.
            max_length (`int`, *optional*, defaults to 20):
                **DEPRECATED**. Use `logits_processor` or `stopping_criteria` directly to cap the number of generated
                tokens. The maximum length of the sequence to be generated.
            pad_token_id (`int`, *optional*):
                The id of the *padding* token.
            eos_token_id (`int`, *optional*):
                The id of the *end-of-sequence* token.
            output_attentions (`bool`, *optional*, defaults to `False`):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more details.
            output_hidden_states (`bool`, *optional*, defaults to `False`):
                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                for more details.
            output_scores (`bool`, *optional*, defaults to `False`):
                Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
            return_dict_in_generate (`bool`, *optional*, defaults to `False`):
                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
            model_kwargs:
                Additional model specific kwargs will be forwarded to the `forward` function of the model. If model is
                an encoder-decoder model the kwargs should include `encoder_outputs`.

        Return:
            [`~generation.SampleDecoderOnlyOutput`], [`~generation.SampleEncoderDecoderOutput`] or `torch.LongTensor`:
            A `torch.LongTensor` containing the generated tokens (default behaviour) or a
            [`~generation.SampleDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
            `return_dict_in_generate=True` or a [`~generation.SampleEncoderDecoderOutput`] if
            `model.config.is_encoder_decoder=True`.

        Examples:

        ```python
        >>> from transformers import (
        ...     AutoTokenizer,
        ...     AutoModelForCausalLM,
        ...     LogitsProcessorList,
        ...     MinLengthLogitsProcessor,
        ...     TopKLogitsWarper,
        ...     TemperatureLogitsWarper,
        ...     StoppingCriteriaList,
        ...     MaxLengthCriteria,
        ... )
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
        >>> model = AutoModelForCausalLM.from_pretrained("gpt2")

        >>> # set pad_token_id to eos_token_id because GPT2 does not have a EOS token
        >>> model.config.pad_token_id = model.config.eos_token_id
        >>> model.generation_config.pad_token_id = model.config.eos_token_id

        >>> input_prompt = "Today is a beautiful day, and"
        >>> input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids

        >>> # instantiate logits processors
        >>> logits_processor = LogitsProcessorList(
        ...     [
        ...         MinLengthLogitsProcessor(15, eos_token_id=model.generation_config.eos_token_id),
        ...     ]
        ... )
        >>> # instantiate logits processors
        >>> logits_warper = LogitsProcessorList(
        ...     [
        ...         TopKLogitsWarper(50),
        ...         TemperatureLogitsWarper(0.7),
        ...     ]
        ... )

        >>> stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=20)])

        >>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT
        >>> outputs = model.sample(
        ...     input_ids,
        ...     logits_processor=logits_processor,
        ...     logits_warper=logits_warper,
        ...     stopping_criteria=stopping_criteria,
        ... )

        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
        ['Today is a beautiful day, and a wonderful day.\n\nI was lucky enough to meet the']
        ```Nz`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.r   r   rD   FTg        rV   )op)return_dictr8   r9   r   )dim)num_sampleszGIf `eos_token_id` is defined, make sure that `pad_token_id` is defined.)rN   c                 3   s    | ]} |kV  qd S r]   r   )ra   inext_tokensr   r    rc   g  s    z3NewGenerationMixin.sample_stream.<locals>.<genexpr>)1r   r   rr   rs   r   validate_stopping_criteriar1   rG   rH   r^   rg   rI   r8   r9   rJ   newrz   fill_r   tensortor?   dist
all_reduceReduceOpSUMitemprepare_inputs_for_generationlogitsrq   rN   decoder_attentions
attentionscross_attentionsdecoder_hidden_stateshidden_statesr   
functionalsoftmaxmultinomialsqueezerY   
final_normcat#_update_model_kwargs_for_generationmulr   longmax)r'   rC   r2   r3   rO   r@   rG   rH   r8   r9   rI   rJ   r5   r>   scoresr   r   r   unfinished_sequencesthis_peer_finishedthis_peer_finished_flagmodel_inputsoutputsnext_token_logitsnext_token_scoresprobsr   r   r    r     s   }








"

 z NewGenerationMixin.sample_stream)NNNNNFr   )NNNNNNNNNNF)r+   r,   r-   r   no_gradr   Tensorr"   r   r   r   rg   r   boolr   r   
LongTensorr   r   r   r   r   r   r    r/   ,   s    
    Y	
r/   c                   C   s   t jt_t jt_dS )z'Overload PreTrainedModel for streaming.N)r/   r   r   generate_streamr   r   r   r   r    init_stream_supportq  s   r   __main__)AutoModelForCausalLMAutoTokenizerr   zbigscience/bloom-560m)torch_dtypezcuda:0zhello? 
ptF)return_tensorsadd_special_tokens   T   g333333?gffffff?g333333?)r   r   rK   top_ptemperaturerepetition_penaltyr   r   )skip_special_tokens)	r   r   rK   r   r   r   r   r   r#    )7rt   r|   r   rr   typingr   r   r   r   numpyr   r   torch.distributeddistributedr   r   transformersr   r   r	   r
   r   r   r   r   r   transformers.generation.utilsr   r   r   r!   r"   r/   r   r+   r   r   r   r   from_pretrainedfloat16model	tokenizerr   evalprompt_textrC   r   resultprintdecode	generatorstream_resultxchunkr   r   r   r    <module>   s   ,      K





"