
    ˟fh-                         S SK Jr  S SKJr  S SKJr  S SKJrJrJ	r	J
r
JrJrJr  SSKJr  SSKJrJrJr   " S S	5      r " S
 S5      r\\\4   r\
\   r " S S5      rg)    )aliases)sha256)dumps)AnyDictIteratorListOptionalTupleUnion   )TOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   X   \ rS rSr S&S\S\S\S\SSS	\\   4S
 jjr	S\
S\4S jrS\
S\4S jr\S\4S j5       rS\4S jrS\4S jrS'S jr\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\S    4S j5       r\S\4S j5       r\S\\   4S  j5       r \S\\   4S! j5       r!S(S"\S\4S# jjr"\S\4S$ j5       r#S%r$g))CharsetMatch
   Npayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     Xl         X l        X0l        XPl        X@l        S U l        / U l        SU l        S U l        S U l	        X`l
        g )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          P/home/kuhnn/miniconda3/lib/python3.13/site-packages/charset_normalizer/models.py__init__CharsetMatch.__init__   sK      '.'6,5%348+-,/"04/3&5    otherreturnc                     [        U[        5      (       dA  [        SR                  [	        UR
                  5      [	        U R
                  5      5      5      eU R                  UR                  :H  =(       a    U R                  UR                  :H  $ )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr)   r.   s     r*   __eq__CharsetMatch.__eq__$   sl    %..8??(#dnn*= 
 }}.X43C3CuGXGX3XXr-   c                    [        U[        5      (       d  [        e[        U R                  UR                  -
  5      n[        U R
                  UR
                  -
  5      nUS:  a  US:  a  U R
                  UR
                  :  $ US:  aU  US::  aO  [        U R                  5      [        :  a  U R                  UR                  :  $ U R                  UR                  :  $ U R                  UR                  :  $ )zA
Implemented to make sorted available upon CharsetMatches items.
g{Gz?g{Gz?)
r1   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r)   r.   chaos_differencecoherence_differences       r*   __lt__CharsetMatch.__lt__-   s     %.."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r-   c                 \    S[        [        U 5      5      [        U R                  5      -  -
  $ )Ng      ?)r@   r4   rawr)   s    r*   rA   CharsetMatch.multi_byte_usageC   s"    c#d)ns488}455r-   c                     U R                   c&  [        U R                  U R                  S5      U l         U R                   $ )Nstrict)r(   r4   r   r   rH   s    r*   __str__CharsetMatch.__str__G   s.    <<t}}dnnhGDL||r-   c                 N    SR                  U R                  U R                  5      $ )Nz<CharsetMatch '{}' bytes({})>)r3   r6   r7   rH   s    r*   __repr__CharsetMatch.__repr__M   s    .55dmmTEUEUVVr-   c                     [        U[        5      (       a  X:X  a$  [        SR                  UR                  5      5      eS Ul        U R                  R                  U5        g )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r1   r   r<   r3   r5   r(   r$   appendr8   s     r*   add_submatchCharsetMatch.add_submatchP   sP    %..%-MTTOO  E"r-   c                     U R                   $ N)r   rH   s    r*   r6   CharsetMatch.encoding[   s    ~~r-   c                     / n[         R                  " 5        HK  u  p#U R                  U:X  a  UR                  U5        M(  U R                  U:X  d  M:  UR                  U5        MM     U$ )zr
Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
)r   itemsr6   rR   )r)   also_known_asups       r*   encoding_aliasesCharsetMatch.encoding_aliases_   sW    
 $&MMODA}}!$$Q'!#$$Q'	 $
 r-   c                     U R                   $ rV   r"   rH   s    r*   bomCharsetMatch.boml       ###r-   c                     U R                   $ rV   r`   rH   s    r*   byte_order_markCharsetMatch.byte_order_markp   rc   r-   c                 J    U R                    Vs/ s H  oS   PM	     sn$ s  snf )z
Return the complete list of possible languages found in decoded sequence.
Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
r   r!   )r)   es     r*   r   CharsetMatch.languagest   s"     #oo.o!o...s    c                 ,   U R                   (       dr  SU R                  ;   a  gSSKJnJn  [        U R                  5      (       a  U" U R                  5      OU" U R                  5      n[        U5      S:X  d  SU;   a  gUS   $ U R                   S   S   $ )zz
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r!   could_be_from_charsetcharset_normalizer.cdrn   ro   r   r6   r@   )r)   rn   ro   r   s       r*   languageCharsetMatch.language|   s      $444  X *$--88 &dmm4'6  9~"my&@ Q<q!!$$r-   c                     U R                   $ rV   )r    rH   s    r*   r>   CharsetMatch.chaos   s    $$$r-   c                 J    U R                   (       d  gU R                   S   S   $ )Nr   r   r   rh   rH   s    r*   r?   CharsetMatch.coherence   s     q!!$$r-   c                 0    [        U R                  S-  SS9$ Nd      )ndigits)roundr>   rH   s    r*   percent_chaosCharsetMatch.percent_chaos   s    TZZ#%q11r-   c                 0    [        U R                  S-  SS9$ rz   )r~   r?   rH   s    r*   percent_coherenceCharsetMatch.percent_coherence   s    T^^c)155r-   c                     U R                   $ )z
Original untouched bytes.
)r   rH   s    r*   rG   CharsetMatch.raw   s    
 }}r-   c                     U R                   $ rV   )r$   rH   s    r*   submatchCharsetMatch.submatch   s    ||r-   c                 2    [        U R                  5      S:  $ Nr   )r@   r$   rH   s    r*   has_submatchCharsetMatch.has_submatch   s    4<< 1$$r-   c                    U R                   b  U R                   $ [        U 5       Vs/ s H  n[        U5      PM     nn[        [	        U Vs1 s H  o3(       d  M  UiM     sn5      5      U l         U R                   $ s  snf s  snf rV   )r#   r4   r   sortedlist)r)   chardetected_rangesrs       r*   	alphabetsCharsetMatch.alphabets   s}    +''' -0I0
,5DM$I 	 0
  &d+L!!A+L&MN###0
 ,Ms   A<
BBc                 t    U R                   /U R                   Vs/ s H  oR                  PM     sn-   $ s  snf )z
The complete list of encoding that output the exact SAME str result and therefore could be the originating
encoding.
This list does include the encoding available in property 'encoding'.
)r   r$   r6   )r)   ms     r*   rq   "CharsetMatch.could_be_from_charset   s.     t||"D|!::|"DDD"Ds   5r6   c                     U R                   b  U R                   U:w  a&  Xl         [        U 5      R                  US5      U l        U R                  $ )z
Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
Any errors will be simply ignored by the encoder NOT replaced.
replace)r'   r4   encoder&   )r)   r6   s     r*   outputCharsetMatch.output   sH    
   (D,A,AX,M$,!#&t9#3#3Hi#HD ###r-   c                 P    [        U R                  5       5      R                  5       $ )zg
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
)r   r   	hexdigestrH   s    r*   r7   CharsetMatch.fingerprint   s    
 dkkm$..00r-   )r   r"   r!   r$   r%   r    r'   r&   r   r(   r#   rV   )r.   r   r/   N)utf_8)%__name__
__module____qualname____firstlineno__bytesr4   floatboolr
   r+   objectr9   rD   propertyrA   rL   rO   rS   r6   r	   r]   ra   re   r   rs   r>   r?   r   r   rG   r   r   r   rq   r   r7   __static_attributes__ r-   r*   r   r   
   s    *.66 6 	6
 6 &6 "#62YF Yt Y(F (t (, 6% 6 6 W# W	# #   
$s) 
 
 $T $ $ $ $ $ /49 / / %# % %6 %u % % %5 % %
 2u 2 2 65 6 6 U   $~.   %d % % 	$49 	$ 	$ EtCy E E	$s 	$ 	$ 1S 1 1r-   r   c                       \ rS rSrSrSS\\\      4S jjrS\	\   4S jr
S\\\4   S\4S	 jrS\4S
 jrS\4S jrS\SS4S jrS\S   4S jrS\S   4S jrSrg)CharsetMatches   z
Container with every CharsetMatch items ordered by default from most probable to the less one.
Act like a list(iterable) but does not implements all related methods.
Nresultsc                 B    U(       a  [        U5      U l        g / U l        g rV   )r   _results)r)   r   s     r*   r+   CharsetMatches.__init__   s    ?FF7OBr-   r/   c              #   8   #    U R                    S h  vN   g  N7frV   r   rH   s    r*   __iter__CharsetMatches.__iter__   s     ==  s   itemc                     [        U[        5      (       a  U R                  U   $ [        U[        5      (       a2  [	        US5      nU R                   H  nXR
                  ;   d  M  Us  $    [        e)z
Retrieve a single item either by its position or encoding name (alias may be used here).
Raise KeyError upon invalid index or encoding not present in results.
F)r1   intr   r4   r   rq   KeyError)r)   r   results      r*   __getitem__CharsetMatches.__getitem__   s_    
 dC  ==&&dC  T5)D--777!M ( r-   c                 ,    [        U R                  5      $ rV   r@   r   rH   s    r*   __len__CharsetMatches.__len__   s    4==!!r-   c                 2    [        U R                  5      S:  $ r   r   rH   s    r*   __bool__CharsetMatches.__bool__   s    4==!A%%r-   c                    [        U[        5      (       d-  [        SR                  [	        UR
                  5      5      5      e[        UR                  5      [        ::  a\  U R                   HL  nUR                  UR                  :X  d  M  UR                  UR                  :X  d  M;  UR                  U5          g   U R                  R                  U5        [        U R                  5      U l	        g)zf
Insert a single match. Will be inserted accordingly to preserve sort.
Can be inserted as a submatch.
z-Cannot append instance '{}' to CharsetMatchesN)r1   r   r<   r3   r4   r5   r@   rG   r   r   r7   r>   rS   rR   r   )r)   r   matchs      r*   rR   CharsetMatches.append  s    
 $--?FF'  txx=,,$$(8(88U[[DJJ=V&&t, ' 	T"t}}-r-   r   c                 D    U R                   (       d  gU R                   S   $ )zA
Simply return the first match. Strict equivalent to matches[0].
Nr   r   rH   s    r*   bestCharsetMatches.best  s     }}}}Qr-   c                 "    U R                  5       $ )z@
Redundant method, call the method best(). Kept for BC reasons.
)r   rH   s    r*   firstCharsetMatches.first  s     yy{r-   r   rV   )r   r   r   r   __doc__r
   r	   r   r+   r   r   r   r   r4   r   r   r   r   rR   r   r   r   r   r-   r*   r   r      s    
Ol); < O!(<0 !c3h L " "&$ &.< .D .( h~.  x/ r-   r   c                       \ rS rSrS\S\\   S\\   S\\   S\S\\   S\S	\S
\S\\   S\4S jr	\
S\\\4   4S j5       rS\4S jrSrg)CliDetectionResulti(  pathr6   r]   alternative_encodingsrs   r   r   r>   r?   unicode_pathis_preferredc                     Xl         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl	        Xl
        g rV   )r   r   r6   r]   r   rs   r   r   r>   r?   r   )r)   r   r6   r]   r   rs   r   r   r>   r?   r   r   s               r*   r+   CliDetectionResult.__init__)  s@     	+7'/+;0E"%$-$2!
 )".r-   r/   c                     U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.$ )Nr   r6   r]   r   rs   r   r   r>   r?   r   r   r   rH   s    r*   __dict__CliDetectionResult.__dict__C  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r-   c                 ,    [        U R                  SSS9$ )NT   )ensure_asciiindent)r   r   rH   s    r*   to_jsonCliDetectionResult.to_jsonS  s    T]]a@@r-   )r   r   r>   r?   r6   r]   r   r   rs   r   r   N)r   r   r   r   r4   r
   r	   r   r   r+   r   r   r   r   r   r   r   r-   r*   r   r   (  s    // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ /4 
$sCx. 
 
A Ar-   r   N)encodings.aliasesr   hashlibr   jsonr   typingr   r   r   r	   r
   r   r   constantr   utilsr   r   r   r   r   r4   r   CoherenceMatchr   r   r   r-   r*   <module>r      sa    %   D D D & C CT1 T1n@ @F sEz"' ,A ,Ar-   