
    ˟fiR                        S SK r S SKJr  S SKJrJrJrJrJr  SSK	J
r
JrJrJr  SSKJrJrJrJr  SSKJr  SSKJrJr  SS	KJrJrJrJrJrJrJr  \ R@                  " S
5      r!\ RD                  " 5       r#\#RI                  \ RJ                  " S5      5                 SS\\&\'4   S\(S\(S\)S\\\*      S\\\*      S\+S\+S\)S\+S\4S jjr,         SS\S\(S\(S\)S\\\*      S\\\*      S\+S\+S\)S\+S\4S jjr-         SS\\*\&\4   S\(S\(S\)S\\\*      S\\\*      S\+S\+S\)S\+S\4S jjr.         SS\\\*\\&4   S\(S\(S\)S\\\*      S\\\*      S\+S\+S\)S\+S\+4S jjr/g)     N)PathLike)BinaryIOListOptionalSetUnion   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_cp_similaris_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdenable_fallbackreturnc
                    [        U [        [        45      (       d#  [        SR	                  [        U 5      5      5      eU(       aB  [        R                  n
[        R                  [        5        [        R                  [        5        [        U 5      nUS:X  a{  [        R                  S5        U(       aE  [        R                  [        5        [        R                  W
=(       d    [        R                   5        [#        [%        U SSS/ S5      /5      $ UbG  [        R'                  [        S	S
R)                  U5      5        U Vs/ s H  n[+        US5      PM     nnO/ nUbG  [        R'                  [        SS
R)                  U5      5        U Vs/ s H  n[+        US5      PM     nnO/ nXU-  ::  a!  [        R'                  [        SUUU5        SnUnUS:  a  X-  U:  a  [-        X-  5      n[        U 5      [.        :  n[        U 5      [0        :  nU(       a*  [        R'                  [        SR	                  U5      5        O0U(       a)  [        R'                  [        SR	                  U5      5        / nU(       a  [3        U 5      OSnUb,  UR5                  U5        [        R'                  [        SU5        [7        5       n/ n/ nSnSnSn[#        5       n[9        U 5      u  nnUb6  UR5                  U5        [        R'                  [        S[        U5      U5        UR5                  S5        SU;  a  UR5                  S5        U[:        -    GHJ  nU(       a  UU;  a  M  U(       a  UU;   a  M"  UU;   a  M*  UR=                  U5        SnUU:H  nU=(       a    [?        U5      nUS;   a$  U(       d  [        R'                  [        SU5        M  US;   a$  U(       d  [        R'                  [        SU5        M   [A        U5      n U(       a8  USL a3  [G        USL a  U S[-        S5       OU [        U5      [-        S5       US9  O[G        USL a  U OU [        U5      S US9n Sn U H  n![M        UU!5      (       d  M  Sn   O   U (       a  [        R'                  [        SUW!5        GMZ  [O        U(       d  SO
[        U5      U[-        X-  5      5      n"U=(       a    USL=(       a    [        U5      U:  n#U#(       a  [        R'                  [        SU5        [-        [        U"5      S-  5      n$[Q        U$S5      n$Sn%Sn&/ n'/ n( [S        U UU"UUUUUU5	       H{  n)U'R5                  U)5        U(R5                  [U        U)UUSL =(       a    S[        U5      s=:*  =(       a    S:*  Os  5      5        U(S    U:  a  U%S-  n%U%U$:  d  U(       d  Mt  USL d  M{    O    U&(       d+  U(       a$  U(       d   U [-        S"5      S RW                  US#S$9  U((       a  [Y        U(5      [        U(5      -  OSn*U*U:  d  U%U$:  aw  UR5                  U5        [        R'                  [        S&UU%[[        U*S'-  S(S)95        U	(       a4  USSU4;   a+  U&(       d$  [%        U UUS/ U5      n+UU:X  a  U+nOUS:X  a  U+nOU+nGM\  [        R'                  [        S*U[[        U*S'-  S(S)95        U(       d  []        U5      n,O[_        U5      n,U,(       a3  [        R'                  [        S+R	                  U[G        U,5      5      5        / n-US:w  a?  U' H9  n)[a        U)UU,(       a  S,R)                  U,5      OS5      n.U-R5                  U.5        M;     [c        U-5      n/U/(       a*  [        R'                  [        S-R	                  U/U5      5        UR5                  [%        U UU*UU/U5      5        UUSS4;   ab  U*S.:  a\  [        R                  S/U5        U(       a.  [        R                  [        5        [        R                  W
5        [#        UU   /5      s  $ UU:X  d  GM  [        R                  S0U5        U(       a.  [        R                  [        5        [        R                  W
5        [#        UU   /5      s  $    [        U5      S:X  a  U(       d  U(       d  U(       a  [        R'                  [        S15        U(       a2  [        R                  S2URd                  5        UR5                  U5        OU(       a  Ub+  U(       a!  U(       a  URf                  URf                  :w  d  Ub'  [        R                  S35        UR5                  U5        O-U(       a&  [        R                  S45        UR5                  U5        U(       a<  [        R                  S5URi                  5       Rd                  [        U5      S-
  5        O[        R                  S65        U(       a.  [        R                  [        5        [        R                  W
5        U$ s  snf s  snf ! [B        [D        4 a     [        R'                  [        SU5         GM  f = f! [H        [J        4 aW  n[        U[J        5      (       d%  [        R'                  [        SU[G        U5      5        UR5                  U5         SnAGM}  SnAff = f! [H         a4  n[        R'                  [        S!U[G        U5      5        U$n%Sn& SnAGN7SnAff = f! [H         aB  n[        R'                  [        S%U[G        U5      5        UR5                  U5         SnAGM  SnAff = f)7a2  
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
If there is no results, it is a strong indicator that the source is binary/not text.
By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
but never take it for granted. Can improve the performance.

You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
purpose.

This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
Custom logging format and handler can be set manually.
z4Expected object of type bytes or bytearray, got: {0}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r	   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.z2Encoding %s does not provide an IncrementalDecoderg    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %sTzW%s is deemed too similar to code page %s and was consider unsuited already. Continuing!zpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.      zaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.d      )ndigitsz=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}皙?z.Encoding detection: %s is most likely the one.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)5
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerloggingWARNINGr   r   logjoinr   intr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorstrUnicodeDecodeErrorLookupErrorr   rangemaxr   r   decodesumroundr   r   r
   r   r/   fingerprintbest)0r   r   r   r    r!   r"   r#   r$   r%   r&   previous_logger_levellengthcpis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failurefallback_asciifallback_u8fallback_specifiedresultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderesimilar_soft_failure_testencoding_soft_failedr_multi_byte_bonusmax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiofallback_entrytarget_languages	cd_ratioschunk_languagescd_ratios_mergeds0                                                   M/home/kuhnn/miniconda3/lib/python3.13/site-packages/charset_normalizer/api.py
from_bytesr   !   s
   < i)U!344BIIY
 	
 %+\\/*i.F{ST  1OO1DW__E|IwUBPRSTUU

5IIl#		
 8DD|	"e,|D

6IIl#		
 8DD|	"e,|Du$%

l	
 
qyV^j0(
"%i.3E"E"%i.4D"D

LSS	
 


W^^	
 (* .By)t  %$$%78

N	
 uF)+)+-1N*.K15,.G 3I >L+$$\2

W		
   )++$$W-.?M=M\9F"

=!)-%1]%B!5 "
:Q;
 009MJJn
 I%.BJJd
 	*@*O!	$)>%)G'50 kD	*"3{#3c$i@*	 #&'50 "3{#3#56*	#" +0!$; ],@AA,0) %<
 %JJi$	 )As;/?
 " .t+.O$v- 	 JJ-	 "%SWq[!1 115 ! %!		'	),$ %
   '  !4GA\1B,G,Ga,G R=I-$)$$(99((-=-F7
V &%)
#d)+&--mH-M ENY#i.!@SVi'+;?P+P#**=9JJ0 o+Q7  !gw8J%KK-!-}iO" !$66)7&"g-%3N"0K

K/C'3		
 %*<]*K4]CJJ8??!3'7#8 	 G#""1&2BCHH-.#   1 # 2)<JJ299$m 	$ 		
 0'7CC#%LL@- $$_5 56!7=#9":;;L(LL1
 $$_5 56!7=#9":;;m @p 7|q.,>JJa
 LLI"++ NN-.^3"++~/I/II'LLUVNN;'LLUVNN>*kLLN##L1	
 	TU_--.Nq E E^ $[1 	JJD
 	. #K0 		a--

O!F	 $**=9		l 
	) JJsA	  1$(!
	)* & 

t!F	 (..}=s   h.h3;h8>i+i+Bkkkk-l8+i('i(+k;Akk
l)ll
m" 6mm"fpc
                 F    [        U R                  5       UUUUUUUUU	5
      $ )zz
Same thing than the function from_bytes but using a file pointer that is already ready.
Will not close the file pointer.
)r   read)
r   r   r   r    r!   r"   r#   r$   r%   r&   s
             r   from_fpr     s5      
	     pathc
                 x    [        U S5       n
[        U
UUUUUUUUU	5
      sSSS5        $ ! , (       d  f       g= f)z
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
Can raise IOError.
rbN)openr   )r   r   r   r    r!   r"   r#   r$   r%   r&   r   s              r   	from_pathr     sB      
dD	R 
 
		s   +
9fp_or_path_or_payloadc
                    [        U [        [        45      (       a  [        U UUUUUUUUU	S9
n
U
(       + $ [        U [        [
        45      (       a  [        U UUUUUUUUU	S9
n
U
(       + $ [        U UUUUUUUUU	S9
n
U
(       + $ )a  
Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
are disabled to be stricter around ASCII-compatible but unlikely to be a string.
)	r   r   r    r!   r"   r#   r$   r%   r&   )r:   rR   r   r   r<   r;   r   r   )r   r   r   r    r!   r"   r#   r$   r%   r&   guessess              r   	is_binaryr   3  s    " '#x99!!%%!51+
Z ;C 
	

 
 !!%%!51+
4 ; !!%%!51+
 ;r   )	      皙?NNTFr9   T)	r   r   r   NNTFr9   F)0rH   osr   typingr   r   r   r   r   cdr
   r   r   r   constantr   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   r   	getLoggerr@   StreamHandlerrC   setFormatter	Formatterr<   r;   rL   floatrR   boolr   r   r   r    r   r   <module>r      s"     7 7  R Q  0   
		/	0'')   AB (,(,!% # RUI%&RR R 	R
 49%R 49%R R R R R Rn (,(,!% #   	
 49% 49%     @ (,(,!% # 

UH$
%

 
 	

 49%
 49%
 
 
 
 
 
B (,(,!% #!? 3%!?@?? ? 	?
 49%? 49%? ? ? ? ? 
?r   