
    PL
j%,                       U d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZ ddlZddlZddlmZmZ ddlmZ  ej        e          Zdeez  fd	Z eh d
          Zee         ed<    ej         dej!                  Z" ej#        d          Z$dedefdZ%i a&e
ee
ee	f         f         ed<   da'e(ed<   i Z)e
ee
ee	f         f         ed<   dZ*e(ed<   dZ+i Z,e
ee
ee
ee	f         f         f         ed<   i Z-e
ee(f         ed<   dZ.g dZ/e/d         Z0dZ1i ddddddddddd dd!d"d#d$d%d&d'd&d(d$d)d*d+d*d,d&d-d.d/d*d0d1i d2d3d4d3d5d3d6d7d8d9d:dd;dd<dd=dd>d*d?d7d@d1dAddBdCdDd7dEdFdGdHi dId3dJdKdLd9dMdKdNdKdOddPd3dQd7dRd7dSd7dTdCdUdCdVd7dWdCdXdCdYd7dZd7d[dCdCdCdFdCd1d1d1dCdCdHd\Z2d]Z3dedefd^Z4d_Z5d`Z6daZ7dbZ8dcedefddZ9ddfede
eef         fdgZ:dcedefdhZ;dcedefdiZ<i djdkdldkdmdndodpdqdpdrdsdtdudvdsdwdxdydxdzd{d|dEd}d~dd~dddddd0dd>ddddddddddddddZ=e
eef         ed<   	 ddl>m?Z@  e@            D ]&ZAeAB                                ZCeCreCe=vr
eAjD        e=eC<   'n# eE$ r Y nw xY wdcedee         fdZFdcedefdZGdcedefdZHddcedfedee         fdZIde	fdZJdde	deKdeKdeeK         fdZLde
ee	f         deMedf         deeK         fdZNde
ee	f         deeK         fdZOde
ee	f         deeK         fdZPde
ee	f         de
ee	f         fdZQde
ee
ee	f         f         dede
ee	f         ddfdZRddede
ee
ee	f         f         fdZS	 	 ddcedfedede
ee
ee	f         f         fdZT	 ddedcedfedeeK         fdZUdefdZVde
eeKf         fdZWdedcedeKddfdZXdedcedeeK         fdZYdedceddfdZZdeKdeeK         fdZ[dedeeK         fdZ\dedeeK         fdZ]dededefdZ^ddedcedfedeeK         fdZ_ddedcedfedeeK         fdZ`dedefdZaddedcedfedeeK         fdZbdedefdZcdedcedfedeeK         fdZddddd*dddddddÜ
Zee
eeKf         ed<   i afe
eeKf         ed<   dage(ed<   dZhdede
eeKf         fdɄZi	 ddededeeK         fdʄZj	 	 ddedcedfedeeeK         ef         fd˄Zk	 	 	 	 	 ddedcedfedeKdz  dedeldz  deKfdτZmdedeKfdфZndee
ee	f                  deKfdӄZode
ee	f         deKdeKfdքZpde
ee	f         deKfdׄZqdedd؜dee
ee	f                  dedeee
ee	f                           deKfdۄZrdS )zModel metadata, context lengths, and token estimation utilities.

Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
    N)Path)AnyDictListOptionalTuple)urlparse)base_url_host_matchesbase_url_hostname)OPENROUTER_MODELS_URLreturnc                      dD ];} t          j        |           }|r#t           j                            |          r|c S <dS )a  Resolve SSL verify setting for `requests` calls from env vars.

    The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
    by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
    and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
    that a single env var can cover both `requests` and `httpx` callsites
    inside the same process.

    Returns either a filesystem path to a CA bundle, or True to defer to
    the requests default (certifi).
    )HERMES_CA_BUNDLEREQUESTS_CA_BUNDLESSL_CERT_FILET)osgetenvpathisfile)env_varvals     8/home/kuhnn/.hermes/hermes-agent/agent/model_metadata.py_resolve_requests_verifyr      sO     O  i   	27>>#&& 	JJJ4    >K   x-aix.aiz-aiz.aikimi-cnarcee-ai	deep-seek	gmi-cloud	novita-ai
ai-gateway
minimax-cn
nvidia-nimmoonshot-cnqwen-portalxiaomi-mimoopencode-zengithub-modelsgoogle-geminiminimax-oauthtencent-cloudgoogle-ai-studiogoglmgminimxaizaizengrokkilokimimimonousqwenarceelocalzhipualiyunclaudecustomgeminigithubgooglenovitanvidiaollamavercelxiaomialibabaarceeaicopilotminimaxstepfuntencentdeepseekgmicloudkilocodemoonshotnemotronnovitaaiopencodetokenhub	anthropic	dashscope
openroutertencentmaas
qwen-oauthcopilot-acpkimi-codingopencode-goollama-cloudopenai-codexgithub-copilotkimi-coding-cntencent-tokenhub_PROVIDER_PREFIXESzE^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)z100.64.0.0/10modelc                 ,   d| vs|                      d          r| S |                     dd          \  }}|                                                                }|t          v r0t
                              |                                          r| S |S | S )ua  Strip a recognised provider prefix from a model string.

    ``"local:my-model"`` → ``"my-model"``
    ``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
    ``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
    :http   )
startswithsplitstriplowerrf   _OLLAMA_TAG_PATTERNmatch)rg   prefixsuffixprefix_lowers       r   _strip_provider_prefixru   V   s     %5++F33[[a((NFF<<>>''))L)))$$V\\^^44 	LLr   _model_metadata_cache_model_metadata_cache_time_novita_metadata_cache_novita_metadata_cache_timei  _endpoint_model_metadata_cache#_endpoint_model_metadata_cache_timei,  )       i }  i>  i@  r~   zclaude-opus-4-7@B zclaude-opus-4.7zclaude-opus-4-6zclaude-sonnet-4-6zclaude-opus-4.6zclaude-sonnet-4.6rA   i@ gpt-5.5i zgpt-5.4-nano gpt-5.4-minigpt-5.4gpt-5.3-codex-sparkr}   zgpt-5.1-chatgpt-5zgpt-4.1i zgpt-4rC   i   zgemma-4r|   gemma4zgemma-4-31bzgemma-3i   gemmai    zdeepseek-v4-prozdeepseek-v4-flashzdeepseek-chatzdeepseek-reasonerrQ   llamazqwen3.6-pluszqwen3-coder-pluszqwen3-coderi   r<   rN   i   r1   i  zgrok-code-fastzgrok-4-1-fasti zgrok-2-visionzgrok-4-fastz	grok-4.20grok-4.3zgrok-4zgrok-3zgrok-2r7   r9   zhy3-previewrU   trinityelephantzQwen/Qwen3.5-397B-A17BzQwen/Qwen3.5-35B-A3Bi   )zdeepseek-ai/DeepSeek-V3.2zmoonshotai/Kimi-K2.5zmoonshotai/Kimi-K2.6zmoonshotai/Kimi-K2-ThinkingzMiniMaxAI/MiniMax-M2.5zXiaomiMiMo/MiMo-V2-Flashzmimo-v2-prozmimo-v2.5-proz	mimo-v2.5zmimo-v2-omnizmimo-v2-flashzzai-org/GLM-5)zgrok-3-minizgrok-4.20-multi-agentr   c                     | pd                                                                 sdS dD ]"}|v r                    |d          d         #t          fdt          D                       S )a$  Return True when an xAI Grok model accepts ``reasoning.effort``.

    Allowlist by substring (matches both bare ``grok-3-mini`` and
    aggregator-prefixed ``x-ai/grok-3-mini``). Conservative by design:
    if a future Grok model isn't listed, we send no effort dial rather
    than 400.
     F)/rk   c              3   B   K   | ]}                     |          V  d S N)rl   ).0rr   names     r   	<genexpr>z1grok_supports_reasoning_effort.<locals>.<genexpr>  s/      SS6tv&&SSSSSSr   )rn   ro   rsplitany_GROK_EFFORT_CAPABLE_PREFIXES)rg   sepr   s     @r   grok_supports_reasoning_effortr     s     KR  &&((D u + +$;;;;sA&&r*DSSSS5RSSSSSSr   )context_lengthcontext_windowcontext_sizemax_context_lengthmax_position_embeddingsmax_model_lenmax_input_tokensmax_sequence_lengthmax_seq_lenn_ctx_trainn_ctxctx_size)max_completion_tokensmax_output_tokens
max_tokens)	localhostz	127.0.0.1z::1z0.0.0.0)z.docker.internalz.containers.internalz.lima.internalbase_urlc                 T    | pd                                                     d          S )Nr   r   )rn   rstripr   s    r   _normalize_base_urlr   @  s&    N!!##**3///r   r   api_keyc                 ^    t          | pd                                          }|si S dd| iS )Nr   AuthorizationBearer )strrn   )r   tokens     r   _auth_headersr   D  s@    2$$&&E 	.u..//r   c                 "    t          | d          S )Nopenrouter.ai)r
   r   s    r   _is_openrouter_base_urlr   K  s     ?;;;r   c                 ^    t          |           }t          |          ot          |           S r   )r   boolr   )r   
normalizeds     r   _is_custom_endpointr   O  s.    $X..J
G$;J$G$G GGr   zapi.openai.comopenaizchatgpt.comapi.anthropic.comrY   zapi.z.air5   zopen.bigmodel.cnzapi.moonshot.air_   zapi.moonshot.cnrd   zapi.kimi.comzapi.stepfun.airO   zapi.stepfun.comzapi.arcee.air=   zapi.minimaxzdashscope.aliyuncs.comrK   zdashscope-intl.aliyuncs.comzportal.qwen.air]   r   r[   z!generativelanguage.googleapis.comr;   rM   	fireworksr`   r4   rG   rJ   r2   rF   re   ra   )zinference-api.nousresearch.comzapi.deepseek.comzapi.githubcopilot.comzmodels.github.aizmodels.inference.ai.azure.comzapi.fireworks.aizopencode.aizapi.x.aizintegrate.api.nvidia.comzapi.xiaomimimo.comzxiaomimimo.comzapi.gmi-serving.comapi.novita.aiztokenhub.tencentmaas.comz
ollama.com_URL_TO_PROVIDER)list_providersc                    t          |           }|sdS t          d|v r|nd|           }|j                                        p|j                                        }t
                                          D ]\  }}||v r|c S dS )a  Infer the models.dev provider name from a base URL.

    This allows context length resolution via models.dev for custom endpoints
    like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
    explicitly set the provider name in config.
    N://zhttps://)r   r	   netlocro   r   r   items)r   r   parsedhosturl_partproviders         r   _infer_provider_from_urlr     s     %X..J tEZ$7$7jj=T
=T=TUUF=  7FK$5$5$7$7D.4466  (tOOO 4r   c                 $    t          |           d uS r   )r   r   s    r   _is_known_provider_base_urlr     s    #H--T99r   c                    t          |           }|sdS d|v r|nd| }	 t          |          }|j        pdn# t          $ r Y dS w xY wt          v rdS t          fdt          D                       rdS 	 t          j                  }|j	        s|j
        s|j        rdS t          |t          j                  r|t          v rdS n# t          $ r Y nw xY w                    d          }t#          |          dk    r	 t%          |d	                   t%          |d
                   }}|dk    rdS |dk    rd|cxk    rdk    rn ndS |dk    r|dk    rdS |dk    rd|cxk    rdk    rn ndS n# t          $ r Y nw xY wdS )a  Return True if base_url points to a local machine.

    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
    container-internal DNS names (``host.docker.internal`` et al.),
    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
    is included so remote-but-trusted Ollama boxes reached over a
    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
    Fr   zhttp://r   Tc              3   B   K   | ]}                     |          V  d S r   )endswith)r   rs   r   s     r   r   z$is_local_endpoint.<locals>.<genexpr>  s/      
I
IV4==  
I
I
I
I
I
Ir   .   r   rk   
                  d   @      )r   r	   hostname	Exception_LOCAL_HOSTSr   _CONTAINER_LOCAL_SUFFIXES	ipaddress
ip_address
is_privateis_loopbackis_link_local
isinstanceIPv4Address_TAILSCALE_CGNAT
ValueErrorrm   lenint)	r   r   urlr   addrpartsfirstsecondr   s	           @r   is_local_endpointr     s    %X..J u++**1G:1G1GC#$"   uu|t

I
I
I
I/H
I
I
III t#D))? 	d. 	$2D 	4dI122 	t?O7O7O4    JJsOOE
5zzQ	aMM3uQx==6E{{t||f 2 2 2 2 2 2 2 2 2t||#t||f 3 3 3 3 3 3 3 3 3t 	 	 	D	5sK   : 
AA9)C
 $#C
 

CC0E4 5E4 E4 E4 4
F Fc                    ddl }t          |           }|}|                    d          r
|dd         }t          |          }	 |                    d|          5 }	 |                    | d          }|j        dk    r	 ddd           d	S n# t          $ r Y nw xY w	 |                    | d
          }|j        dk    r8	 |                                }d|v r	 ddd           dS n# t          $ r Y nw xY wn# t          $ r Y nw xY w	 |                    | d          }|j        dk    r|                    | d          }|j        dk    rd|j	        v r	 ddd           dS n# t          $ r Y nw xY w	 |                    | d          }|j        dk    r&|                                }d|v r	 ddd           dS n# t          $ r Y nw xY wddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )zDetect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
    r   N/v1g       @timeoutheaders/api/v1/models   	lm-studioz	/api/tagsmodelsrH   	/v1/props/propsdefault_generation_settingsllamacppz/versionversionvllm)
httpxr   r   r   Clientgetstatus_coder   jsontext)	r   r   r  r   
server_urlr   clientrdatas	            r   detect_local_server_typer    s`   
 LLL$X..JJ5!! %_
G$$G*\\#w\77 '	6JJ*<<<===C''&'	 '	 '	 '	 '	 '	 '	 '	 (   

JJ*77788=C'' vvxx#t++#+#'	 '	 '	 '	 '	 '	 '	 '	  ,$      JJ*77788=C''

j#8#8#899A=C'',IQV,S,S%9'	 '	 '	 '	 '	 '	 '	 '	:    JJ*66677=C''6688D D((%K'	 '	 '	 '	 '	 '	 '	 '	L    M'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	P     4s  G/ G##BG/ G#
BG#BG#!#C=C,G/ +C=,
C96C=8C99C=<G#=
D
G#	D

G#AE,G/ +G#,
E96G#8E99G#=;G9G/ G#
GG#GG#G/ #G''G/ *G'+G/ /
G<;G<valuec              #      K   t          | t                    r2| V  |                                 D ]}t          |          E d {V  d S t          | t                    r| D ]}t          |          E d {V  d S d S r   )r   dictvalues_iter_nested_dictslist)r  nesteditems      r   r  r    s      % 0llnn 	2 	2F)&1111111111	2 	2	E4	 	  0 	0 	0D)$//////////0 0	0 	0r      逖 minimummaximumc                 $   	 t          | t                    rd S t          | t                    r(|                                                     dd          } t          |           }n# t          t          f$ r Y d S w xY w||cxk    r|k    rn n|S d S )N,r   )r   r   r   rn   replacer   	TypeErrorr   )r  r  r  results       r   _coerce_reasonable_intr    s    eT"" 	4eS!! 	3KKMM))#r22EUz"   tt&####G#####4s   A& AA& &A;:A;payloadkeys.c                     d |D             }t          |           D ]W}|                                D ]@\  }}t          |                                          |vr)t	          |          }||c c S AXd S )Nc                 6    h | ]}|                                 S  )ro   )r   keys     r   	<setcomp>z%_extract_first_int.<locals>.<setcomp>(  s     ***cciikk***r   )r  r   r   ro   r  )r  r  keysetmappingr#  r  coerceds          r   _extract_first_intr(  '  s    **T***F%g..  !--// 	 	JC3xx~~v--,U33G" #		 4r   c                 ,    t          | t                    S r   )r(  _CONTEXT_LENGTH_KEYSr  s    r   _extract_context_lengthr,  3      g';<<<r   c                 ,    t          | t                    S r   )r(  _MAX_COMPLETION_KEYSr+  s    r   _extract_max_completion_tokensr0  7  r-  r   c                 X  	 |                      d          }|                      d          }||Ri }|%t          t          |          dz  dz            |d<   |%t          t          |          dz  dz            |d<   |S ddd	d
dd}t          |           D ]}d |                                D             	t          	fd|                                D                       sNi }|                                D ]%\  }}|D ]}|	v r	|         dvr	|         ||<    n&|r|c S i S )Ninput_token_price_per_moutput_token_price_per_mi'  r   prompt
completion)r4  inputinput_cost_per_tokenprompt_token_cost)r5  outputoutput_cost_per_tokencompletion_token_cost)requestrequest_cost)
cache_readcached_promptinput_cache_readcache_read_cost_per_token)cache_writecache_creationinput_cache_writecache_write_cost_per_token)r4  r5  r<  r>  rB  c                 X    i | ]'\  }}t          |                                          |(S r"  )r   ro   )r   r#  r  s      r   
<dictcomp>z$_extract_pricing.<locals>.<dictcomp>N  s.    PPP*#uc#hhnn&&PPPr   c              3   N   K   | ]}t          fd |D                       V   dS )c              3       K   | ]}|v V  	d S r   r"  )r   aliasr   s     r   r   z-_extract_pricing.<locals>.<genexpr>.<genexpr>O  s(      @@uu
*@@@@@@r   N)r   )r   aliasesr   s     r   r   z#_extract_pricing.<locals>.<genexpr>O  s@      ccW3@@@@@@@@@ccccccr   >   Nr   )r  r   floatr  r   r   r  )
r  novita_inputnovita_outputpricing	alias_mapr&  targetrK  rJ  r   s
            @r   _extract_pricingrR  ;  s   ;;899LKK :;;M=#<"$# #E,$7$7&$@9$L M MGH$$'m(<(<v(E	(Q$R$RGL! S`.fk I &g..  PPPPP
ccccPYP`P`PbPbccccc 	"$(00 	 	OFG   J&&:e+<J+N+N&0&7GFOE 	NNN	Ir   cachemodel_identryc                     || |<   d|v r4|                     dd          d         }|                     ||           d S d S )Nr   rk   )rm   
setdefault)rS  rT  rU  
bare_models       r   _add_model_aliasesrY  \  sO    E(O
h^^C++A.
U+++++ r   Fforce_refreshc                    | s2t           r+t          j                    t          z
  t          k     rt           S 	 t	          j        t          dt                                }|                                 |	                                }i }|                    dg           D ]}|                    dd          }|                    dd          |                    di                               d	d
          |                    d|          |                    di           d}t          |||           |                    dd          }|r||k    rt          |||           |a t          j                    at                              dt          |                     |S # t          $ r*}t          j        d|            t           pi cY d}~S d}~ww xY w)z9Fetch model metadata from OpenRouter (cached for 1 hour).r   )r   verifyr
  idr   r   r}   top_providerr   i   r   rO  )r   r   r   rO  canonical_slugz.Fetched metadata for %s models from OpenRouterz0Failed to fetch model metadata from OpenRouter: N)rv   timerw   _MODEL_CACHE_TTLrequestsr  r   r   raise_for_statusr  rY  loggerdebugr   r   loggingwarning)	rZ  responser
  rS  rg   rT  rU  	canonicales	            r   fetch_model_metadatark  c  s     %2 %	F`8`dt7t7t$$+< 5rJbJdJdeee!!###}}XXfb)) 	< 	<Eyyr**H"')),<f"E"E).>2)F)F)J)JKbdh)i)i		&(33 99Y33	 E uh666		"2B77I <Y(22"5)U;;; %%)Y[["Es5zzRRR + + +N1NNOOO$*******+s   E*F! !
G+G
GGc                 v   t          |           }|rt          |          ri S |sXt                              |          }t                              |d          }|!t          j                    |z
  t          k     r|S |g}|                    d          r|dd                             d          }n|dz   }|r||vr|	                    |           |rdd| ini }d}	t          |          r	 t          ||          d	k    rP|                    d          r|dd                             d          n|}
t          j        |
                    d          d
z   |dt                                }|                                 |                                }i }|                    dg           D ]y}t!          |t"                    s|                    d          p|                    d          }|sFd|                    d|          i}d}|                    dg           pg D ]y}t!          |t"                    s|                    di           }t!          |t"                    r|                    d          nd}t!          |t$                    r
|dk    r|} nz|||d<   t'          |          }|||d<   t)          |          }|r||d<   t+          |||           |                    d          }t!          |t,                    r|r||k    rt+          |||           {|t          |<   t          j                    t          |<   |S n# t.          $ r}|}	Y d}~nd}~ww xY w|D ]}|                    d          dz   }	 t          j        ||dt                                }|                                 |                                }i }|                    dg           D ]}t!          |t"                    s|                    d          }|s0d|                    d|          i}t1          |          }|||d<   t'          |          }|||d<   t)          |          }|r||d<   t+          |||           t3          d |                    dg           D                       }|r	 |                    d                              dd          }t                      }t          j        |dz   |d|          }|j        st          j        |dz   |d|          }|j        rh|                                }|                    di           } |                     d          }!|                    dd          }"|!r|"r|"|v r|!||"         d<   n# t.          $ r Y nw xY w|t          |<   t          j                    t          |<   |c S # t.          $ r}|}	Y d}~{d}~ww xY w|	rt8                              d ||	           i t          |<   t          j                    t          |<   i S )!zFetch model metadata from an OpenAI-compatible ``/models`` endpoint.

    This is used for explicit custom endpoints where hardcoded global model-name
    defaults are unreliable. Results are cached in memory per base URL.
    r   Nr   r   r   r   r   r   r   r   r   r   r   r\  r   r#  r]  r   loaded_instancesconfigr   r   rO  z/modelsr
  c              3   r   K   | ]2}t          |t                    |                    d           dk    V  3dS )owned_byr   N)r   r  r  )r   ms     r   r   z0fetch_endpoint_model_metadata.<locals>.<genexpr>  sV        Jq$4G4Gj!!Z/     r   r   r      r   r   r   model_aliasz1Failed to fetch model metadata from %s/models: %s)r   r   rz   r  r{   r`  _ENDPOINT_MODEL_CACHE_TTLr   r   appendr   r  rb  r   rc  r  r   r  r   r0  rR  rY  r   r   r,  r   r  okrd  re  )#r   r   rZ  r   cached	cached_at
candidates	alternater   
last_errorr  rh  r  rS  rg   rT  rU  r   instcfgctxr   rO  alt_idexc	candidater   is_llamacppbase_verify
props_resppropsgen_settingsr   ru  s#                                      r   fetch_endpoint_model_metadatar    s#    %X..J 0<< 	 /33J??7;;JJJ	49;;#:>W"W"WMJ5!! 'ssO**3//		&	 %Yj00)$$$8?G 3' 3 344RG&*J$$ 21	'
GDDDSS<F<O<OPU<V<VfZ_33C888\f
#<%%c**-==#355	   ))+++"--//35$[[266 A AE%eT22 ! $yy//B599T??H# ! -3UYYvx5P5P,QE%)N %		*<b A A GR " ")$55 %$"hhx44;Ec4;P;PZcgg&6777VZ%c3// "C!GG-0N!E%12@./,J5,Q,Q),89N56.u55G 3+2i(&uh>>>"YYt__F!&#.. A6 Af>P>P*5&%@@@=B.z:BF)++3J?] T^  	 	 	JJJJJJ	   4 4	s##i/2	|C"MeMgMghhhH%%'''mmooG/1E VR00 ; ;!%..  99T?? )/681L1L(M!8!?!?!-.<E*+(Fu(M(M%(45JE12*511 /'.E)$"5(E::::    VR00    K  $++C0088CCD688G!)d[.@'[\el!m!m!mJ%= o%-\$/7\]fm%n%n%n
!} I * 1 1',yy1NPR'S'S , 0 0 9 9&+iir&B&B  I[ I[E5I5ICHE+./?@    D :?*:6>Bikk/
;LLL 	 	 	JJJJJJ	  bH*V`aaa13":.6:ikk'
3IsW   1I%M 
M."M))M.D/W?C#V#"W#
V0-W/V00)W
W3&W..W3c                    t          ||          }|                    |           }|sht          |          dk    r/t          t	          |                                                    }n&|                                D ]\  }}| |v s|| v r|} n|r,|                    d          }t          |t                    r|S dS )zDResolve context length from an endpoint's live ``/models`` metadata.rm  rk   r   N)	r  r  r   nextiterr  r   r   r   )rg   r   r   endpoint_metadatamatchedr#  rU  r   s           r    _resolve_endpoint_context_lengthr    s     6hPPP##E**G  !!Q&&4 1 8 8 : :;;<<GG/5577  
UC<<3%<<#GE $0  " %566nc** 	"!!4r   c                  (    ddl m}   |             dz  S )z8Return path to the persistent context length cache file.r   get_hermes_homezcontext_length_cache.yaml)hermes_constantsr  r  s    r   _get_context_cache_pathr  .  s(    000000?:::r   c                  ^   t                      } |                                 si S 	 t          | d          5 }t          j        |          pi }ddd           n# 1 swxY w Y   |                    di           S # t          $ r'}t                              d|           i cY d}~S d}~ww xY w)z:Load the model+provider -> context_length cache from disk.utf-8encodingNcontext_lengthsz'Failed to load context length cache: %s)	r  existsopenyaml	safe_loadr  r   rd  re  )r   fr
  rj  s       r   _load_context_cacher  4  s    "$$D;;== 	$))) 	+Q>!$$*D	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+xx)2...   >BBB						s@   A; AA; AA; !A"A; ;
B,B'!B,'B,lengthc                    |  d| }t                      }|                    |          |k    rdS |||<   t                      }	 |j                            dd           t          |dd          5 }t          j        d|i|d	
           ddd           n# 1 swxY w Y   t          	                    d||d           dS # t          $ r&}t                              d|           Y d}~dS d}~ww xY w)zPersist a discovered context length for a model+provider combo.

    Cache key is ``model@base_url`` so the same model name served from
    different providers can have different limits.
    @NTparentsexist_okwr  r  r  Fdefault_flow_stylez%Cached context length %s -> %s tokensr  z'Failed to save context length cache: %s)r  r  r  parentmkdirr  r  dumprd  infor   re  )rg   r   r  r#  rS  r   r  rj  s           r   save_context_lengthr  B  sp    

X

C!!Eyy~~E#J"$$DC$666$g... 	O!I(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O;SV--PPPPP C C C>BBBBBBBBBCs<   .C 3BC BC  B!!C 
C4C//C4c                 V    |  d| }t                      }|                    |          S )zBLook up a previously discovered context length for model+provider.r  )r  r  )rg   r   r#  rS  s       r   get_cached_context_lengthr  W  s0    

X

C!!E99S>>r   c                    |  d| }t                      }||vrdS ||= t                      }	 |j                            dd           t	          |dd          5 }t          j        d|i|d	
           ddd           dS # 1 swxY w Y   dS # t          $ r'}t          	                    d||           Y d}~dS d}~ww xY w)zCDrop a stale cache entry so it gets re-resolved on the next lookup.r  NTr  r  r  r  r  Fr  z6Failed to invalidate context length cache entry %s: %s)
r  r  r  r  r  r  r  r   rd  re  )rg   r   r#  rS  r   r  rj  s          r   !_invalidate_cached_context_lengthr  ^  sV   

X

C!!E
%c
"$$DW$666$g... 	O!I(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O W W WMsTUVVVVVVVVVWs;   .B B6B BB 
BB 
CB<<Ccurrent_lengthc                 .    t           D ]}|| k     r|c S dS )z@Return the next lower probe tier, or None if already at minimum.N)CONTEXT_PROBE_TIERS)r  tiers     r   get_next_probe_tierr  n  s/    #  .  KKK !4r   	error_msgc                     |                                  }g d}|D ]O}t          j        ||          }|r6t          |                    d                    }d|cxk    rdk    rn K|c S PdS )a?  Try to extract the actual context limit from an API error message.

    Many providers include the limit in their error text, e.g.:
      - "maximum context length is 32768 tokens"
      - "context_length_exceeded: 131072"
      - "Maximum context size 32768 exceeded"
      - "model's max context length is 65536"
    )zY(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})z:context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})z)(\d{4,})\s*(?:token)?\s*(?:context|limit)z">\s*(\d{4,})\s*(?:max|limit|token)z(\d{4,})\s*(?:max(?:imum)?)\brk   r  r  Nro   researchr   group)r  error_lowerpatternspatternrq   limits         r   parse_context_limit_from_errorr  v  s     //##K  H   	';// 	A''Eu****
*****4r   c                     |                                  }d|v od|v pd|v }|sdS g d}|D ]E}t          j        ||          }|r,t          |                    d                    }|dk    r|c S FdS )u#  Detect an "output cap too large" error and return how many output tokens are available.

    Background — two distinct context errors exist:
      1. "Prompt too long"  — the INPUT itself exceeds the context window.
           Fix: compress history and/or halve context_length.
      2. "max_tokens too large" — input is fine, but input + requested_output > window.
           Fix: reduce max_tokens (the output cap) for this call.
           Do NOT touch context_length — the window hasn't shrunk.

    Anthropic's API returns errors like:
      "max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"

    Returns the number of output tokens that would fit (e.g. 10000 above), or None if
    the error does not look like a max_tokens-too-large error.
    r   available_tokenszavailable tokensN)zavailable_tokens[:\s]+(\d+)zavailable\s+tokens[:\s]+(\d+)z=\s*(\d+)\s*$rk   r  )r  r  is_output_cap_errorr  r  rq   tokenss          r   (parse_available_output_tokens_from_errorr    s      //##K 	# 	U;.S2D2S   t  H   	';// 	Q((F{{4r   candidate_idlookup_modelc                 b    | |k    rdS d| v r"|                      dd          d         |k    rdS dS )a  Return True if *candidate_id* (from server) matches *lookup_model* (configured).

    Supports two forms:
    - Exact match:  "nvidia-nemotron-super-49b-v1" == "nvidia-nemotron-super-49b-v1"
    - Slug match:   "nvidia/nvidia-nemotron-super-49b-v1" matches "nvidia-nemotron-super-49b-v1"
                    (the part after the last "/" equals lookup_model)

    This covers LM Studio's native API which stores models as "publisher/slug"
    while users typically configure only the slug after the "local:" prefix.
    Tr   rk   F)r   )r  r  s     r   _model_id_matchesr    sH     |##t
l|223::1=MMt5r   c                 @   ddl }t          |           }|                    d          }|                    d          r
|dd         }	 t	          ||          }n# t
          $ r Y dS w xY w|dk    rdS t          |          }	 |                    d|	          5 }|                    | d
d|i          }	|	j	        dk    r	 ddd           dS |	
                                }
|
                    dd          }d|v r|                    d          D ]s}d|v rm|                                                                }t          |          dk    r4	 t          |d                   c cddd           S # t           $ r Y ow xY wt|
                    di           }|                                D ]B\  }}d|v r9t%          |t          t&          f          rt          |          c cddd           S C	 ddd           n# 1 swxY w Y   n# t
          $ r Y nw xY wdS )ay  Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
    or the explicit ``num_ctx`` from the Modelfile if set.  Returns None if
    the server is unreachable or not Ollama.

    This is the value that should be passed as ``num_ctx`` in Ollama chat
    requests to override the default 2048.
    r   Nr   r   r   rm  rH         @r   	/api/showr   r  r   
parametersr   num_ctx
   r   
model_infor   )r  ru   r   r   r  r   r   r  postr  r  r  rm   rn   r   r   r   r   r   rL  )rg   r   r   r  rX  r  server_typer   r  respr
  paramsliner   r  r#  r  s                    r   query_ollama_num_ctxr    s    LLL'..J%%J5!! %_
.xIII   tthtG$$G\\#w\77 	&6;;*777vz>R;SSD3&&	& 	& 	& 	& 	& 	& 	& 	& 99;;D XXlB//FF"""LL.. % %D D(( $

 2 2 4 4u::??%'*59~~ 5 5	& 	& 	& 	& 	& 	& 	& 	& $. % % % $% ,33J(..00 & &
U#s**z%#u/N/N*u::%%-	& 	& 	& 	& 	& 	& 	& 	&(&)	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&.    4s   	A 
A)(A)H )HH BHE7(H*H 7
FHFA#H'H 4H6H HH 	H
H 
HHc                    ddl }|                    d          }|                    d          r
|dd         }t          |          }	 |                    d|          5 }|                    | dd	| i
          }|j        dk    r	 ddd           dS |                                }|                    di           }	|		                                D ]J\  }
}d|
v rAt          |t          t          f          r%t          |          }|dk    r|c cddd           S K|                    dd          }d|v r|                    d          D ]|}d|v rv|                                                                }t          |          dk    r=	 t          |d                   }|dk    r|c cddd           S l# t           $ r Y xw xY w}ddd           n# 1 swxY w Y   n# t"          $ r Y nw xY wdS )u  Query an Ollama server's native ``/api/show`` for context length.

    Provider-agnostic: works against ANY Ollama-compatible server regardless
    of hostname — local Ollama, Ollama Cloud (``ollama.com``), custom Ollama
    hosting behind a reverse proxy, etc.  For non-Ollama servers the POST
    returns 404/405 quickly; the function handles errors gracefully.

    For hosted servers the GGUF ``model_info.*.context_length`` is the
    authoritative source: the user can't set their own ``num_ctx``, and the
    OpenAI-compat ``/v1/models`` endpoint correctly omits ``context_length``
    per the OpenAI schema.

    Resolution order for hosted Ollama:
      1. ``model_info.*.context_length`` — GGUF training max (authoritative)
      2. ``parameters`` → ``num_ctx`` — server-side Modelfile override
    The order is flipped vs ``query_ollama_num_ctx()`` because local users
    control ``num_ctx`` themselves; hosted users can't.
    r   Nr   r   r   g      @r   r  r   r  r   r  r   r  r  r   r  r  r  r   )r  r   r   r   r  r  r  r  r  r   r   r   rL  rm   rn   r   r   r   )rg   r   r   r  r  r   r  r  r
  r  r#  r  r  r  r  r   s                   r   _query_ollama_api_showr    s   & LLL%%J5!! %_
G$$G\\#w\77 	%6;;*777vuo;NND3&&	% 	% 	% 	% 	% 	% 	% 	% 99;;D ,33J(..00 # #
U#s**z%#u/N/N*e**Cd{{"

	% 	% 	% 	% 	% 	% 	% 	%  XXlB//FF"""LL.. 	% 	%D D(( $

 2 2 4 4u::??%&)%)nn#&$;;+.JJ3	% 	% 	% 	% 	% 	% 	% 	%0 $/#- % % % $%5	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%8    4s   	G#  )G	G# A;GG# A/GF:*G,G# 9G:
GGGGG# GG# GG# #
G0/G0c                 \    |                                  }|                    d          pd|v S )aB  Return True if the model name looks like a Kimi-family model.

    Catches ``kimi-k2.6``, ``kimi-k2.5``, ``kimi-k2-thinking``,
    ``moonshotai/Kimi-K2.6``, and similar variants.  Used as a guard
    against stale OpenRouter metadata that underreports these models
    as 32K context when they actually support 262K+.
    r9   rT   )ro   rl   )rg   ro   s     r   _model_name_suggests_kimir  D  s/     KKMMEF##:zU'::r   c                 	   ddl }t          |           } |                    d          }|                    d          r
|dd         }t	          |          }	 t          ||          }n# t          $ r d}Y nw xY w	 |                    d|          5 }|d	k    rL|                    | d
d| i          }|j	        dk    r$|
                                }	|	                    dd          }
d|
v r|
                    d          D ]s}d|v rm|                                                                }t          |          dk    r4	 t          |d                   c cddd           S # t           $ r Y ow xY wt|	                    di           }|                                D ]B\  }}d|v r9t%          |t          t&          f          rt          |          c cddd           S C|dk    r|                    | d          }|j	        dk    r|
                                }	|	                    dg           D ]}t)          |                    dd          |           s$t)          |                    dd          |           r|                    dg           D ]j}|                    di           }|                    d          }|r;t%          |t          t&          f          rt          |          c c cddd           S k n|                    | d|            }|j	        dk    r|
                                }	|	                    d          p)|	                    d          p|	                    d          }|r7t%          |t          t&          f          rt          |          cddd           S |                    | d           }|j	        dk    r|
                                }	|	                    d!g           }|D ]}t)          |                    dd          |           rz|                    d          p)|                    d          p|                    d          }|r9t%          |t          t&          f          rt          |          c cddd           S ddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )"z4Query a local server for the model's context length.r   Nr   r   r   rm  r  r   rH   r  r   r  r   r  r   r  r  r  r   r  r   r   r   r   r#  r]  ro  rp  z/v1/models/r   r   z
/v1/modelsr
  )r  ru   r   r   r   r  r   r  r  r  r  r  rm   rn   r   r   r   r   r   rL  r  )rg   r   r   r  r  r   r  r  r  r
  r  r  r   r  r#  r  rs  r~  r  r  models_lists                        r   _query_local_context_lengthr  P  s#   LLL #5))E %%J5!! %_
G$$G.xIII   E\\#w\77 B	,6h&&{{j#;#;#;65/{RR#s**99;;D "XXlB77F F**$*LL$6$6 - -D(D00(,

(:(:(<(<#&u::??%-/259~~(=(='B	, B	, B	, B	, B	, B	, B	, B	,( ,6 %- %- %-(,%- "&,!;!;J&0&6&6&8&8 . .
U+s22z%#u7V7V2#&u::--5B	, B	, B	, B	, B	, B	, B	, B	,B k))zzZ"?"?"?@@#s**99;;D!XXh33 " ",QUU5"-=-=uEE "IZ[\[`[`aegi[j[jlqIrIr "().@"(E(E 4 4&*hhx&<&<&)gg.>&?&?#& !4:cC<+H+H !4+.s88OOOOYB	, B	, B	, B	, B	, B	, B	, B	,Z "E" ::????@@D3&&yy{{hh//g488<L3M3MgQUQYQYZfQgQg $:cC<88 $s88mB	, B	, B	, B	, B	, B	, B	, B	,t ::77788D3&&yy{{"hhvr22$ , ,A(tR%@@ ,eeO44f>N8O8OfSTSXSXYeSfSf ,:cC<#@#@ ,#&s88OOEB	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	,F     4s   A* *A98A9=S B2R7E*R7S *
E74R76E77A#R7S 'DR79S B(R7.S ;C"R7S *R7+S 7R;;S >R;?S 
SSc                 .    |                      dd          S )zNormalize version separators for matching.

    Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
    OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
    Normalize both to dashes for comparison.
    r   -)r  )rg   s    r   _normalize_model_versionr    s     ==c"""r   c                 r   |r|                     d          rdS 	 |                    d          }|                    d          r
|dd         }| d}|dd}t          j        ||d	t                      
          }|j        dk    rdS |                                }|                    dg           D ]O}|                    d          | k    r4|                    d          }	t          |	t                    r
|	dk    r|	c S Pn2# t          $ r%}
t                              d|
           Y d}
~
nd}
~
ww xY wdS )zQuery Anthropic's /v1/models endpoint for context length.

    Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
    OAuth tokens (sk-ant-oat*) from Claude Code return 401.
    z
sk-ant-oatNr   r   r   z/v1/models?limit=1000z
2023-06-01)z	x-api-keyzanthropic-versionr   rn  r   r
  r]  r   r   z%Anthropic /v1/models query failed: %s)rl   r   r   rb  r  r   r  r  r   r   r   rd  re  )rg   r   r   r  r   r   r  r
  rs  r  rj  s              r   _query_anthropic_context_lengthr    sz     g((66 tAs##== 	9D,,, !-
 
 |C"E]E_E_```s""4yy{{&"%% 	 	AuuT{{e##ee.//c3'' C!GGJJJ		
  A A A<a@@@@@@@@A4s%   A-D 
A8D D 
D4D//D4i& )
zgpt-5.1-codex-maxzgpt-5.1-codex-minizgpt-5.3-codexr   zgpt-5.2-codexr   r   r   zgpt-5.2r   _CODEX_OAUTH_CONTEXT_FALLBACK_codex_oauth_context_cacheg        _codex_oauth_context_cache_timeaccess_tokenc                 0   t          j                     }t          r|t          z
  t          k     rt          S 	 t	          j        ddd|  idt                                }|j        dk    r"t          	                    d|j                   i S |
                                }n4# t          $ r'}t          	                    d|           i cY d	}~S d	}~ww xY wt          |t                    r|                    d
g           ng }i }|D ]}t          |t                    s|                    d          }|                    d          }	t          |t                    r2t          |	t                    r|	dk    r|	||                                <   |r|a|a|S )ak  Probe the ChatGPT Codex /models endpoint for per-slug context windows.

    Codex OAuth imposes its own context limits that differ from the direct
    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
    `context_window` field in each model entry is the authoritative source.

    Returns a ``{slug: context_window}`` dict. Empty on failure.
    zAhttps://chatgpt.com/backend-api/codex/models?client_version=1.0.0r   r   r   rn  r   zHCodex /models probe returned HTTP %s; falling back to hardcoded defaultszCodex /models probe failed: %sNr   slugr   r   )r`  r  r  _CODEX_OAUTH_CONTEXT_CACHE_TTLrb  r  r   r  rd  re  r  r   r   r  r   r   rn   )
r  nowr  r
  r  entriesr  r  r  r  s
             r   "_fetch_codex_oauth_context_lengthsr    s    )++C"*114RRR))|O$&>&>&>?+--	
 
 
 s""LLZ    Iyy{{   5s;;;						 )34(>(>Fdhhx$$$BGF ' '$%% 	xxhh'((dC   	'ZS%9%9 	'cAgg#&F4::<<  .%+"*-'Ms$   AB! B! !
C+CCCc                    t          |                                           }|sdS |ret          |          }||v r||         S |                                }|                                D ]!\  }}|                                |k    r|c S "|                                }t          t                                          d d          D ]\  }}||v r|c S dS )zResolve a Codex OAuth model's real context window.

    Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
    have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
    Nc                 ,    t          | d                   S Nr   r   xs    r   <lambda>z5_resolve_codex_oauth_context_length.<locals>.<lambda>C  s    S1YY r   Tr#  reverse)ru   rn   r  ro   r   sortedr  )rg   r  
model_barelivemodel_lowerr  r  s          r   #_resolve_codex_oauth_context_lengthr  *  s    (..4466J t 1,??
## &&(( 	 	ID#zz||{**


 + ""$$K%++--3F3FPT    	c ;JJJ  4r   c                 8   |rt          | ||          }||dfS t                      }dt          dt          dt          t
                   fd}| |v r || ||                    }||dfS t          |                                           }|                                D ]\  }}	d	|v r|	                    d	d
          d
         n|}
|
                                |                                 k    s%t          |
                                          |k    r |||	          }||dfc S |                                 }|                                D ]\  }}	d	|v r|	                    d	d
          d
         n|}
|
                                |ft          |
                                          |ffD ]g\  }}|
                    |          rMt          |          t          |          k    s|t          |                   dv r |||	          }||dfc c S hdS )uM  Resolve Nous Portal model context length.

    Tries the live Nous inference endpoint first (authoritative), then falls
    back to OpenRouter metadata with suffix/version matching.

    Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
    'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
    'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6').  Version
    normalization (dot↔dash) is applied to handle name drifts.

    Returns ``(context_length, source)`` where ``source`` is one of:
      - ``"portal"``    — live /v1/models response (authoritative)
      - ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
        callers must NOT persist this to the on-disk cache or a single
        portal blip will freeze the wrong value in forever)
      - ``""``           — could not resolve
    rm  Nportalor_idrU  r   c                     |                     d          }|d S |dk    r-t          |           rt                              d||            d S |S )Nr      z{Rejecting OpenRouter metadata context=%s for %r (Kimi-family underreport, Nous path); falling through to hardcoded defaults)r  r  rd  r  )r  rU  r  s      r   	_safe_ctxz/_resolve_nous_context_length.<locals>._safe_ctxl  sa    ii());4%<<5e<<<KK^U  
 4
r   r[   r   rk   z-:.)Nr   )r  rk  r   r  r   r   r  ro   r   rm   rl   r   )rg   r   r   
portal_ctxmetadatar  r  r   r  rU  barer  r  querys                 r   _resolve_nous_context_lengthr	  K  sf   4  (5eXwWWW
!x''#%%H T hsm     ix//?$$)%006688J (( ) )u),u{{3""1%%5::<<5;;==((,DT,J,J,P,P,R,RV`,`,`)E5))CL((((++--K (( - -u),u{{3""1%%5"&**,,!<?WX\?]?]?c?c?e?egq>r s 	- 	-Iu##E** -I#e**,,	#e**0E0N0Niu--?,,,,,,	- 8r   config_context_lengthr   custom_providersc                 	   |t          |t                    r|dk    r|S |r.|r,| r*	 ddlm}  || ||          }|r|S n# t          $ r Y nw xY wt          |           } |r|dk    rt          | |          }||dk    r6|dk    r0t                              d| ||d	           t          | |           nw|d
k    r?t          |           r0t                              d| ||d	           t          | |           n2t          |          dk    rt                              d| |           n|S |dk    s4|rTt          |                              d          r2t          |d          r"	 ddlm}	  |	|           S # t$          $ r Y nw xY w|dk    s|r;t          |d          r+t'          | |pd|          }
|
|rt)          | ||
           |
S t+          |          rt-          |          st'          | ||          }||S t-          |          st/          | ||          }
|
t)          | ||
           |
S t1          |          r3t3          | ||          }|r|dk    r|dk    rt)          | ||           |S t                              d| |t4          d	           t4          S |dk    s|r*t          |          dk    rt7          | |pd|          }
|
r|
S |}|r|dv r|rt          |          }|r|}|dv r)	 ddlm}  || |          }
|
r|
S n# t          $ r Y nw xY w|dk    r6t=          | |pd|pd          \  }
}|
r|r|dk    rt)          | ||
           |
S |dk    r*t?          | |pd           }|r|rt)          | ||           |S |d!k    r|rt'          | ||          }
|
|
S |r't/          | ||          }
|
t)          | ||
           |
S |rdd"l m!}  |||           }
|
r|
S |sgtE                      }| |v rU||          #                    d#t4                    }|d
k    r,t          |           rt                              d$||            n|S | $                                }tK          tL          '                                d% d&'          D ]\  }}||v r|c S |rBt1          |          r3t3          | ||          }|r|dk    r|dk    rt)          | ||           |S t4          S )(a  Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing).  Nous URLs
       bypass the cache here so step 5b can always reconcile against
       the authoritative portal /v1/models response.
    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
    5. Provider-aware lookups (before generic OpenRouter cache):
       a. Copilot live /models API
       b. Nous: live /v1/models probe first (authoritative), then OR
          cache fallback with suffix/version normalisation.  Only
          portal-derived values are persisted to disk.
       c. Codex OAuth /models probe
       d. GMI /models endpoint
       e. Ollama native /api/show probe (any base_url, provider-agnostic)
       f. models.dev registry lookup (with :cloud/-cloud suffix fallback)
    6. OpenRouter live API metadata (Kimi-family 32k guard)
    7. Hardcoded defaults (broad family patterns, longest-key-first)
    8. Local server query (last resort)
    9. Default fallback (256K)Nr   )"get_custom_provider_context_length)rg   r   r  lmstudiorb   r   zaDropping stale Codex cache entry %s@%s -> %s (pre-fix value); re-resolving via live /models prober  r  ziDropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); re-resolving via hardcoded defaultsr;   z@Bypassing persistent cache for %s@%s (Nous portal authoritative)bedrockzbedrock-runtime.zamazonaws.com)get_bedrock_context_lengthrF   r   zhttps://api.novita.ai/openai/v1rm  u   Could not detect context length for model %r at %s — defaulting to %s tokens (probe-down). Set model.context_length in config.yaml to override.rY   r   zhttps://api.anthropic.com>   rB   r[   >   rM   r^   rc   )get_copilot_model_contextr   )r   r   r   )r  r2   )lookup_models_dev_contextr   zpRejecting OpenRouter metadata context=%s for %r (Kimi-family underreport); falling through to hardcoded defaultsc                 ,    t          | d                   S r  r  r  s    r   r  z*get_model_context_length.<locals>.<lambda>  s    s1Q4yy r   Tr  )(r   r   hermes_cli.configr  r   ru   r  rd  r  r  r  r   re  r   rl   r
   agent.bedrock_adapterr  ImportErrorr  r  r   r   r  r   r  DEFAULT_FALLBACK_CONTEXTr  hermes_cli.modelsr  r	  r  agent.models_devr  rk  r  ro   r  DEFAULT_CONTEXT_LENGTHSr   )rg   r   r   r
  r   r  r  cp_ctxry  r  r  r   	local_ctxeffective_providerinferredr  source	codex_ctxr  r  or_ctxr  default_modelr  s                           r   get_model_context_lengthr#    si   B (Z8Ms-S-S(XmpqXqXq$$  H  
	LLLLLL77!!1  F
   	 	 	D	 #5))E  'H
***5(;; >))f.?.?:8]]  
 2%BBBB5%>u%E%E:8]]  
 2%BBBB *(33v==V8     9 h''223EFF  "(O<< 
	HHHHHH--e444 	 	 	D	 8-B8_-]-].uh6cBcmtuuu? :#E8S999J 8$$ ,-H-R-R ,9%SZ[[[%!!*844 	, )'JJJC#E8S999
 ** %7xQXYYY	 %Q:--+E8YGGG$$KK. x$<!@!@	   ,+ ; &x004GGG-eX5\A\^eff 	J " .!37O!O!O 	./99H .%-" III	CCCCCC++E7CCCC 
 	 	 	D	 V##2HNGMr
 
 
V  		  :Fh..#E8S999J^++ 8GMWYZZZ	 	 @#E8Y???U""x" /uhPPP?J  $UHgFFF?x555J >>>>>>''(:EBB 	J  '))He_(()9;STTF#<U#C#CWE     ++--K!'%%''-@-@$" " "  v K''MMM (  %h// /xQQQ	 	Q:%%#E8Y??? $#s5   A   
AA'E8 8
FF6L 
LLr  c                 4    | sdS t          |           dz   dz  S )a  Rough token estimate (~4 chars/token) for pre-flight checks.

    Uses ceiling division so short texts (1-3 chars) never estimate as
    0 tokens, which would cause the compressor and pre-flight checks to
    systematically undercount when many short tool results are present.
    r      r   r  )r  s    r   estimate_tokens_roughr&    s&      qIIMar   messagesc                 x    d}d}d}| D ]'}|t          |          z  }|t          ||          z  }(|dz   dz  |z   S )ue  Rough token estimate for a message list (pre-flight only).

    Image parts (base64 PNG/JPEG) are counted as a flat ~1500 tokens per
    image — the Anthropic pricing model — instead of counting raw base64
    character length. Without this, a single ~1MB screenshot would be
    estimated at ~250K tokens and trigger premature context compression.
    i  r   r%  r   )_estimate_message_chars_count_image_tokens)r'  _IMAGE_TOKEN_COSTtotal_charsimage_tokensmsgs        r   estimate_messages_tokens_roughr/    sd     KL D D.s333+C1BCCC1_"l22r   r.  cost_per_imagec                 
   d}t          | t                    r|                     d          nd}t          |t                    r9|D ]6}t          |t                    s|                    d          }|dv r|dz  }7t          | t                    r|                     d          nd}t          |t                    r8|D ]5}t          |t                    r|                    d          dk    r|dz  }6t          |t                    ru|                    d	          r`|                    d          }t          |t                    r6|D ]3}t          |t                    r|                    d          d
v r|dz  }4||z  S )zECount image-like content parts in a message; return their token cost.r   contentNtype>   image	image_urlinput_imagerk   _anthropic_content_blocksr4  _multimodal>   r4  r5  )r   r  r  r  )r.  r0  countr2  partptypestashedinners           r   r*  r*    s   E$.sD$9$9Ccggi   tG'4    	 	DdD)) HHV$$E===
6@d6K6KUcgg1222QUG'4    	 	D$%% $((6*:*:g*E*E
'4   W[[%?%? I&&eT"" 	  dD)) dhhv.>.>BX.X.XQJE>!!r   c                    t          | t                    st          t          |                     S i }|                                 D ]\  }}|dk    r|dk    rt          |t
                    rg }|D ]}t          |t                    rY|                    d          dv r,|                    |                    d          dd           Z|                    |           p|                    |           |||<   t          |t                    r/|                    d          r|                    dd	          ||<   |||<   |||<   
t          t          |                    S )
zChar count for token estimation, excluding base64 image data.

    Base64 images are counted via `_count_image_tokens` instead; including
    their raw chars here would massively overestimate token usage.
    r7  r2  r3  >   r4  r5  r6  z
[stripped])r3  r4  r8  text_summaryr   )r   r  r   r   r   r  r  rw  )r.  shadowkvcleanedr:  s         r   r)  r)    sv    c4   3s88}}F		  1+++	>>!T""  - -D!$-- -88F++/TTT#NNDHHV4D4D|+\+\]]]]#NN40000t,,,,#q		At$$ })=)= EE."55q		q		F1IIs6{{r   )system_prompttoolsrD  rE  c                    d}|r|t          |          dz   dz  z  }| r|t          |           z  }|r%|t          t          |                    dz   dz  z  }|S )u  Rough token estimate for a full chat-completions request.

    Includes the major payload buckets Hermes sends to providers:
    system prompt, conversation messages, and tool schemas.  With 50+
    tools enabled, schemas alone can add 20-30K tokens — a significant
    blind spot when only counting messages. Image content is counted
    at a flat per-image cost (see estimate_messages_tokens_rough).
    r   r%  r   )r   r/  r   )r'  rD  rE  totals       r   estimate_request_tokens_roughrH    su     E /#m$$q(Q.. :/999 ,#c%jj//A%!++Lr   )r   )r  r  )F)r   F)r   r   )r   r   Nr   N)s__doc__r   rf  r   r  r`  pathlibr   typingr   r   r   r   r   urllib.parser	   rb  r  utilsr
   r   r  r   	getLogger__name__rd  r   r   r   	frozensetrf   __annotations__compile
IGNORECASErp   IPv4Networkr   ru   rv   rw   rL  rx   ry   ra  rz   r{   rv  r  r  MINIMUM_CONTEXT_LENGTHr  r   r   r*  r/  r   r   r   r   r   r   r   	providersr   _list_providers_ppget_hostname_hostr   r   r   r   r   r  r  r   r  tupler(  r,  r0  rR  rY  rk  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r#  r&  r/  r*  r)  rH  r"  r   r   <module>r\     s          				 				        3 3 3 3 3 3 3 3 3 3 3 3 3 3 ! ! ! ! ! !   : : : : : : : : 2 2 2 2 2 2		8	$	$$*    * &/Y 0 0 0 & & IcN   4 !bjLM   )9(99 # #    & 46 tCc3h/0 5 5 5$% E % % %46 S$sCx.01 6 6 6%& U & & & GI S$sDcN/B*C%C D I I I8: #T#u*%5 : : :     /q1 
   n
 wn wn wn n wn n fn& w'n( F)n* F+n, w-n: 6;n< F=n> V?n@ wAnB VCnF gGn nJ vKnL fMnN 6OnP vQnR TSnb ycnd enf Ygnh inj knn Vont Gunv wnx 6ynz F{n@ vAnD 
6En n nR fSnT WUnV TWnX 7YnZ [n\ ]n^ f_n` fanb fcnd Fenh Finp 6qnt unx vyn| }n@ fAnB FCn nD "'""#)$ &[n n n z! T# T$ T T T T$   < 0# 0# 0 0 0 00 03 0S#X 0 0 0 0<c <d < < < <H# H$ H H H H
&$h&$8&$ &$ 	&$
 &$ }&$ '&$ M&$ i&$ y&$ G&$ 9&$ i&$ "9&$ l&$  \!&$" (#&$$ '-"&! &/#  ("  2 K&$ &$ &$ $sCx. & & &T	;;;;;;   / /  "" 	/U"222&)hU#/  	 	 	D	s x}    $:# :$ : : : :1 1 1 1 1 1h: :s :S :(3- : : : :z0c 0 0 0 0 #  S ZbcfZg    	S#X 	eCHo 	(SV- 	 	 	 	=T#s(^ = = = = ==DcN =x} = = = =d38n c3h    B,d3S#X#67 ,3 ,tTWY\T\~ ,bf , , , ,!+ !+ !+c4S>>Q9R !+ !+ !+ !+L M MMM M 
#tCH~
	M M M Mf    c]	   .; ; ; ; ;T#s(^    Cs Cc C3 C4 C C C C*S C HSM    WS WC WD W W W W      c hsm    8( ( ( ( ( (VC s t    &4 4 4s 4S 4(SV- 4 4 4 4n: :# : :s :HUXM : : : :z	;S 	;T 	; 	; 	; 	;[ [s [c [C [QYZ]Q^ [ [ [ [|#C #C # # # #3 #  PXY\P]    P !! #1 1 tCH~   & .0 DcN / / /),  , , ,!% 0S 0T#s(^ 0 0 0 0h %' !c]   F G GGG G 8C=#	G G G GX (,$(^$ ^$^$^$ ^$ :	^$
 ^$ Tk^$ 	^$ ^$ ^$ ^$B		  	  	  	  	  	 3T$sCx.-A 3c 3 3 3 3""T#s(^ "S "S " " " "4c3h C    H ,0	  4S>"  Dc3h()	
 	     s   !7J J! J!