# Model endpoint definitions. Each list entry carries an `id` plus connection,
# rate-limit, and request-parameter settings.
# NOTE(review): the program that loads this file is not visible here; field
# meanings below are inferred from their names and should be confirmed
# against the loader.
model_configs:
# Entry for model `z-ai/glm-5.1`. The same id string appears as `config_id`
# under the top-level `keys` section.
- id: nvidia-glm-cfg
  provider: nvidia
  model_name: z-ai/glm-5.1
  api_base: https://integrate.api.nvidia.com/v1
  # Presumably requests-per-minute / tokens-per-minute caps and a cap on
  # in-flight requests — TODO confirm units and enforcement in the loader.
  rpm_limit: 40
  tpm_limit: 100000
  max_concurrency: 8
  # Presumably passed through as request parameters — confirm.
  params:
    # Written as integers here, whereas the `llama-local-cfg` entry writes
    # `temperature: 1.0`; the parsed types differ (int vs float).
    temperature: 1
    top_p: 1
    max_tokens: 16384
    stream: false
    # Presumably forwarded to the model's chat template — confirm that the
    # target endpoint accepts both flags.
    chat_template_kwargs:
      enable_thinking: false
      clear_thinking: false
# Entry for model `llama-local`, addressed by private-network URLs.
# NOTE(review): `provider: openai` together with local addresses presumably
# selects an OpenAI-compatible client — confirm against the loader.
- id: llama-local-cfg
  provider: openai
  model_name: llama-local
  # Same URL as the first `api_endpoints` item below.
  # NOTE(review): how `api_base` and `api_endpoints` interact (fallback vs.
  # rotation) cannot be told from this file — confirm.
  api_base: http://192.168.2.101:8848/v1
  # Three endpoints, all enabled.
  # NOTE(review): the first is on 192.168.2.x while the other two are on
  # 192.168.1.x — confirm the subnet difference is intentional.
  api_endpoints:
  - url: http://192.168.2.101:8848/v1
    enabled: true
  - url: http://192.168.1.51:8848/v1
    enabled: true
  - url: http://192.168.1.61:8848/v1
    enabled: true
  # Very large values, presumably meant to make throttling effectively
  # unlimited for the local servers — TODO confirm.
  rpm_limit: 999999
  tpm_limit: 999999999
  max_concurrency: 999
  # Presumably passed through as request parameters — confirm.
  params:
    temperature: 1.0
    max_tokens: 6144
    # Only `enable_thinking` is set here; the `nvidia-glm-cfg` entry also sets
    # `clear_thinking`.
    chat_template_kwargs:
      enable_thinking: false
# API-key bindings. Each entry names a `config_id` and lists keys for it.
# NOTE(review): no entry for `llama-local-cfg` is visible in this part of the
# file — presumably that configuration needs no key; confirm.
keys:
# `config_id` matches the `id` of the `nvidia-glm-cfg` entry in
# `model_configs`.
- config_id: nvidia-glm-cfg
  enabled: true
  keys:
  # NOTE(review): `nvapi-key-1` and `name` look like placeholder values. If
  # real credentials are stored inline here, keep this file out of version
  # control or reference a secret store instead.
  - key: nvapi-key-1
    owner: name