---
# Model endpoint configurations and the API keys assigned to them.
#
# NOTE(review): the nesting below was reconstructed from a whitespace-flattened
# copy of this file. In particular, `chat_template_kwargs` is assumed to live
# under `params`, and the inner `keys` list under each `config_id` entry.
# Confirm against the consumer's schema.

model_configs:
  # Hosted endpoint (provider: nvidia).
  - id: nvidia-glm-cfg
    provider: nvidia
    model_name: z-ai/glm-5.1
    api_base: https://integrate.api.nvidia.com/v1
    # Presumably requests/tokens per minute and max parallel requests;
    # verify the units against the consumer.
    rpm_limit: 40
    tpm_limit: 100000
    max_concurrency: 8
    params:
      temperature: 1
      top_p: 1
      max_tokens: 16384
      stream: false
      chat_template_kwargs:
        enable_thinking: false
        clear_thinking: false

  # Self-hosted endpoints on private addresses, declared with provider: openai.
  - id: llama-local-cfg
    provider: openai
    model_name: llama-local
    # Same URL as the first entry of `api_endpoints` below.
    api_base: http://192.168.2.101:8848/v1
    api_endpoints:
      - url: http://192.168.2.101:8848/v1
        enabled: true
      - url: http://192.168.1.51:8848/v1
        enabled: true
      - url: http://192.168.1.61:8848/v1
        enabled: true
    # Very large limits compared with the hosted config above; presumably
    # meant as "effectively unlimited" (TODO confirm).
    rpm_limit: 999999
    tpm_limit: 999999999
    max_concurrency: 999
    params:
      temperature: 1.0
      max_tokens: 6144
      chat_template_kwargs:
        enable_thinking: false

# API keys, grouped by the model config they belong to. `config_id` matches an
# `id` under `model_configs`; only nvidia-glm-cfg has an entry here.
keys:
  - config_id: nvidia-glm-cfg
    enabled: true
    keys:
      # NOTE(review): this looks like a placeholder value. If real keys are
      # ever stored here, keep the file out of version control.
      - key: nvapi-key-1
        owner: name