# Source listing metadata: 42 lines, 952 B, YAML, executable file.
# Model endpoint configurations: a list of entries, each identified by `id`.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation. Confirm against the consumer's schema, in particular
# that `chat_template_kwargs` belongs under `params` and not beside it.
model_configs:
# Entry pointing at the integrate.api.nvidia.com endpoint.
- id: nvidia-glm-cfg
  provider: nvidia
  model_name: z-ai/glm-5.1
  api_base: https://integrate.api.nvidia.com/v1
  # Limits for this entry; presumably requests/minute and tokens/minute --
  # TODO confirm units with the consumer.
  rpm_limit: 40
  tpm_limit: 100000
  max_concurrency: 8
  # Presumably forwarded as request parameters -- verify against the caller.
  params:
    temperature: 1
    top_p: 1
    max_tokens: 16384
    stream: false
    chat_template_kwargs:
      enable_thinking: false
      clear_thinking: false
# Entry pointing at hosts on private 192.168.x.x addresses.
- id: llama-local-cfg
  # NOTE(review): `openai` here presumably selects an OpenAI-compatible
  # client for the local server -- confirm.
  provider: openai
  model_name: llama-local
  api_base: http://192.168.2.101:8848/v1
  # The first URL repeats `api_base`; how the consumer chooses between
  # `api_base` and `api_endpoints` is not visible here.
  api_endpoints:
  - url: http://192.168.2.101:8848/v1
    enabled: true
  - url: http://192.168.1.51:8848/v1
    enabled: true
  - url: http://192.168.1.61:8848/v1
    enabled: true
  # Very large values; presumably meant as "effectively unlimited".
  rpm_limit: 999999
  tpm_limit: 999999999
  max_concurrency: 999
  params:
    temperature: 1.0
    max_tokens: 6144
    chat_template_kwargs:
      enable_thinking: false
# API key bindings: each entry attaches a list of keys to a model config.
# NOTE(review): nesting reconstructed from a listing that had lost its
# indentation -- confirm against the consumer's schema.
keys:
# `config_id` matches the `id` of the nvidia-glm-cfg entry in model_configs.
- config_id: nvidia-glm-cfg
  enabled: true
  keys:
  # NOTE(review): `nvapi-key-1` and `name` look like placeholders. Keep real
  # credentials out of version control (environment variable or secret store).
  - key: nvapi-key-1
    owner: name