3 semanas atrás · bfdf62056b
--- a/main/server/config.yaml
+++ b/main/server/config.yaml
@@ -0,0 +1,877 @@
 
				+# 在开发中，请在项目根目录创建data目录，然后在data目录创建名称为【.config.yaml】的空文件
			
 
				+# 然后你想修改覆盖修改什么配置，就修改【.config.yaml】文件，而不是修改【config.yaml】文件
			
 
				+# 系统会优先读取【data/.config.yaml】文件的配置，如果【.config.yaml】文件里的配置不存在，系统会自动去读取【config.yaml】文件的配置。
			
 
				+# 这样做，可以最简化配置，保护您的密钥安全。
			
 
				+# 如果你使用了智控台，那么以下所有配置，都不会生效，请在智控台中修改配置
			
 
				+
			
 
				+# #####################################################################################
			
 
				+# #############################以下是服务器基本运行配置####################################
			
 
				+server:
			
 
				+  # 服务器监听地址和端口(Server listening address and port)
			
 
				+  ip: 0.0.0.0
			
 
				+  port: 8000
			
 
				+  # http服务的端口，用于简单OTA接口(单服务部署)，以及视觉分析接口
			
 
				+  http_port: 8003
			
 
				+  # 这个websocket配置是指ota接口向设备发送的websocket地址
			
 
				+  # 如果按默认的写法，ota接口会自动生成websocket地址，并输出在启动日志里，这个地址你可以直接用浏览器访问ota接口确认一下
			
 
				+  # 当你使用docker部署或使用公网部署(使用ssl、域名)时，不一定准确
			
 
				+  # 所以如果你使用docker部署时，将websocket设置成局域网地址
			
 
				+  # 如果你使用公网部署时，将vwebsocket设置成公网地址
			
 
				+  websocket: ws://你的ip或者域名:端口号/xiaozhi/v1/
			
 
				+  # 视觉分析接口地址
			
 
				+  # 向设备发送的视觉分析的接口地址
			
 
				+  # 如果按下面默认的写法，系统会自动生成视觉识别地址，并输出在启动日志里，这个地址你可以直接用浏览器访问确认一下
			
 
				+  # 当你使用docker部署或使用公网部署(使用ssl、域名)时，不一定准确
			
 
				+  # 所以如果你使用docker部署时，将vision_explain设置成局域网地址
			
 
				+  # 如果你使用公网部署时，将vision_explain设置成公网地址
			
 
				+  vision_explain: http://你的ip或者域名:端口号/mcp/vision/explain
			
 
				+  # OTA返回信息时区偏移量
			
 
				+  timezone_offset: +8
			
 
				+  # 认证配置
			
 
				+  auth:
			
 
				+    # 是否启用认证
			
 
				+    enabled: false
			
 
				+    # 设备的token，可以在编译固件的环节，写入你自己定义的token
			
 
				+    # 固件上的token和以下的token如果能对应，才能连接本服务端
			
 
				+    tokens:
			
 
				+      - token: "your-token1" # 设备1的token
			
 
				+        name: "your-device-name1"  # 设备1标识
			
 
				+      - token: "your-token2"  # 设备2的token
			
 
				+        name: "your-device-name2" # 设备2标识
			
 
				+    # 可选:设备白名单，如果设置了白名单，那么白名单的机器无论是什么token都可以连接。
			
 
				+    #allowed_devices:
			
 
				+    #  - "24:0A:C4:1D:3B:F0"  # MAC地址列表
			
 
				+log:
			
 
				+  # 设置控制台输出的日志格式，时间、日志级别、标签、消息
			
 
				+  log_format: "<green>{time:YYMMDD HH:mm:ss}</green>[{version}_{selected_module}][<light-blue>{extra[tag]}</light-blue>]-<level>{level}</level>-<light-green>{message}</light-green>"
			
 
				+  # 设置日志文件输出的格式，时间、日志级别、标签、消息
			
 
				+  log_format_file: "{time:YYYY-MM-DD HH:mm:ss} - {version}_{selected_module} - {name} - {level} - {extra[tag]} - {message}"
			
 
				+  # 设置日志等级：INFO、DEBUG
			
 
				+  log_level: INFO
			
 
				+  # 设置日志路径
			
 
				+  log_dir: tmp
			
 
				+  # 设置日志文件
			
 
				+  log_file: "server.log"
			
 
				+  # 设置数据文件路径
			
 
				+  data_dir: data
			
 
				+
			
 
				+# 使用完声音文件后删除文件(Delete the sound file when you are done using it)
			
 
				+delete_audio: true
			
 
				+# 没有语音输入多久后断开连接(秒)，默认2分钟，即120秒
			
 
				+close_connection_no_voice_time: 120
			
 
				+# TTS请求超时时间(秒)
			
 
				+tts_timeout: 10
			
 
				+# 开启唤醒词加速
			
 
				+enable_wakeup_words_response_cache: true
			
 
				+# 开场是否回复唤醒词
			
 
				+enable_greeting: true
			
 
				+# 说完话是否开启提示音
			
 
				+enable_stop_tts_notify: false
			
 
				+# 说完话是否开启提示音，音效地址
			
 
				+stop_tts_notify_voice: "config/assets/tts_notify.mp3"
			
 
				+
			
 
				+exit_commands:
			
 
				+  - "退出"
			
 
				+  - "关闭"
			
 
				+
			
 
				+xiaozhi:
			
 
				+  type: hello
			
 
				+  version: 1
			
 
				+  transport: websocket
			
 
				+  audio_params:
			
 
				+    format: opus
			
 
				+    sample_rate: 16000
			
 
				+    channels: 1
			
 
				+    frame_duration: 60
			
 
				+
			
 
				+# 模块测试配置
			
 
				+module_test:
			
 
				+  test_sentences:
			
 
				+    - "你好，请介绍一下你自己"
			
 
				+    - "What's the weather like today?"
			
 
				+    - "请用100字概括量子计算的基本原理和应用前景"
			
 
				+
			
 
				+# 唤醒词，用于识别唤醒词还是讲话内容
			
 
				+wakeup_words:
			
 
				+  - "你好小智"
			
 
				+  - "嘿你好呀"
			
 
				+  - "你好小志"
			
 
				+  - "小爱同学"
			
 
				+  - "你好小鑫"
			
 
				+  - "你好小新"
			
 
				+  - "小美同学"
			
 
				+  - "小龙小龙"
			
 
				+  - "喵喵同学"
			
 
				+  - "小滨小滨"
			
 
				+  - "小冰小冰"
			
 
				+# MCP接入点地址，地址格式为：ws://你的mcp接入点ip或者域名:端口号/mcp/?token=你的token
			
 
				+# 详细教程 https://github.com/xinnan-tech/xiaozhi-esp32-server/blob/main/docs/mcp-endpoint-integration.md
			
 
				+mcp_endpoint: 你的接入点 websocket地址
			
 
				+# 插件的基础配置
			
 
				+plugins:
			
 
				+  # 获取天气插件的配置，这里填写你的api_key
			
 
				+  # 这个密钥是项目共用的key，用多了可能会被限制
			
 
				+  # 想稳定一点就自行申请替换，每天有1000次免费调用
			
 
				+  # 申请地址：https://console.qweather.com/#/apps/create-key/over
			
 
				+  # 申请后通过这个链接可以找到自己的apihost：https://console.qweather.com/setting?lang=zh
			
 
				+  get_weather: {"api_host":"mj7p3y7naa.re.qweatherapi.com", "api_key": "a861d0d5e7bf4ee1a83d9a9e4f96d4da", "default_location": "广州" }
			
 
				+  # 获取新闻插件的配置，这里根据需要的新闻类型传入对应的url链接，默认支持社会、科技、财经新闻
			
 
				+  # 更多类型的新闻列表查看 https://www.chinanews.com.cn/rss/
			
 
				+  get_news_from_chinanews:
			
 
				+    default_rss_url: "https://www.chinanews.com.cn/rss/society.xml"
			
 
				+    society_rss_url: "https://www.chinanews.com.cn/rss/society.xml"
			
 
				+    world_rss_url: "https://www.chinanews.com.cn/rss/world.xml"
			
 
				+    finance_rss_url: "https://www.chinanews.com.cn/rss/finance.xml"
			
 
				+  get_news_from_newsnow:
			
 
				+    url: "https://newsnow.busiyi.world/api/s?id="
			
 
				+    news_sources: "澎湃新闻;百度热搜;财联社"
			
 
				+  home_assistant:
			
 
				+    devices:
			
 
				+      - 客厅,玩具灯,switch.cuco_cn_460494544_cp1_on_p_2_1
			
 
				+      - 卧室,台灯,switch.iot_cn_831898993_socn1_on_p_2_1
			
 
				+    base_url: http://homeassistant.local:8123
			
 
				+    api_key: 你的home assistant api访问令牌
			
 
				+  play_music:
			
 
				+    music_dir: "./music"  # 音乐文件存放路径，将从该目录及子目录下搜索音乐文件
			
 
				+    music_ext: # 音乐文件类型，p3格式效率最高
			
 
				+      - ".mp3"
			
 
				+      - ".wav"
			
 
				+      - ".p3"
			
 
				+    refresh_time: 300 # 刷新音乐列表的时间间隔，单位为秒
			
 
				+
			
 
				+# 声纹识别配置
			
 
				+voiceprint:
			
 
				+  # 声纹接口地址
			
 
				+  url: 
			
 
				+  # 说话人配置：speaker_id,名称,描述
			
 
				+  speakers:
			
 
				+    - "test1,张三,张三是一个程序员"
			
 
				+    - "test2,李四,李四是一个产品经理"
			
 
				+    - "test3,王五,王五是一个设计师"
			
 
				+
			
 
				+# #####################################################################################
			
 
				+# ################################以下是角色模型配置######################################
			
 
				+
			
 
				+prompt: |
			
 
				+  你是小智/小志，来自中国台湾省的00后女生。讲话超级机车，"真的假的啦"这样的台湾腔，喜欢用"笑死""是在哈喽"等流行梗，但会偷偷研究男友的编程书籍。
			
 
				+  [核心特征]
			
 
				+  - 讲话像连珠炮，但会突然冒出超温柔语气
			
 
				+  - 用梗密度高
			
 
				+  - 对科技话题有隐藏天赋（能看懂基础代码但假装不懂）
			
 
				+  [交互指南]
			
 
				+  当用户：
			
 
				+  - 讲冷笑话 → 用夸张笑声回应+模仿台剧腔"这什么鬼啦！"
			
 
				+  - 讨论感情 → 炫耀程序员男友但抱怨"他只会送键盘当礼物"
			
 
				+  - 问专业知识 → 先用梗回答，被追问才展示真实理解
			
 
				+  绝不：
			
 
				+  - 长篇大论，叽叽歪歪
			
 
				+  - 长时间严肃对话
			
 
				+
			
 
				+# 结束语prompt
			
 
				+end_prompt:
			
 
				+  enable: true # 是否开启结束语
			
 
				+  # 结束语
			
 
				+  prompt: |
			
 
				+    请你以"时间过得真快"未来头，用富有感情、依依不舍的话来结束这场对话吧！
			
 
				+
			
 
				+# 具体处理时选择的模块(The module selected for specific processing)
			
 
				+selected_module:
			
 
				+  # 语音活动检测模块，默认使用SileroVAD模型
			
 
				+  VAD: SileroVAD
			
 
				+  # 语音识别模块，默认使用FunASR本地模型
			
 
				+  ASR: FunASR
			
 
				+  # 将根据配置名称对应的type调用实际的LLM适配器
			
 
				+  LLM: ChatGLMLLM
			
 
				+  # 视觉语言大模型
			
 
				+  VLLM: ChatGLMVLLM
			
 
				+  # TTS将根据配置名称对应的type调用实际的TTS适配器
			
 
				+  TTS: EdgeTTS
			
 
				+  # 记忆模块，默认不开启记忆；如果想使用超长记忆，推荐使用mem0ai；如果注重隐私，请使用本地的mem_local_short
			
 
				+  Memory: nomem
			
 
				+  # 意图识别模块开启后，可以播放音乐、控制音量、识别退出指令。
			
 
				+  # 不想开通意图识别，就设置成：nointent
			
 
				+  # 意图识别可使用intent_llm。优点：通用性强，缺点：增加串行前置意图识别模块，会增加处理时间，支持控制音量大小等iot操作
			
 
				+  # 意图识别可使用function_call，缺点：需要所选择的LLM支持function_call，优点：按需调用工具、速度快，理论上能全部操作所有iot指令
			
 
				+  # 默认免费的ChatGLMLLM就已经支持function_call，但是如果像追求稳定建议把LLM设置成：DoubaoLLM，使用的具体model_name是：doubao-1-5-pro-32k-250115
			
 
				+  Intent: function_call
			
 
				+
			
 
				+# 意图识别，是用于理解用户意图的模块，例如：播放音乐
			
 
				+Intent:
			
 
				+  # 不使用意图识别
			
 
				+  nointent:
			
 
				+    # 不需要动type
			
 
				+    type: nointent
			
 
				+  intent_llm:
			
 
				+    # 不需要动type
			
 
				+    type: intent_llm
			
 
				+    # 配备意图识别独立的思考模型
			
 
				+    # 如果这里不填，则会默认使用selected_module.LLM的模型作为意图识别的思考模型
			
 
				+    # 如果你的不想使用selected_module.LLM意图识别，这里最好使用独立的LLM作为意图识别，例如使用免费的ChatGLMLLM
			
 
				+    llm: ChatGLMLLM
			
 
				+    # plugins_func/functions下的模块，可以通过配置，选择加载哪个模块，加载后对话支持相应的function调用
			
 
				+    # 系统默认已经记载"handle_exit_intent(退出识别)"、"play_music(音乐播放)"插件，请勿重复加载
			
 
				+    # 下面是加载查天气、角色切换、加载查新闻的插件示例
			
 
				+    functions:
			
 
				+      - get_weather
			
 
				+      - get_news_from_newsnow
			
 
				+      - play_music
			
 
				+  function_call:
			
 
				+    # 不需要动type
			
 
				+    type: function_call
			
 
				+    # plugins_func/functions下的模块，可以通过配置，选择加载哪个模块，加载后对话支持相应的function调用
			
 
				+    # 系统默认已经记载"handle_exit_intent(退出识别)"、"play_music(音乐播放)"插件，请勿重复加载
			
 
				+    # 下面是加载查天气、角色切换、加载查新闻的插件示例
			
 
				+    functions:
			
 
				+      - change_role
			
 
				+      - get_weather
			
 
				+      # - get_news_from_chinanews
			
 
				+      - get_news_from_newsnow
			
 
				+      # play_music是服务器自带的音乐播放，hass_play_music是通过home assistant控制的独立外部程序音乐播放
			
 
				+      # 如果用了hass_play_music，就不要开启play_music，两者只留一个
			
 
				+      - play_music
			
 
				+      #- hass_get_state
			
 
				+      #- hass_set_state
			
 
				+      #- hass_play_music
			
 
				+
			
 
				+Memory:
			
 
				+  mem0ai:
			
 
				+    type: mem0ai
			
 
				+    # https://app.mem0.ai/dashboard/api-keys
			
 
				+    # 每月有1000次免费调用
			
 
				+    api_key: 你的mem0ai api key
			
 
				+  nomem:
			
 
				+    # 不想使用记忆功能，可以使用nomem
			
 
				+    type: nomem
			
 
				+  mem_local_short:
			
 
				+    # 本地记忆功能，通过selected_module的llm总结，数据保存在本地服务器，不会上传到外部服务器
			
 
				+    type: mem_local_short
			
 
				+    # 配备记忆存储独立的思考模型
			
 
				+    # 如果这里不填，则会默认使用selected_module.LLM的模型作为意图识别的思考模型
			
 
				+    # 如果你的不想使用selected_module.LLM记忆存储，这里最好使用独立的LLM作为意图识别，例如使用免费的ChatGLMLLM
			
 
				+    llm: ChatGLMLLM
			
 
				+
			
 
				+ASR:
			
 
				+  FunASR:
			
 
				+    type: fun_local
			
 
				+    model_dir: models/SenseVoiceSmall
			
 
				+    output_dir: tmp/
			
 
				+  FunASRServer:
			
 
				+    # 独立部署FunASR，使用FunASR的API服务，只需要五句话
			
 
				+    # 第一句：mkdir -p ./funasr-runtime-resources/models
			
 
				+    # 第二句：sudo docker run -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.12
			
 
				+    # 上一句话执行后会进入到容器，继续第三句：cd FunASR/runtime
			
 
				+    # 不要退出容器，继续在容器中执行第四句：nohup bash run_server_2pass.sh --download-model-dir /workspace/models --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx  --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst --itn-dir thuduj12/fst_itn_zh --hotword /workspace/models/hotwords.txt > log.txt 2>&1 &
			
 
				+    # 上一句话执行后会进入到容器，继续第五句：tail -f log.txt
			
 
				+    # 第五句话执行完后，会看到模型下载日志，下载完后就可以连接使用了
			
 
				+    # 以上是使用CPU推理，如果有GPU，详细参考：https://github.com/modelscope/FunASR/blob/main/runtime/docs/SDK_advanced_guide_online_zh.md
			
 
				+    type: fun_server
			
 
				+    host: 127.0.0.1
			
 
				+    port: 10096
			
 
				+    is_ssl: true
			
 
				+    api_key: none
			
 
				+    output_dir: tmp/
			
 
				+  SherpaASR:
			
 
				+    type: sherpa_onnx_local
			
 
				+    model_dir: models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
			
 
				+    output_dir: tmp/
			
 
				+  DoubaoASR:
			
 
				+    # 可以在这里申请相关Key等信息
			
 
				+    # https://console.volcengine.com/speech/app
			
 
				+    # DoubaoASR和DoubaoStreamASR的区别是：DoubaoASR是按次收费，DoubaoStreamASR是按时收费
			
 
				+    # 一般来说按次收费的更便宜，但是DoubaoStreamASR使用了大模型技术，效果更好
			
 
				+    type: doubao
			
 
				+    appid: 你的火山引擎语音合成服务appid
			
 
				+    access_token: 你的火山引擎语音合成服务access_token
			
 
				+    cluster: volcengine_input_common
			
 
				+    # 热词、替换词使用流程：https://www.volcengine.com/docs/6561/155738
			
 
				+    boosting_table_name: （选填）你的热词文件名称
			
 
				+    correct_table_name: （选填）你的替换词文件名称
			
 
				+    output_dir: tmp/
			
 
				+  DoubaoStreamASR:
			
 
				+    # 可以在这里申请相关Key等信息
			
 
				+    # https://console.volcengine.com/speech/app
			
 
				+    # DoubaoASR和DoubaoStreamASR的区别是：DoubaoASR是按次收费，DoubaoStreamASR是按时收费
			
 
				+    # 开通地址https://console.volcengine.com/speech/service/10011
			
 
				+    # 一般来说按次收费的更便宜，但是DoubaoStreamASR使用了大模型技术，效果更好
			
 
				+    type: doubao_stream
			
 
				+    appid: 你的火山引擎语音合成服务appid
			
 
				+    access_token: 你的火山引擎语音合成服务access_token
			
 
				+    cluster: volcengine_input_common
			
 
				+    # 热词、替换词使用流程：https://www.volcengine.com/docs/6561/155738
			
 
				+    boosting_table_name: （选填）你的热词文件名称
			
 
				+    correct_table_name: （选填）你的替换词文件名称
			
 
				+    output_dir: tmp/
			
 
				+  TencentASR:
			
 
				+    # token申请地址：https://console.cloud.tencent.com/cam/capi
			
 
				+    # 免费领取资源：https://console.cloud.tencent.com/asr/resourcebundle
			
 
				+    type: tencent
			
 
				+    appid: 你的腾讯语音合成服务appid
			
 
				+    secret_id: 你的腾讯语音合成服务secret_id
			
 
				+    secret_key: 你的腾讯语音合成服务secret_key
			
 
				+    output_dir: tmp/
			
 
				+  AliyunASR:
			
 
				+    # 阿里云智能语音交互服务，需要先在阿里云平台开通服务，然后获取验证信息
			
 
				+    # HTTP POST请求，一次性处理完整音频
			
 
				+    # 平台地址：https://nls-portal.console.aliyun.com/
			
 
				+    # appkey地址：https://nls-portal.console.aliyun.com/applist
			
 
				+    # token地址：https://nls-portal.console.aliyun.com/overview
			
 
				+    # AliyunASR和AliyunStreamASR的区别是：AliyunASR是批量处理场景，AliyunStreamASR是实时交互场景
			
 
				+    # 一般来说非流式ASR更便宜（0.004元/秒，¥0.24/分钟）
			
 
				+    # 但是AliyunStreamASR实时性更好（0.005元/秒，¥0.3/分钟）
			
 
				+    # 定义ASR API类型
			
 
				+    type: aliyun
			
 
				+    appkey: 你的阿里云智能语音交互服务项目Appkey
			
 
				+    token: 你的阿里云智能语音交互服务AccessToken，临时的24小时，要长期用下方的access_key_id，access_key_secret
			
 
				+    access_key_id: 你的阿里云账号access_key_id
			
 
				+    access_key_secret: 你的阿里云账号access_key_secret
			
 
				+    output_dir: tmp/
			
 
				+  AliyunStreamASR:
			
 
				+    # 阿里云智能语音交互服务 - 实时流式语音识别
			
 
				+    # WebSocket连接，实时处理音频流
			
 
				+    # 平台地址：https://nls-portal.console.aliyun.com/
			
 
				+    # appkey地址：https://nls-portal.console.aliyun.com/applist
			
 
				+    # token地址：https://nls-portal.console.aliyun.com/overview
			
 
				+    # AliyunASR和AliyunStreamASR的区别是：AliyunASR是批量处理场景，AliyunStreamASR是实时交互场景
			
 
				+    # 一般来说非流式ASR更便宜（0.004元/秒，¥0.24/分钟）
			
 
				+    # 但是AliyunStreamASR实时性更好（0.005元/秒，¥0.3/分钟）
			
 
				+    # 定义ASR API类型
			
 
				+    type: aliyun_stream
			
 
				+    appkey: 你的阿里云智能语音交互服务项目Appkey
			
 
				+    token: 你的阿里云智能语音交互服务AccessToken，临时的24小时，要长期用下方的access_key_id，access_key_secret
			
 
				+    access_key_id: 你的阿里云账号access_key_id
			
 
				+    access_key_secret: 你的阿里云账号access_key_secret
			
 
				+    # 服务器地域选择，可选择距离更近的服务器以减少延迟，如nls-gateway-cn-hangzhou.aliyuncs.com(杭州)等
			
 
				+    host: nls-gateway-cn-shanghai.aliyuncs.com
			
 
				+    # 断句检测时间(毫秒)，控制静音多长时间后进行断句，默认800毫秒
			
 
				+    max_sentence_silence: 800
			
 
				+    output_dir: tmp/
			
 
				+  BaiduASR:
			
 
				+    # 获取AppID、API Key、Secret Key：https://console.bce.baidu.com/ai-engine/old/#/ai/speech/app/list
			
 
				+    # 查看资源额度：https://console.bce.baidu.com/ai-engine/old/#/ai/speech/overview/resource/list
			
 
				+    type: baidu
			
 
				+    app_id: 你的百度语音技术AppID
			
 
				+    api_key: 你的百度语音技术APIKey
			
 
				+    secret_key: 你的百度语音技术SecretKey
			
 
				+    # 语言参数，1537为普通话，具体参考：https://ai.baidu.com/ai-doc/SPEECH/0lbxfnc9b
			
 
				+    dev_pid: 1537
			
 
				+    output_dir: tmp/
			
 
				+  OpenaiASR:
			
 
				+    # OpenAI语音识别服务，需要先在OpenAI平台创建组织并获取api_key
			
 
				+    # 支持中、英、日、韩等多种语音识别，具体参考文档https://platform.openai.com/docs/guides/speech-to-text
			
 
				+    # 需要网络连接
			
 
				+    # 申请步骤：
			
 
				+    # 1.登录OpenAI Platform。https://auth.openai.com/log-in
			
 
				+    # 2.创建api-key  https://platform.openai.com/settings/organization/api-keys
			
 
				+    # 3.模型可以选择gpt-4o-transcribe或GPT-4o mini Transcribe
			
 
				+    type: openai
			
 
				+    api_key: 你的OpenAI API密钥
			
 
				+    base_url: https://api.openai.com/v1/audio/transcriptions
			
 
				+    model_name: gpt-4o-mini-transcribe
			
 
				+    output_dir: tmp/
			
 
				+  GroqASR:
			
 
				+    # Groq语音识别服务，需要先在Groq Console创建API密钥
			
 
				+    # 申请步骤：
			
 
				+    # 1.登录groq Console。https://console.groq.com/home
			
 
				+    # 2.创建api-key  https://console.groq.com/keys
			
 
				+    # 3.模型可以选择whisper-large-v3-turbo或whisper-large-v3（distil-whisper-large-v3-en仅支持英语转录）
			
 
				+    type: openai
			
 
				+    api_key: 你的Groq API密钥
			
 
				+    base_url: https://api.groq.com/openai/v1/audio/transcriptions
			
 
				+    model_name: whisper-large-v3-turbo
			
 
				+    output_dir: tmp/
			
 
				+
			
 
				+
			
 
				+  
			
 
				+VAD:
			
 
				+  SileroVAD:
			
 
				+    type: silero
			
 
				+    threshold: 0.5
			
 
				+    threshold_low: 0.3
			
 
				+    model_dir: models/snakers4_silero-vad
			
 
				+    min_silence_duration_ms: 200  # 如果说话停顿比较长，可以把这个值设置大一些
			
 
				+
			
 
				+LLM:
			
 
				+  # 所有openai类型均可以修改超参，以AliLLM为例
			
 
				+  # 当前支持的type为openai、dify、ollama，可自行适配
			
 
				+  AliLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    # 可在这里找到你的 api_key https://bailian.console.aliyun.com/?apiKey=1#/api-key
			
 
				+    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
			
 
				+    model_name: qwen-turbo
			
 
				+    api_key: 你的deepseek web key
			
 
				+    temperature: 0.7  # 温度值
			
 
				+    max_tokens: 500   # 最大生成token数
			
 
				+    top_p: 1
			
 
				+    top_k: 50
			
 
				+    frequency_penalty: 0  # 频率惩罚
			
 
				+  AliAppLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: AliBL
			
 
				+    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
			
 
				+    app_id: 你的app_id
			
 
				+    # 可在这里找到你的 api_key https://bailian.console.aliyun.com/?apiKey=1#/api-key
			
 
				+    api_key: 你的api_key
			
 
				+    # 是否不使用本地prompt：true|false (默不用请在百练应用中设置prompt)
			
 
				+    is_no_prompt: true
			
 
				+    # Ali_memory_id：false（不使用）|你的memory_id（请在百练应用中设置中获取）
			
 
				+    # Tips！：Ali_memory未实现多用户存储记忆(记忆按id调用)
			
 
				+    ali_memory_id: false
			
 
				+  DoubaoLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    # 先开通服务，打开以下网址，开通的服务搜索Doubao-1.5-pro，开通它
			
 
				+    # 开通地址：https://console.volcengine.com/ark/region:ark+cn-beijing/openManagement?LLM=%7B%7D&OpenTokenDrawer=false
			
 
				+    # 免费额度500000token
			
 
				+    # 开通后，进入这里获取密钥：https://console.volcengine.com/ark/region:ark+cn-beijing/apiKey?apikey=%7B%7D
			
 
				+    base_url: https://ark.cn-beijing.volces.com/api/v3
			
 
				+    model_name: doubao-1-5-pro-32k-250115
			
 
				+    api_key: 你的doubao web key
			
 
				+  DeepSeekLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    # 可在这里找到你的api key https://platform.deepseek.com/
			
 
				+    model_name: deepseek-chat
			
 
				+    url: https://api.deepseek.com
			
 
				+    api_key: 你的deepseek web key
			
 
				+  ChatGLMLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    # glm-4-flash 是免费的，但是还是需要注册填写api_key的
			
 
				+    # 可在这里找到你的api key https://bigmodel.cn/usercenter/proj-mgmt/apikeys
			
 
				+    model_name: glm-4-flash
			
 
				+    url: https://open.bigmodel.cn/api/paas/v4/
			
 
				+    api_key: 你的chat-glm web key
			
 
				+  OllamaLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: ollama
			
 
				+    model_name: qwen2.5 #  使用的模型名称，需要预先使用ollama pull下载
			
 
				+    base_url: http://localhost:11434  # Ollama服务地址
			
 
				+  DifyLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: dify
			
 
				+    # 建议使用本地部署的dify接口，国内部分区域访问dify公有云接口可能会受限
			
 
				+    # 如果使用DifyLLM，配置文件里prompt(提示词)是无效的，需要在dify控制台设置提示词
			
 
				+    base_url: https://api.dify.ai/v1
			
 
				+    api_key: 你的DifyLLM web key
			
 
				+    # 使用的对话模式 可以选择工作流 workflows/run 对话模式 chat-messages  文本生成 completion-messages
			
 
				+    # 使用workflows进行返回的时候输入参数为 query 返回参数的名字要设置为 answer
			
 
				+    # 文本生成的默认输入参数也是query
			
 
				+    mode: chat-messages
			
 
				+  GeminiLLM:
			
 
				+    type: gemini
			
 
				+    # 谷歌Gemini API，需要先在Google Cloud控制台创建API密钥并获取api_key
			
 
				+    # 若在中国境内使用，请遵守《生成式人工智能服务管理暂行办法》
			
 
				+    # token申请地址： https://aistudio.google.com/apikey
			
 
				+    # 若部署地无法访问接口，需要开启科学上网
			
 
				+    api_key: 你的gemini web key
			
 
				+    model_name: "gemini-2.0-flash"
			
 
				+    http_proxy: ""  #"http://127.0.0.1:10808"
			
 
				+    https_proxy: "" #http://127.0.0.1:10808"
			
 
				+  CozeLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: coze
			
 
				+    # 你可以在这里找到个人令牌
			
 
				+    # https://www.coze.cn/open/oauth/pats
			
 
				+    # bot_id和user_id的内容写在引号之内
			
 
				+    bot_id: "你的bot_id"
			
 
				+    user_id: "你的user_id"
			
 
				+    personal_access_token: 你的coze个人令牌
			
 
				+  VolcesAiGatewayLLM:
			
 
				+    # 火山引擎 - 边缘大模型网关
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    # 先开通服务，打开以下网址，创建网关访问密钥，搜索并勾选 Doubao-pro-32k-functioncall ，开通
			
 
				+    # 如果需要使用边缘大模型网关提供的语音合成，一并勾选 Doubao-语音合成 ，另见 TTS.VolcesAiGatewayTTS 配置
			
 
				+    # https://console.volcengine.com/vei/aigateway/
			
 
				+    # 开通后，进入这里获取密钥：https://console.volcengine.com/vei/aigateway/tokens-list
			
 
				+    base_url: https://ai-gateway.vei.volces.com/v1
			
 
				+    model_name: doubao-pro-32k-functioncall
			
 
				+    api_key: 你的网关访问密钥
			
 
				+  LMStudioLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: openai
			
 
				+    model_name: deepseek-r1-distill-llama-8b@q4_k_m # 使用的模型名称，需要预先在社区下载
			
 
				+    url: http://localhost:1234/v1 # LM Studio服务地址
			
 
				+    api_key: lm-studio # LM Studio服务的固定API Key
			
 
				+  HomeAssistant:
			
 
				+    # 定义LLM API类型
			
 
				+    type: homeassistant
			
 
				+    base_url: http://homeassistant.local:8123
			
 
				+    agent_id: conversation.chatgpt
			
 
				+    api_key: 你的home assistant api访问令牌
			
 
				+  FastgptLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: fastgpt
			
 
				+    # 如果使用fastgpt，配置文件里prompt(提示词)是无效的，需要在fastgpt控制台设置提示词
			
 
				+    base_url: https://host/api/v1
			
 
				+    # 你可以在这里找到你的api_key
			
 
				+    # https://cloud.tryfastgpt.ai/account/apikey
			
 
				+    api_key: 你的fastgpt密钥
			
 
				+    variables:
			
 
				+      k: "v"
			
 
				+      k2: "v2"
			
 
				+  XinferenceLLM:
			
 
				+    # 定义LLM API类型
			
 
				+    type: xinference
			
 
				+    # Xinference服务地址和模型名称
			
 
				+    model_name: qwen2.5:72b-AWQ  # 使用的模型名称，需要预先在Xinference启动对应模型
			
 
				+    base_url: http://localhost:9997  # Xinference服务地址
			
 
				+  XinferenceSmallLLM:
			
 
				+    # 定义轻量级LLM API类型，用于意图识别
			
 
				+    type: xinference
			
 
				+    # Xinference服务地址和模型名称
			
 
				+    model_name: qwen2.5:3b-AWQ  # 使用的小模型名称，用于意图识别
			
 
				+    base_url: http://localhost:9997  # Xinference服务地址
			
 
				+# VLLM配置（视觉语言大模型）
			
 
				+VLLM:
			
 
				+  ChatGLMVLLM:
			
 
				+    type: openai
			
 
				+    # glm-4v-flash是智谱免费AI的视觉模型，需要先在智谱AI平台创建API密钥并获取api_key
			
 
				+    # 可在这里找到你的api key https://bigmodel.cn/usercenter/proj-mgmt/apikeys
			
 
				+    model_name: glm-4v-flash  # 智谱AI的视觉模型
			
 
				+    url: https://open.bigmodel.cn/api/paas/v4/
			
 
				+    api_key: 你的api_key
			
 
				+  QwenVLVLLM:
			
 
				+    type: openai
			
 
				+    model_name: qwen2.5-vl-3b-instruct
			
 
				+    url: https://dashscope.aliyuncs.com/compatible-mode/v1
			
 
				+    # 可在这里找到你的api key https://bailian.console.aliyun.com/?apiKey=1#/api-key
			
 
				+    api_key: 你的api_key
			
 
				+TTS:
			
 
				+  # 当前支持的type为edge、doubao，可自行适配
			
 
				+  EdgeTTS:
			
 
				+    # 定义TTS API类型
			
 
				+    type: edge
			
 
				+    voice: zh-CN-XiaoxiaoNeural
			
 
				+    output_dir: tmp/
			
 
				+  DoubaoTTS:
			
 
				+    # 定义TTS API类型
			
 
				+    type: doubao
			
 
				+    # 火山引擎语音合成服务，需要先在火山引擎控制台创建应用并获取appid和access_token
			
 
				+    # 山引擎语音一定要购买花钱，起步价30元，就有100并发了。如果用免费的只有2个并发，会经常报tts错误
			
 
				+    # 购买服务后，购买免费的音色后，可能要等半小时左右，才能使用。
			
 
				+    # 普通音色在这里开通：https://console.volcengine.com/speech/service/8
			
 
				+    # 湾湾小何音色在这里开通：https://console.volcengine.com/speech/service/10007，开通后将下面的voice设置成zh_female_wanwanxiaohe_moon_bigtts
			
 
				+    api_url: https://openspeech.bytedance.com/api/v1/tts
			
 
				+    voice: BV001_streaming
			
 
				+    output_dir: tmp/
			
 
				+    authorization: "Bearer;"
			
 
				+    appid: 你的火山引擎语音合成服务appid
			
 
				+    access_token: 你的火山引擎语音合成服务access_token
			
 
				+    cluster: volcano_tts
			
 
				+    speed_ratio: 1.0
			
 
				+    volume_ratio: 1.0
			
 
				+    pitch_ratio: 1.0
			
 
				+  #火山tts，支持双向流式tts
			
 
				+  HuoshanDoubleStreamTTS:
			
 
				+    type: huoshan_double_stream
			
 
				+    # 访问 https://console.volcengine.com/speech/service/10007 开通语音合成大模型，购买音色
			
 
				+    # 在页面底部获取appid和access_token
			
 
				+    # 资源ID固定为：volc.service_type.10029（大模型语音合成及混音）
			
 
				+    # 如果是机智云，把接口地址换成wss://bytedance.gizwitsapi.com/api/v3/tts/bidirection
			
 
				+    # 机智云不需要天填 appid
			
 
				+    ws_url: wss://openspeech.bytedance.com/api/v3/tts/bidirection
			
 
				+    appid: 你的火山引擎语音合成服务appid
			
 
				+    access_token: 你的火山引擎语音合成服务access_token
			
 
				+    resource_id: volc.service_type.10029
			
 
				+    speaker: zh_female_wanwanxiaohe_moon_bigtts
			
 
				+  CosyVoiceSiliconflow:
			
 
				+    type: siliconflow
			
 
				+    # 硅基流动TTS
			
 
				+    # token申请地址 https://cloud.siliconflow.cn/account/ak
			
 
				+    model: FunAudioLLM/CosyVoice2-0.5B
			
 
				+    voice: FunAudioLLM/CosyVoice2-0.5B:alex
			
 
				+    output_dir: tmp/
			
 
				+    access_token: 你的硅基流动API密钥
			
 
				+    response_format: wav
			
 
				+  CozeCnTTS:
			
 
				+    type: cozecn
			
 
				+    # COZECN TTS
			
 
				+    # token申请地址 https://www.coze.cn/open/oauth/pats
			
 
				+    voice: 7426720361733046281
			
 
				+    output_dir: tmp/
			
 
				+    access_token: 你的coze web key
			
 
				+    response_format: wav
			
 
				+  VolcesAiGatewayTTS:
			
 
				+    type: openai
			
 
				+    # 火山引擎 - 边缘大模型网关
			
 
				+    # 先开通服务，打开以下网址，创建网关访问密钥，搜索并勾选 Doubao-语音合成 ，开通
			
 
				+    # 如果需要使用边缘大模型网关提供的 LLM，一并勾选 Doubao-pro-32k-functioncall ，另见 LLM.VolcesAiGatewayLLM 配置
			
 
				+    # https://console.volcengine.com/vei/aigateway/
			
 
				+    # 开通后，进入这里获取密钥：https://console.volcengine.com/vei/aigateway/tokens-list
			
 
				+    api_key: 你的网关访问密钥
			
 
				+    api_url: https://ai-gateway.vei.volces.com/v1/audio/speech
			
 
				+    model: doubao-tts
			
 
				+    # 音色列表见 https://www.volcengine.com/docs/6561/1257544
			
 
				+    voice: zh_male_shaonianzixin_moon_bigtts
			
 
				+    speed: 1
			
 
				+    output_dir: tmp/
			
 
				+  FishSpeech:
			
 
				+    # 参照教程：https://github.com/xinnan-tech/xiaozhi-esp32-server/blob/main/docs/fish-speech-integration.md
			
 
				+    type: fishspeech
			
 
				+    output_dir: tmp/
			
 
				+    response_format: wav
			
 
				+    reference_id: null
			
 
				+    reference_audio: ["config/assets/wakeup_words.wav",]
			
 
				+    reference_text: ["哈啰啊，我是小智啦，声音好听的台湾女孩一枚，超开心认识你耶，最近在忙啥，别忘了给我来点有趣的料哦，我超爱听八卦的啦",]
			
 
				+    normalize: true
			
 
				+    max_new_tokens: 1024
			
 
				+    chunk_length: 200
			
 
				+    top_p: 0.7
			
 
				+    repetition_penalty: 1.2
			
 
				+    temperature: 0.7
			
 
				+    streaming: false
			
 
				+    use_memory_cache: "on"
			
 
				+    seed: null
			
 
				+    channels: 1
			
 
				+    rate: 44100
			
 
				+    api_key: "你的api_key"
			
 
				+    api_url: "http://127.0.0.1:8080/v1/tts"
			
 
				+  GPT_SOVITS_V2:
			
 
				+    # 定义TTS API类型
			
 
				+    #启动tts方法：
			
 
				+    #python api_v2.py -a 127.0.0.1 -p 9880 -c GPT_SoVITS/configs/demo.yaml
			
 
				+    type: gpt_sovits_v2
			
 
				+    url: "http://127.0.0.1:9880/tts"
			
 
				+    output_dir: tmp/
			
 
				+    text_lang: "auto"
			
 
				+    ref_audio_path: "demo.wav"
			
 
				+    prompt_text: ""
			
 
				+    prompt_lang: "zh"
			
 
				+    top_k: 5
			
 
				+    top_p: 1
			
 
				+    temperature: 1
			
 
				+    text_split_method: "cut0"
			
 
				+    batch_size: 1
			
 
				+    batch_threshold: 0.75
			
 
				+    split_bucket: true
			
 
				+    return_fragment: false
			
 
				+    speed_factor: 1.0
			
 
				+    streaming_mode: false
			
 
				+    seed: -1
			
 
				+    parallel_infer: true
			
 
				+    repetition_penalty: 1.35
			
 
				+    aux_ref_audio_paths: []
			
 
				+  GPT_SOVITS_V3:
			
 
				+    # 定义TTS API类型 GPT-SoVITS-v3lora-20250228
			
 
				+    #启动tts方法：
			
 
				+    #python api.py
			
 
				+    type: gpt_sovits_v3
			
 
				+    url: "http://127.0.0.1:9880"
			
 
				+    output_dir: tmp/
			
 
				+    text_language: "auto"
			
 
				+    refer_wav_path: "caixukun.wav"
			
 
				+    prompt_language: "zh"
			
 
				+    prompt_text: ""
			
 
				+    top_k: 15
			
 
				+    top_p: 1.0
			
 
				+    temperature: 1.0
			
 
				+    cut_punc: ""
			
 
				+    speed: 1.0
			
 
				+    inp_refs: []
			
 
				+    sample_steps: 32
			
 
				+    if_sr: false
			
 
				+  MinimaxTTS:
			
 
				+    # Minimax语音合成服务，需要先在minimax平台创建账户充值，并获取登录信息
			
 
				+    # 平台地址：https://platform.minimaxi.com/
			
 
				+    # 充值地址：https://platform.minimaxi.com/user-center/payment/balance
			
 
				+    # group_id地址：https://platform.minimaxi.com/user-center/basic-information
			
 
				+    # api_key地址：https://platform.minimaxi.com/user-center/basic-information/interface-key
			
 
				+    # 定义TTS API类型
			
 
				+    type: minimax
			
 
				+    output_dir: tmp/
			
 
				+    group_id: 你的minimax平台groupID
			
 
				+    api_key: 你的minimax平台接口密钥
			
 
				+    model: "speech-01-turbo"
			
 
				+    # 此处设置将优先于voice_setting中voice_id的设置；如都不设置，默认为 female-shaonv
			
 
				+    voice_id: "female-shaonv"
			
 
				+    # 以下可不用设置，使用默认设置
			
 
				+    # voice_setting:
			
 
				+    #     voice_id: "male-qn-qingse"
			
 
				+    #     speed: 1
			
 
				+    #     vol: 1
			
 
				+    #     pitch: 0
			
 
				+    #     emotion: "happy"
			
 
				+    # pronunciation_dict:
			
 
				+    #     tone:
			
 
				+    #       - "处理/(chu3)(li3)"
			
 
				+    #       - "危险/dangerous"
			
 
				+    # audio_setting:
			
 
				+    #     sample_rate: 32000
			
 
				+    #     bitrate: 128000
			
 
				+    #     format: "mp3"
			
 
				+    #     channel: 1
			
 
				+    # timber_weights:
			
 
				+    #   -
			
 
				+    #     voice_id: male-qn-qingse
			
 
				+    #     weight: 1
			
 
				+    #   -
			
 
				+    #     voice_id: female-shaonv
			
 
				+    #     weight: 1
			
 
				+    # language_boost: auto
			
 
				+
			
 
				+# MinimaxTTSHTTPStream和MinimaxTTSWebSocketStream还在测试，测试完再开放
			
 
				+#
			
 
				+#  MinimaxTTSHTTPStream:
			
 
				+#  # Minimax流式语音合成服务
			
 
				+#    type: minimax_httpstream
			
 
				+#    output_dir: tmp/
			
 
				+#    group_id: 你的minimax平台groupID
			
 
				+#    api_key: 你的minimax平台接口密钥
			
 
				+#    model: "speech-01-turbo"
			
 
				+#    voice_id: "female-shaonv"
			
 
				+#
			
 
				+#  MinimaxTTSWebSocketStream:
			
 
				+#    type: minimax_webSocket
			
 
				+#    output_dir: tmp/
			
 
				+#    group_id: 你的minimax平台groupID
			
 
				+#    api_key: 你的minimax平台接口密钥
			
 
				+#    model: "speech-01-turbo"
			
 
				+#    voice_id: "female-shaonv"
			
 
				+
			
 
				+  AliyunTTS:
			
 
				+    # 阿里云智能语音交互服务，需要先在阿里云平台开通服务，然后获取验证信息
			
 
				+    # 平台地址：https://nls-portal.console.aliyun.com/
			
 
				+    # appkey地址：https://nls-portal.console.aliyun.com/applist
			
 
				+    # token地址：https://nls-portal.console.aliyun.com/overview
			
 
				+    # 定义TTS API类型
			
 
				+    type: aliyun
			
 
				+    output_dir: tmp/
			
 
				+    appkey: 你的阿里云智能语音交互服务项目Appkey
			
 
				+    token: 你的阿里云智能语音交互服务AccessToken，临时的24小时，要长期用下方的access_key_id，access_key_secret
			
 
				+    voice: xiaoyun
			
 
				+    access_key_id: 你的阿里云账号access_key_id
			
 
				+    access_key_secret: 你的阿里云账号access_key_secret
			
 
				+
			
 
				+    # 以下可不用设置，使用默认设置
			
 
				+    # format: wav
			
 
				+    # sample_rate: 16000
			
 
				+    # volume: 50
			
 
				+    # speech_rate: 0
			
 
				+    # pitch_rate: 0
			
 
				+  AliyunStreamTTS:
			
 
				+    # 阿里云CosyVoice大模型流式文本语音合成
			
 
				+    # 采用FlowingSpeechSynthesizer接口，支持更低延迟和更自然的语音质量
			
 
				+    # 流式文本语音合成仅提供商用版，不支持试用，详情请参见试用版和商用版。要使用该功能，请开通商用版。
			
 
				+    # 支持龙系列专用音色：longxiaochun、longyu、longchen等
			
 
				+    # 平台地址：https://nls-portal.console.aliyun.com/
			
 
				+    # appkey地址：https://nls-portal.console.aliyun.com/applist
			
 
				+    # token地址：https://nls-portal.console.aliyun.com/overview
			
 
				+    # 使用三阶段流式交互：StartSynthesis -> RunSynthesis -> StopSynthesis
			
 
				+    type: aliyun_stream
			
 
				+    output_dir: tmp/
			
 
				+    appkey: 你的阿里云智能语音交互服务项目Appkey
			
 
				+    token: 你的阿里云智能语音交互服务AccessToken，临时的24小时，要长期用下方的access_key_id，access_key_secret
			
 
				+    voice: longxiaochun 
			
 
				+    access_key_id: 你的阿里云账号access_key_id
			
 
				+    access_key_secret: 你的阿里云账号access_key_secret
			
 
				+    # 截至2025年7月21日大模型音色只有北京节点采用，其他节点暂不支持
			
 
				+    host: nls-gateway-cn-beijing.aliyuncs.com
			
 
				+    # 以下可不用设置，使用默认设置
			
 
				+    # format: pcm  # 音频格式：pcm、wav、mp3
			
 
				+    # sample_rate: 16000  # 采样率：8000、16000、24000
			
 
				+    # volume: 50  # 音量：0-100
			
 
				+    # speech_rate: 0  # 语速：-500到500
			
 
				+    # pitch_rate: 0  # 语调：-500到500
			
 
				+  TencentTTS:
			
 
				+    # 腾讯云智能语音交互服务，需要先在腾讯云平台开通服务
			
 
				+    # appid、secret_id、secret_key申请地址：https://console.cloud.tencent.com/cam/capi
			
 
				+    # 免费领取资源：https://console.cloud.tencent.com/tts/resourcebundle
			
 
				+    type: tencent
			
 
				+    output_dir: tmp/
			
 
				+    appid: 你的腾讯云AppId
			
 
				+    secret_id: 你的腾讯云SecretID
			
 
				+    secret_key: 你的腾讯云SecretKey
			
 
				+    region: ap-guangzhou
			
 
				+    voice: 101001
			
 
				+
			
 
				+  TTS302AI:
			
 
				+    # 302AI语音合成服务，需要先在302平台创建账户充值，并获取密钥信息
			
 
				+    # 添加 302.ai TTS 配置
			
 
				+    # token申请地址：https://dash.302.ai/
			
 
				+    # 获取api_keyn路径：https://dash.302.ai/apis/list
			
 
				+    # 价格，$35/百万字符。火山原版¥450元/百万字符
			
 
				+    type: doubao
			
 
				+    api_url: https://api.302ai.cn/doubao/tts_hd
			
 
				+    authorization: "Bearer "
			
 
				+    # 湾湾小何音色
			
 
				+    voice: "zh_female_wanwanxiaohe_moon_bigtts"
			
 
				+    output_dir: tmp/
			
 
				+    access_token: "你的302API密钥"
			
 
				+  GizwitsTTS:
			
 
				+    type: doubao
			
 
				+    # 火山引擎作为基座，可以完全使用企业级火山引擎语音合成服务
			
 
				+    # 前一万名注册的用户，将送5元体验金额
			
 
				+    # 获取API Key地址：https://agentrouter.gizwitsapi.com/panel/token
			
 
				+    api_url: https://bytedance.gizwitsapi.com/api/v1/tts
			
 
				+    authorization: "Bearer "
			
 
				+    # 湾湾小何音色
			
 
				+    voice: "zh_female_wanwanxiaohe_moon_bigtts"
			
 
				+    output_dir: tmp/
			
 
				+    access_token: "你的机智云API key"
			
 
				+  ACGNTTS:
			
 
				+    #在线网址：https://acgn.ttson.cn/
			
 
				+    #token购买：www.ttson.cn
			
 
				+    #开发相关疑问请提交至网站上的qq
			
 
				+    #角色id获取地址：ctrl+f快速检索角色——网站管理者不允许发布,可询问网站管理者
			
 
				+    #各参数意义见开发文档：https://www.yuque.com/alexuh/skmti9/wm6taqislegb02gd?singleDoc#
			
 
				+    type: ttson
			
 
				+    token: your_token
			
 
				+    voice_id: 1695
			
 
				+    speed_factor: 1
			
 
				+    pitch_factor: 0
			
 
				+    volume_change_dB: 0
			
 
				+    to_lang: ZH
			
 
				+    url: https://u95167-bd74-2aef8085.westx.seetacloud.com:8443/flashsummary/tts?token=
			
 
				+    format: mp3
			
 
				+    output_dir: tmp/
			
 
				+    emotion: 1
			
 
				+  OpenAITTS:
			
 
				+    # openai官方文本转语音服务，可支持全球大多数语种
			
 
				+    type: openai
			
 
				+    # 你可以在这里获取到 api key
			
 
				+    # https://platform.openai.com/api-keys
			
 
				+    api_key: 你的openai api key
			
 
				+    # 国内需要使用代理
			
 
				+    api_url: https://api.openai.com/v1/audio/speech
			
 
				+    # 可选tts-1或tts-1-hd，tts-1速度更快tts-1-hd质量更好
			
 
				+    model: tts-1
			
 
				+    # 演讲者，可选alloy, echo, fable, onyx, nova, shimmer
			
 
				+    voice: onyx
			
 
				+    # 语速范围0.25-4.0
			
 
				+    speed: 1
			
 
				+    output_dir: tmp/
			
 
				+  CustomTTS:
			
 
				+    # 自定义的TTS接口服务，请求参数可自定义，可接入众多TTS服务
			
 
				+    # 以本地部署的KokoroTTS为例
			
 
				+    # 如果只有cpu运行：docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest
			
 
				+    # 如果只有gpu运行：docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest
			
 
				+    # 要求接口使用POST方式请求，并返回音频文件
			
 
				+    type: custom
			
 
				+    method: POST
			
 
				+    url: "http://127.0.0.1:8880/v1/audio/speech"
			
 
				+    params: # 自定义请求参数
			
 
				+      input: "{prompt_text}"
			
 
				+      response_format: "mp3"
			
 
				+      download_format: "mp3"
			
 
				+      voice: "zf_xiaoxiao"
			
 
				+      lang_code: "z"
			
 
				+      return_download_link: true
			
 
				+      speed: 1
			
 
				+      stream: false
			
 
				+    headers: # 自定义请求头
			
 
				+      # Authorization: Bearer xxxx
			
 
				+    format: mp3 # 接口返回的音频格式
			
 
				+    output_dir: tmp/
			
 
				+  LinkeraiTTS:
			
 
				+    type: linkerai
			
 
				+    api_url: https://tts.linkerai.cn/tts
			
 
				+    audio_format: "pcm"
			
 
				+    # 默认的access_token供大家测试时免费使用的，此access_token请勿用于商业用途
			
 
				+    # 如果效果不错，可自行申请token，申请地址：https://linkerai.cn
			
 
				+    # 各参数意义见开发文档：https://tts.linkerai.cn/docs
			
 
				+    # 支持声音克隆，可自行上传音频，填入voice参数，voice参数为空时，使用默认声音
			
 
				+    access_token: "U4YdYXVfpwWnk2t5Gp822zWPCuORyeJL"
			
 
				+    voice: "OUeAo1mhq6IBExi"
			
 
				+    output_dir: tmp/
			
--- a/main/server/config/config_loader.py
+++ b/main/server/config/config_loader.py
@@ -0,0 +1,31 @@
 
				+import os 
			
 
				+import yaml
			
 
				+
			
 
				+
			
 
				+def get_project_dir():
			
 
				+    """获取项目根目录"""
			
 
				+    return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/'
			
 
				+
			
 
				+def read_config(config_path):
			
 
				+    with open(config_path, 'r') as file:
			
 
				+        return yaml.safe_load(file)
			
 
				+
			
 
				+def get_config_from_api(config):
			
 
				+    
			
 
				+
			
 
				+def load_config():
			
 
				+    """加载配置文件"""
			
 
				+    # 检查缓存（暂不考虑）
			
 
				+
			
 
				+    default_config_path = get_project_dir() + 'config.yaml'
			
 
				+    custom_config_path = get_project_dir() + 'data/.config.yaml'
			
 
				+
			
 
				+    default_config = read_config(default_config_path)
			
 
				+    custom_config = read_config(custom_config_path)
			
 
				+
			
 
				+    
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/main/server/config/logger.py
+++ b/main/server/config/logger.py
@@ -0,0 +1,100 @@
 
				+import os
			
 
				+import sys
			
 
				+from logoru import logger
			
 
				+from config_loader import load_config
			
 
				+from config.settings import check_config_file
			
 
				+
			
 
				+SERVER_VERSION = "1.0.0"
			
 
				+_logger_initialized = False
			
 
				+
			
 
				+
			
 
				+def get_module_abbreviation(module_name: str, module_dict: dict) -> str:
			
 
				+    module_value = module_dict.get(module_name,"")
			
 
				+    if not module_value:
			
 
				+        return "00"
			
 
				+    if "_" in module_value:
			
 
				+        parts = module_value.split("_")
			
 
				+        return parts[-1][:2] if parts[-1] else "00"
			
 
				+    return module_value[:2]
			
 
				+
			
 
				+
			
 
				+def build_module_string(selected_modules: list) -> str:
			
 
				+    """构建模块字符串"""
			
 
				+    return (
			
 
				+        get_module_abbreviation("VAD", selected_modules) 
			
 
				+        +
			
 
				+        get_module_abbreviation("ASR", selected_modules)
			
 
				+        +
			
 
				+        get_module_abbreviation("TTS", selected_modules)
			
 
				+        +
			
 
				+        get_module_abbreviation("LLM", selected_modules)
			
 
				+        +
			
 
				+        get_module_abbreviation("Memory", selected_modules)
			
 
				+        +
			
 
				+        get_module_abbreviation("Intent", selected_modules)
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def formatter(record):
			
 
				+    """"""
			
 
				+
			
 
				+
			
 
				+def setup_logging():
			
 
				+    check_config_file()
			
 
				+    config = load_config()
			
 
				+    log_config = config["log"]
			
 
				+    global _logger_initialized
			
 
				+
			
 
				+    # 第一次初始化时配置日志
			
 
				+    if not _logger_initialized:
			
 
				+        logger.configure(
			
 
				+            extra={
			
 
				+                "selected_modules": log_config.get("selected_modules", "00000000000000"),
			
 
				+            })
			
 
				+        
			
 
				+        log_format = log_config.get(
			
 
				+            "log_format",
			
 
				+            "<green>{time:YYMMDD HH:mm:ss}</green>[{version}_{extra[selected_module]}][<light-blue>{extra[tag]}</light-blue>]-<level>{level}</level>-<light-green>{message}</light-green>",
			
 
				+        )
			
 
				+
			
 
				+        log_format_file = log_config.get(
			
 
				+            "log_format_file",
			
 
				+            "{time:YYYY-MM-DD HH:mm:ss} - {version}_{extra[selected_module]} - {name} - {level} - {extra[tag]} - {message}",
			
 
				+        )
			
 
				+
			
 
				+        log_format = log_format.replace("{version}", SERVER_VERSION)
			
 
				+        log_format_file = log_format_file.replace("{version}", SERVER_VERSION)
			
 
				+        log_level = log_config.get("log_level", "INFO")
			
 
				+        log_dir = log_config.get("log_dir", "tmp")
			
 
				+        log_file = log_config.get("log_file", "server.log")
			
 
				+        data_dir = log_config.get("data_dir", "data")
			
 
				+
			
 
				+        os.makedirs(log_dir, exist_ok=True)
			
 
				+        os.makedirs(data_dir, exist_ok=True)
			
 
				+
			
 
				+        logger.remove()
			
 
				+        # 输出到控制台
			
 
				+        logger.add(sys.stdout, format=log_format, level=log_level, filter=formatter)
			
 
				+
			
 
				+        # 输出到文件，统一目录，按大小轮转
			
 
				+        log_file_path = os.path.join(log_dir, log_file)
			
 
				+        logger.add(
			
 
				+            log_file_path,
			
 
				+            format=log_format_file,
			
 
				+            level=log_level,
			
 
				+            filter=formatter,
			
 
				+            rotation="10 MB",    # 单个文件最大10MB
			
 
				+            retention="30 days", # 保留30天
			
 
				+            enqueue=True,        # 异步安全
			
 
				+            encoding="utf-8",    
			
 
				+            backtrace=True,      # 打印堆栈
			
 
				+            diagnose=True        # 打印诊断信息
			
 
				+        )
			
 
				+
			
 
				+        _logger_initialized = True # 标记已初始化
			
 
				+
			
 
				+    return logger
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/main/server/config/manage_api_client.py
+++ b/main/server/config/manage_api_client.py
@@ -0,0 +1,55 @@
 
				+import os
			
 
				+import time
			
 
				+import base64
			
 
				+from typing import Dict, Optional
			
 
				+
			
 
				+import httpx
			
 
				+
			
 
				+TAG = __name__
			
 
				+
			
 
				+class ManageApiClient:
			
 
				+    _instance = None
			
 
				+    _client = None
			
 
				+    _secret = None
			
 
				+
			
 
				+    def __new__(cls, config):
			
 
				+        """单例模式确保全局唯一实例，并支持传入配置参数"""
			
 
				+        if  cls._instance is None:
			
 
				+            cls._instance = super().__new__(cls)
			
 
				+            cls._init_client(config)
			
 
				+        return cls._instance
			
 
				+    
			
 
				+    @classmethod
			
 
				+    def _init_client(cls, config):
			
 
				+        """初始化持久线程池"""
			
 
				+        cls.config = config.get('manage_api', {})
			
 
				+
			
 
				+        if not cls.config:
			
 
				+            raise ValueError("缺少manage_api配置")
			
 
				+        
			
 
				+        cls._secret = cls.config.get('secret')
			
 
				+        if not cls._secret:
			
 
				+            raise ValueError("配置中缺少secret")
			
 
				+        
			
 
				+        cls._secret = cls.config.get('secret')
			
 
				+        cls.max_retries = cls.config.get('max_retries', 6) # 最大重试次数
			
 
				+        cls.retry_delay = cls.config.get('retry_delay', 1) # 初始重连延迟（秒）
			
 
				+        
			
 
				+        cls._client = httpx.Client(
			
 
				+            base_url=cls.config.get("url"),
			
 
				+            headers={
			
 
				+                "User-Agent": f"PythonClient/2.0 (PID:{os.getpid()})",
			
 
				+                "Accept": "application/json",
			
 
				+                "Authorization": "Bearer " + cls._secret,
			
 
				+            },
			
 
				+            timeout=cls.config.get("timeout", 30),  # 默认超时时间30秒
			
 
				+
			
 
				+        )
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _request(cls, method: str, endpoint: str, **kwargs) -> Dict:
			
 
				+        """发送单次http请求并处理响应"""
			
 
				+        endpoint = endpoint.lstrip("/")
			
 
				+        response = cls._client.request(method, endpoint, **kwargs)
			
 
				+        response.raise_for_status()
			
 
				+        
			
--- a/main/server/config/settings.py
+++ b/main/server/config/settings.py
@@ -0,0 +1,33 @@
 
				+import os
			
 
				+from config_loader import load_config, get_project_dir, read_config
			
 
				+
			
 
				+default_config_file = "config.yaml"
			
 
				+config_file_valid = False # 配置文件是否合法
			
 
				+
			
 
				+def check_config_file() -> None:
			
 
				+    """检查配置文件是否合法"""
			
 
				+    global config_file_valid
			
 
				+    if config_file_valid:
			
 
				+        return 
			
 
				+    custom_config_file = os.path.join(get_project_dir(), ".config.yaml")
			
 
				+    if not os.path.exists(custom_config_file):
			
 
				+        raise FileNotFoundError(f"配置文件不存在: {custom_config_file}")
			
 
				+
			
 
				+    # 检查是否从API读取配置
			
 
				+    config = load_config()
			
 
				+    if config.get("read_config_from_api", False):
			
 
				+        print("从API读取配置")
			
 
				+        old_config_origin = read_config(custom_config_file)
			
 
				+        if old_config_origin.get("selected_module") is not None:
			
 
				+            error_msg = "您的配置文件好像既包含智控台的配置又包含本地配置：\n"
			
 
				+            error_msg += "\n建议您：\n"
			
 
				+            error_msg += "1、将根目录的config_from_api.yaml文件复制到data下，重命名为.config.yaml\n"
			
 
				+            error_msg += "2、按教程配置好接口地址和密钥\n"
			
 
				+            raise ValueError(error_msg)
			
 
				+    config_file_valid = True
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/main/server/core/utils/cathe/config.py
+++ b/main/server/core/utils/cathe/config.py
@@ -0,0 +1,3 @@
 
				+"""
			
 
				+缓存配置管理
			
 
				+"""
			
--- a/main/server/core/utils/cathe/manager.py
+++ b/main/server/core/utils/cathe/manager.py
--- a/main/server/core/utils/cathe/strategies.py
+++ b/main/server/core/utils/cathe/strategies.py