From 7e87981167a21a9a04d9c15a858e51ef2b1165f3 Mon Sep 17 00:00:00 2001 From: slexce <2767145231@qq.com> Date: Wed, 11 Mar 2026 00:00:04 +0800 Subject: [PATCH] =?UTF-8?q?:sparkles:=20=E5=8F=91=E9=80=81=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E5=8F=AF=E5=8F=82=E8=80=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nonebot_plugin_llmchat/__init__.py | 22 ++++++- nonebot_plugin_llmchat/mcpclient.py | 10 +++- nonebot_plugin_llmchat/submodel_caller.py | 70 +++++++++++++++++++---- 3 files changed, 86 insertions(+), 16 deletions(-) diff --git a/nonebot_plugin_llmchat/__init__.py b/nonebot_plugin_llmchat/__init__.py index 6981e63..28b016b 100755 --- a/nonebot_plugin_llmchat/__init__.py +++ b/nonebot_plugin_llmchat/__init__.py @@ -418,6 +418,9 @@ async def process_messages(context_id: int, is_group: bool = True): content: list[ChatCompletionContentPartParam] = [] + # 收集用户消息中的图片(用于传递给子模型作为参考) + user_message_images: list[str] = [] + # 将机器人错过的消息推送给LLM past_events_snapshot = list(state.past_events) state.past_events.clear() @@ -426,11 +429,19 @@ async def process_messages(context_id: int, is_group: bool = True): content.append({"type": "text", "text": text_content}) # 将消息中的图片转成 base64 + base64_images = await process_images(ev) + + # 收集图片用于子模型调用 + user_message_images.extend(base64_images) + + # 如果主模型支持图片输入,也传递给主模型 if preset.support_image: - base64_images = await process_images(ev) for base64_image in base64_images: content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}) + if user_message_images: + logger.info(f"用户消息中包含 {len(user_message_images)} 张图片,将用于子模型调用") + new_messages: list[ChatCompletionMessageParam] = [ {"role": "user", "content": content} ] @@ -484,6 +495,9 @@ async def process_messages(context_id: int, is_group: bool = True): # 发送工具调用提示 await handler.send(Message(f"正在使用{mcp_client.get_friendly_name(tool_name)}")) + # 对于子模型调用,传递用户消息中的图片作为参考 + images_for_submodel = user_message_images if tool_name.startswith("submodel__") else None + if is_group: result = await mcp_client.call_tool( tool_name, @@ -492,7 +506,8 @@ async def process_messages(context_id: int, is_group: bool = True): bot_id=str(event.self_id), user_id=event.user_id, is_group=True, - current_preset=preset + current_preset=preset, + user_images=images_for_submodel ) else: result = await mcp_client.call_tool( @@ -501,7 +516,8 @@ async def process_messages(context_id: int, is_group: bool = True): bot_id=str(event.self_id), user_id=event.user_id, is_group=False, - current_preset=preset + current_preset=preset, + user_images=images_for_submodel ) # 处理子模型返回的结构化结果 diff --git a/nonebot_plugin_llmchat/mcpclient.py b/nonebot_plugin_llmchat/mcpclient.py index 7c846b5..2e35de7 100644 --- a/nonebot_plugin_llmchat/mcpclient.py +++ b/nonebot_plugin_llmchat/mcpclient.py @@ -195,7 +195,8 @@ class MCPClient: bot_id: str | None = None, user_id: int | None = None, is_group: bool = True, - current_preset: PresetConfig | None = None + current_preset: PresetConfig | None = None, + user_images: list[str] | None = None ): """按需连接调用工具,调用后立即断开 @@ -207,6 +208,7 @@ class MCPClient: user_id: 用户ID is_group: 是否群聊 current_preset: 当前使用的预设配置(子模型调用时必需) + user_images: 用户消息中的图片列表(base64 编码),用于子模型参考 """ # 检查是否是OneBot内置工具 if tool_name.startswith("ob__"): @@ -231,8 +233,10 @@ class MCPClient: return "子模型调用器未初始化" if not current_preset: return "子模型调用需要提供 current_preset 参数" - logger.info(f"调用子模型工具[{tool_name}]") - result = await self.submodel_caller.call_tool(tool_name, tool_args, current_preset) + logger.info(f"调用子模型工具[{tool_name}],参考图片数: {len(user_images) if user_images else 0}") + result = await self.submodel_caller.call_tool( + tool_name, tool_args, current_preset, reference_images=user_images + ) # 返回结构化结果,让上层处理 return result diff --git a/nonebot_plugin_llmchat/submodel_caller.py b/nonebot_plugin_llmchat/submodel_caller.py index df4d793..f4f84a9 100644 --- a/nonebot_plugin_llmchat/submodel_caller.py +++ b/nonebot_plugin_llmchat/submodel_caller.py @@ -99,6 +99,7 @@ class SubModelCaller: 使用说明: - 当用户要求生成图片时使用此工具 - prompt 应该是详细的图片描述,用英文效果更好 +- 如果用户消息中包含图片(发送或引用),系统会自动将这些图片作为参考传递给子模型,无需在 prompt 中描述 - 系统会自动选择最优的模型,如果失败会自动切换备选模型 - 返回结果包含 base64 编码的图片数据""", "parameters": { @@ -106,7 +107,7 @@ class SubModelCaller: "properties": { "prompt": { "type": "string", - "description": "图片生成提示词,描述要生成的图片内容,建议使用英文" + "description": "图片生成提示词,描述要生成的图片内容或对参考图片的修改要求" }, "preferred_model": { "type": "string", @@ -307,7 +308,8 @@ class SubModelCaller: self, current_preset: PresetConfig, prompt: str, - preferred_model: str | None = None + preferred_model: str | None = None, + reference_images: list[str] | None = None ) -> dict[str, Any]: """生成图片 @@ -315,6 +317,7 @@ class SubModelCaller: current_preset: 当前主模型预设 prompt: 图片生成提示词 preferred_model: 可选的指定模型名称 + reference_images: 可选的参考图片列表(base64 编码) Returns: 包含生成结果的字典: @@ -351,15 +354,37 @@ class SubModelCaller: except Exception as e: logger.debug(f"获取 MCP 工具失败: {e}") + # 构建用户消息内容 + user_content: list[dict[str, Any]] = [] + + # 添加文本提示 + user_content.append({"type": "text", "text": prompt}) + + # 如果有参考图片,添加到消息中 + if reference_images: + logger.info(f"子模型调用包含 {len(reference_images)} 张参考图片") + for img_base64 in reference_images: + # 确保格式正确 + if not img_base64.startswith("data:"): + img_base64 = f"data:image/jpeg;base64,{img_base64}" + user_content.append({ + "type": "image_url", + "image_url": {"url": img_base64} + }) + # 构建消息 + system_prompt = "你是一个图片生成助手。请根据用户的描述生成图片。直接生成图片,不需要额外解释。" + if reference_images: + system_prompt += "\n用户提供了参考图片,请根据参考图片和用户的描述来生成或修改图片。" + messages = [ { "role": "system", - "content": "你是一个图片生成助手。请根据用户的描述生成图片。直接生成图片,不需要额外解释。" + "content": system_prompt }, { "role": "user", - "content": prompt + "content": user_content if reference_images else prompt } ] @@ -493,7 +518,8 @@ class SubModelCaller: self, current_preset: PresetConfig, prompt: str, - preferred_model: str | None = None + preferred_model: str | None = None, + reference_images: list[str] | None = None ) -> dict[str, Any]: """生成视频 @@ -501,6 +527,7 @@ class SubModelCaller: current_preset: 当前主模型预设 prompt: 视频生成提示词 preferred_model: 可选的指定模型名称 + reference_images: 可选的参考图片列表(base64 编码) Returns: 包含生成结果的字典 @@ -521,14 +548,33 @@ class SubModelCaller: key=lambda p: 0 if p.name == preferred_model else 1 ) + # 构建用户消息内容 + user_content: list[dict[str, Any]] = [] + user_content.append({"type": "text", "text": prompt}) + + # 如果有参考图片,添加到消息中 + if reference_images: + logger.info(f"视频生成包含 {len(reference_images)} 张参考图片") + for img_base64 in reference_images: + if not img_base64.startswith("data:"): + img_base64 = f"data:image/jpeg;base64,{img_base64}" + user_content.append({ + "type": "image_url", + "image_url": {"url": img_base64} + }) + + system_prompt = "你是一个视频生成助手。请根据用户的描述生成视频。" + if reference_images: + system_prompt += "\n用户提供了参考图片,请根据参考图片和用户的描述来生成视频。" + messages = [ { "role": "system", - "content": "你是一个视频生成助手。请根据用户的描述生成视频。" + "content": system_prompt }, { "role": "user", - "content": prompt + "content": user_content if reference_images else prompt } ] @@ -566,7 +612,8 @@ class SubModelCaller: self, tool_name: str, tool_args: dict[str, Any], - current_preset: PresetConfig + current_preset: PresetConfig, + reference_images: list[str] | None = None ) -> dict[str, Any]: """工具调用入口 @@ -574,6 +621,7 @@ class SubModelCaller: tool_name: 工具名称 tool_args: 工具参数 current_preset: 当前主模型预设 + reference_images: 可选的参考图片列表(base64 编码),来自用户消息 Returns: 工具调用结果 @@ -582,7 +630,8 @@ class SubModelCaller: return await self.generate_image( current_preset=current_preset, prompt=tool_args.get("prompt", ""), - preferred_model=tool_args.get("preferred_model") + preferred_model=tool_args.get("preferred_model"), + reference_images=reference_images ) elif tool_name == "submodel__generate_voice": return await self.generate_voice( @@ -594,7 +643,8 @@ class SubModelCaller: return await self.generate_video( current_preset=current_preset, prompt=tool_args.get("prompt", ""), - preferred_model=tool_args.get("preferred_model") + preferred_model=tool_args.get("preferred_model"), + reference_images=reference_images ) else: return {