mirror of
https://github.com/FuQuan233/nonebot-plugin-llmchat.git
synced 2026-05-12 19:42:50 +00:00
✨ 发送图片可参考
This commit is contained in:
parent
25a9cd0d76
commit
7e87981167
3 changed files with 86 additions and 16 deletions
|
|
@ -418,6 +418,9 @@ async def process_messages(context_id: int, is_group: bool = True):
|
||||||
|
|
||||||
content: list[ChatCompletionContentPartParam] = []
|
content: list[ChatCompletionContentPartParam] = []
|
||||||
|
|
||||||
|
# 收集用户消息中的图片(用于传递给子模型作为参考)
|
||||||
|
user_message_images: list[str] = []
|
||||||
|
|
||||||
# 将机器人错过的消息推送给LLM
|
# 将机器人错过的消息推送给LLM
|
||||||
past_events_snapshot = list(state.past_events)
|
past_events_snapshot = list(state.past_events)
|
||||||
state.past_events.clear()
|
state.past_events.clear()
|
||||||
|
|
@ -426,11 +429,19 @@ async def process_messages(context_id: int, is_group: bool = True):
|
||||||
content.append({"type": "text", "text": text_content})
|
content.append({"type": "text", "text": text_content})
|
||||||
|
|
||||||
# 将消息中的图片转成 base64
|
# 将消息中的图片转成 base64
|
||||||
|
base64_images = await process_images(ev)
|
||||||
|
|
||||||
|
# 收集图片用于子模型调用
|
||||||
|
user_message_images.extend(base64_images)
|
||||||
|
|
||||||
|
# 如果主模型支持图片输入,也传递给主模型
|
||||||
if preset.support_image:
|
if preset.support_image:
|
||||||
base64_images = await process_images(ev)
|
|
||||||
for base64_image in base64_images:
|
for base64_image in base64_images:
|
||||||
content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}})
|
content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}})
|
||||||
|
|
||||||
|
if user_message_images:
|
||||||
|
logger.info(f"用户消息中包含 {len(user_message_images)} 张图片,将用于子模型调用")
|
||||||
|
|
||||||
new_messages: list[ChatCompletionMessageParam] = [
|
new_messages: list[ChatCompletionMessageParam] = [
|
||||||
{"role": "user", "content": content}
|
{"role": "user", "content": content}
|
||||||
]
|
]
|
||||||
|
|
@ -484,6 +495,9 @@ async def process_messages(context_id: int, is_group: bool = True):
|
||||||
# 发送工具调用提示
|
# 发送工具调用提示
|
||||||
await handler.send(Message(f"正在使用{mcp_client.get_friendly_name(tool_name)}"))
|
await handler.send(Message(f"正在使用{mcp_client.get_friendly_name(tool_name)}"))
|
||||||
|
|
||||||
|
# 对于子模型调用,传递用户消息中的图片作为参考
|
||||||
|
images_for_submodel = user_message_images if tool_name.startswith("submodel__") else None
|
||||||
|
|
||||||
if is_group:
|
if is_group:
|
||||||
result = await mcp_client.call_tool(
|
result = await mcp_client.call_tool(
|
||||||
tool_name,
|
tool_name,
|
||||||
|
|
@ -492,7 +506,8 @@ async def process_messages(context_id: int, is_group: bool = True):
|
||||||
bot_id=str(event.self_id),
|
bot_id=str(event.self_id),
|
||||||
user_id=event.user_id,
|
user_id=event.user_id,
|
||||||
is_group=True,
|
is_group=True,
|
||||||
current_preset=preset
|
current_preset=preset,
|
||||||
|
user_images=images_for_submodel
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result = await mcp_client.call_tool(
|
result = await mcp_client.call_tool(
|
||||||
|
|
@ -501,7 +516,8 @@ async def process_messages(context_id: int, is_group: bool = True):
|
||||||
bot_id=str(event.self_id),
|
bot_id=str(event.self_id),
|
||||||
user_id=event.user_id,
|
user_id=event.user_id,
|
||||||
is_group=False,
|
is_group=False,
|
||||||
current_preset=preset
|
current_preset=preset,
|
||||||
|
user_images=images_for_submodel
|
||||||
)
|
)
|
||||||
|
|
||||||
# 处理子模型返回的结构化结果
|
# 处理子模型返回的结构化结果
|
||||||
|
|
|
||||||
|
|
@ -195,7 +195,8 @@ class MCPClient:
|
||||||
bot_id: str | None = None,
|
bot_id: str | None = None,
|
||||||
user_id: int | None = None,
|
user_id: int | None = None,
|
||||||
is_group: bool = True,
|
is_group: bool = True,
|
||||||
current_preset: PresetConfig | None = None
|
current_preset: PresetConfig | None = None,
|
||||||
|
user_images: list[str] | None = None
|
||||||
):
|
):
|
||||||
"""按需连接调用工具,调用后立即断开
|
"""按需连接调用工具,调用后立即断开
|
||||||
|
|
||||||
|
|
@ -207,6 +208,7 @@ class MCPClient:
|
||||||
user_id: 用户ID
|
user_id: 用户ID
|
||||||
is_group: 是否群聊
|
is_group: 是否群聊
|
||||||
current_preset: 当前使用的预设配置(子模型调用时必需)
|
current_preset: 当前使用的预设配置(子模型调用时必需)
|
||||||
|
user_images: 用户消息中的图片列表(base64 编码),用于子模型参考
|
||||||
"""
|
"""
|
||||||
# 检查是否是OneBot内置工具
|
# 检查是否是OneBot内置工具
|
||||||
if tool_name.startswith("ob__"):
|
if tool_name.startswith("ob__"):
|
||||||
|
|
@ -231,8 +233,10 @@ class MCPClient:
|
||||||
return "子模型调用器未初始化"
|
return "子模型调用器未初始化"
|
||||||
if not current_preset:
|
if not current_preset:
|
||||||
return "子模型调用需要提供 current_preset 参数"
|
return "子模型调用需要提供 current_preset 参数"
|
||||||
logger.info(f"调用子模型工具[{tool_name}]")
|
logger.info(f"调用子模型工具[{tool_name}],参考图片数: {len(user_images) if user_images else 0}")
|
||||||
result = await self.submodel_caller.call_tool(tool_name, tool_args, current_preset)
|
result = await self.submodel_caller.call_tool(
|
||||||
|
tool_name, tool_args, current_preset, reference_images=user_images
|
||||||
|
)
|
||||||
# 返回结构化结果,让上层处理
|
# 返回结构化结果,让上层处理
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,7 @@ class SubModelCaller:
|
||||||
使用说明:
|
使用说明:
|
||||||
- 当用户要求生成图片时使用此工具
|
- 当用户要求生成图片时使用此工具
|
||||||
- prompt 应该是详细的图片描述,用英文效果更好
|
- prompt 应该是详细的图片描述,用英文效果更好
|
||||||
|
- 如果用户消息中包含图片(发送或引用),系统会自动将这些图片作为参考传递给子模型,无需在 prompt 中描述
|
||||||
- 系统会自动选择最优的模型,如果失败会自动切换备选模型
|
- 系统会自动选择最优的模型,如果失败会自动切换备选模型
|
||||||
- 返回结果包含 base64 编码的图片数据""",
|
- 返回结果包含 base64 编码的图片数据""",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
|
|
@ -106,7 +107,7 @@ class SubModelCaller:
|
||||||
"properties": {
|
"properties": {
|
||||||
"prompt": {
|
"prompt": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "图片生成提示词,描述要生成的图片内容,建议使用英文"
|
"description": "图片生成提示词,描述要生成的图片内容或对参考图片的修改要求"
|
||||||
},
|
},
|
||||||
"preferred_model": {
|
"preferred_model": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
|
@ -307,7 +308,8 @@ class SubModelCaller:
|
||||||
self,
|
self,
|
||||||
current_preset: PresetConfig,
|
current_preset: PresetConfig,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
preferred_model: str | None = None
|
preferred_model: str | None = None,
|
||||||
|
reference_images: list[str] | None = None
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""生成图片
|
"""生成图片
|
||||||
|
|
||||||
|
|
@ -315,6 +317,7 @@ class SubModelCaller:
|
||||||
current_preset: 当前主模型预设
|
current_preset: 当前主模型预设
|
||||||
prompt: 图片生成提示词
|
prompt: 图片生成提示词
|
||||||
preferred_model: 可选的指定模型名称
|
preferred_model: 可选的指定模型名称
|
||||||
|
reference_images: 可选的参考图片列表(base64 编码)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
包含生成结果的字典:
|
包含生成结果的字典:
|
||||||
|
|
@ -351,15 +354,37 @@ class SubModelCaller:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"获取 MCP 工具失败: {e}")
|
logger.debug(f"获取 MCP 工具失败: {e}")
|
||||||
|
|
||||||
|
# 构建用户消息内容
|
||||||
|
user_content: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
# 添加文本提示
|
||||||
|
user_content.append({"type": "text", "text": prompt})
|
||||||
|
|
||||||
|
# 如果有参考图片,添加到消息中
|
||||||
|
if reference_images:
|
||||||
|
logger.info(f"子模型调用包含 {len(reference_images)} 张参考图片")
|
||||||
|
for img_base64 in reference_images:
|
||||||
|
# 确保格式正确
|
||||||
|
if not img_base64.startswith("data:"):
|
||||||
|
img_base64 = f"data:image/jpeg;base64,{img_base64}"
|
||||||
|
user_content.append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": img_base64}
|
||||||
|
})
|
||||||
|
|
||||||
# 构建消息
|
# 构建消息
|
||||||
|
system_prompt = "你是一个图片生成助手。请根据用户的描述生成图片。直接生成图片,不需要额外解释。"
|
||||||
|
if reference_images:
|
||||||
|
system_prompt += "\n用户提供了参考图片,请根据参考图片和用户的描述来生成或修改图片。"
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "你是一个图片生成助手。请根据用户的描述生成图片。直接生成图片,不需要额外解释。"
|
"content": system_prompt
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt
|
"content": user_content if reference_images else prompt
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -493,7 +518,8 @@ class SubModelCaller:
|
||||||
self,
|
self,
|
||||||
current_preset: PresetConfig,
|
current_preset: PresetConfig,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
preferred_model: str | None = None
|
preferred_model: str | None = None,
|
||||||
|
reference_images: list[str] | None = None
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""生成视频
|
"""生成视频
|
||||||
|
|
||||||
|
|
@ -501,6 +527,7 @@ class SubModelCaller:
|
||||||
current_preset: 当前主模型预设
|
current_preset: 当前主模型预设
|
||||||
prompt: 视频生成提示词
|
prompt: 视频生成提示词
|
||||||
preferred_model: 可选的指定模型名称
|
preferred_model: 可选的指定模型名称
|
||||||
|
reference_images: 可选的参考图片列表(base64 编码)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
包含生成结果的字典
|
包含生成结果的字典
|
||||||
|
|
@ -521,14 +548,33 @@ class SubModelCaller:
|
||||||
key=lambda p: 0 if p.name == preferred_model else 1
|
key=lambda p: 0 if p.name == preferred_model else 1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 构建用户消息内容
|
||||||
|
user_content: list[dict[str, Any]] = []
|
||||||
|
user_content.append({"type": "text", "text": prompt})
|
||||||
|
|
||||||
|
# 如果有参考图片,添加到消息中
|
||||||
|
if reference_images:
|
||||||
|
logger.info(f"视频生成包含 {len(reference_images)} 张参考图片")
|
||||||
|
for img_base64 in reference_images:
|
||||||
|
if not img_base64.startswith("data:"):
|
||||||
|
img_base64 = f"data:image/jpeg;base64,{img_base64}"
|
||||||
|
user_content.append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": img_base64}
|
||||||
|
})
|
||||||
|
|
||||||
|
system_prompt = "你是一个视频生成助手。请根据用户的描述生成视频。"
|
||||||
|
if reference_images:
|
||||||
|
system_prompt += "\n用户提供了参考图片,请根据参考图片和用户的描述来生成视频。"
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "你是一个视频生成助手。请根据用户的描述生成视频。"
|
"content": system_prompt
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt
|
"content": user_content if reference_images else prompt
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -566,7 +612,8 @@ class SubModelCaller:
|
||||||
self,
|
self,
|
||||||
tool_name: str,
|
tool_name: str,
|
||||||
tool_args: dict[str, Any],
|
tool_args: dict[str, Any],
|
||||||
current_preset: PresetConfig
|
current_preset: PresetConfig,
|
||||||
|
reference_images: list[str] | None = None
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""工具调用入口
|
"""工具调用入口
|
||||||
|
|
||||||
|
|
@ -574,6 +621,7 @@ class SubModelCaller:
|
||||||
tool_name: 工具名称
|
tool_name: 工具名称
|
||||||
tool_args: 工具参数
|
tool_args: 工具参数
|
||||||
current_preset: 当前主模型预设
|
current_preset: 当前主模型预设
|
||||||
|
reference_images: 可选的参考图片列表(base64 编码),来自用户消息
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
工具调用结果
|
工具调用结果
|
||||||
|
|
@ -582,7 +630,8 @@ class SubModelCaller:
|
||||||
return await self.generate_image(
|
return await self.generate_image(
|
||||||
current_preset=current_preset,
|
current_preset=current_preset,
|
||||||
prompt=tool_args.get("prompt", ""),
|
prompt=tool_args.get("prompt", ""),
|
||||||
preferred_model=tool_args.get("preferred_model")
|
preferred_model=tool_args.get("preferred_model"),
|
||||||
|
reference_images=reference_images
|
||||||
)
|
)
|
||||||
elif tool_name == "submodel__generate_voice":
|
elif tool_name == "submodel__generate_voice":
|
||||||
return await self.generate_voice(
|
return await self.generate_voice(
|
||||||
|
|
@ -594,7 +643,8 @@ class SubModelCaller:
|
||||||
return await self.generate_video(
|
return await self.generate_video(
|
||||||
current_preset=current_preset,
|
current_preset=current_preset,
|
||||||
prompt=tool_args.get("prompt", ""),
|
prompt=tool_args.get("prompt", ""),
|
||||||
preferred_model=tool_args.get("preferred_model")
|
preferred_model=tool_args.get("preferred_model"),
|
||||||
|
reference_images=reference_images
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue