1. 添加 requirements.txt;2. 更新一个新闻播报 AI
This commit is contained in:
417
Plugins/Others/NewsReport.py
Normal file
417
Plugins/Others/NewsReport.py
Normal file
@@ -0,0 +1,417 @@
|
|||||||
|
from Plugins.WPSAPI import *
|
||||||
|
import httpx
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from llama_index.core.tools import FunctionTool
|
||||||
|
from llama_index.llms.ollama import Ollama
|
||||||
|
from llama_index.core.agent.workflow import AgentWorkflow
|
||||||
|
from llama_index.core.agent.workflow.react_agent import ReActAgent
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Project-wide configuration object obtained from the Architecture registry.
# NOTE(review): despite its name, `logger` is a ProjectConfig; this module uses
# it both for config lookups (FindItem) and for logging (Log).
logger: ProjectConfig = Architecture.Get(ProjectConfig)

# Ollama endpoint and model name; the second argument is the fallback value.
# NOTE(review): presumably FindItem returns the stored setting when present and
# the given default otherwise - confirm against ProjectConfig.
OLLAMA_URL = logger.FindItem("ollama_url", "http://ollama.liubai.site")
OLLAMA_MODEL = logger.FindItem("ollama_model", "qwen2.5:7b")

# Presumably persists the settings looked up above - TODO confirm.
logger.SaveProperties()
|
||||||
|
|
||||||
|
def get_target_web_page_url(year:str, month:str, day:str) -> str:
    """Build the mrxwlb.com URL of the news transcript page for one date.

    Args:
        year: Year component, e.g. "2025".
        month: Month component, inserted verbatim (no padding is applied here).
        day: Day component, inserted verbatim (no padding is applied here).

    Returns:
        The page URL: a ``/year/month/day/`` path followed by the dated
        Chinese page title, with a trailing slash.
    """
    date_path = "/".join((f"{year}", f"{month}", f"{day}"))
    page_title = f"{year}年{month}月{day}日新闻联播文字版"
    return "http://mrxwlb.com/" + date_path + "/" + page_title + "/"
|
||||||
|
|
||||||
|
class NewsAIAgent:
    """Tool-calling news Q&A agent built on LlamaIndex (ReActAgent) and Ollama.

    The agent is given date helper tools and a tool that fetches (and caches)
    the text of a daily news transcript page; the LLM is instructed to call
    those tools before answering the user's question.
    """

    # Patterns tried in order by _parse_date_from_text; each captures
    # (year, month, day).
    _DATE_PATTERNS = (
        r'(\d{4})年(\d{1,2})月(\d{1,2})日',
        r'(\d{4})-(\d{1,2})-(\d{1,2})',
        r'(\d{4})/(\d{1,2})/(\d{1,2})',
    )

    # Browser-like request headers sent when scraping the news page.
    _BROWSER_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    def __init__(self, ollama_url: str = OLLAMA_URL):
        """Create the LLM client and build the agent workflow.

        Args:
            ollama_url: Base URL of the Ollama server.
        """
        self.ollama_url = ollama_url
        # Created lazily by _get_client(); released by close().
        self.client: Optional[httpx.AsyncClient] = None
        self.llm = Ollama(model=OLLAMA_MODEL, base_url=ollama_url, request_timeout=600.0)
        self.workflow: Optional[AgentWorkflow] = None
        self._initialize_agent()

    def _initialize_agent(self):
        """Register the tools, build the ReAct agent and wrap it in a workflow."""
        # Every tool named in the system prompt below must be registered here:
        # the prompt tells the model to call get_yesterday_date and
        # parse_date_from_text, so leaving them out makes the model attempt
        # calls to tools that do not exist.
        tools = [
            FunctionTool.from_defaults(
                fn=self._get_current_date,
                name="get_current_date",
                description="获取当前日期,返回格式为'年-月-日',例如'2025-11-19'"
            ),
            FunctionTool.from_defaults(
                fn=self._get_yesterday_date,
                name="get_yesterday_date",
                description="获取昨天的日期,返回格式为'年-月-日',例如'2025-11-18'"
            ),
            FunctionTool.from_defaults(
                fn=self._get_news_content,
                name="get_news_content",
                description="获取指定日期的新闻联播文字内容。参数date格式为'年-月-日',例如'2025-11-19'。返回该日期的新闻内容文本。"
            ),
            FunctionTool.from_defaults(
                fn=self._parse_date_from_text,
                name="parse_date_from_text",
                description="从文本中解析日期。支持格式:'2025年11月19日'、'2025-11-19'、'2025/11/19'、'今天'、'昨天'等。返回格式为'年-月-日'"
            ),
        ]

        # System prompt: a fixed three-step procedure (resolve the date, fetch
        # the news, then answer) that forbids answering without tool calls.
        system_prompt = """你是一个新闻分析助手,专门回答关于新闻联播的问题。

【重要】你必须按照以下步骤使用工具:

步骤1: 确定日期
- 如果用户问"今天"或"今日",调用 get_current_date
- 如果用户问"昨天"或"昨日",调用 get_yesterday_date
- 如果用户提到具体日期(如"2025年11月17日"),调用 parse_date_from_text

步骤2: 获取新闻内容
- 拿到日期后,必须调用 get_news_content(date="YYYY-MM-DD") 获取新闻
- 注意:date参数格式必须是 "年-月-日",例如 "2025-11-19"

步骤3: 回答问题
- 仔细阅读获取到的新闻内容
- 基于新闻内容准确回答用户的问题
- 如果新闻中没有相关信息,明确告知用户

【禁止】直接回答问题而不调用工具!你必须先获取新闻内容才能回答。"""

        agent = ReActAgent(
            llm=self.llm,
            tools=tools,
            verbose=True,
            system_prompt=system_prompt,
        )

        self.workflow = AgentWorkflow(
            agents=[agent],
            timeout=600.0,
        )

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared async HTTP client, creating it on first use."""
        if self.client is None:
            self.client = httpx.AsyncClient(timeout=120.0)
        return self.client

    def _get_current_date(self) -> str:
        """Return today's local date formatted as 'YYYY-MM-DD'."""
        return datetime.now().strftime("%Y-%m-%d")

    def _get_yesterday_date(self) -> str:
        """Return yesterday's local date formatted as 'YYYY-MM-DD'."""
        return (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

    def _parse_date_from_text(self, text: str) -> str:
        """Extract a date from free text.

        Explicit dates ('2025年11月19日', '2025-11-19', '2025/11/19') take
        precedence; otherwise the relative words for today/yesterday are
        checked. When nothing matches, today's date is returned.

        Args:
            text: Text that may contain a date.

        Returns:
            The date formatted as 'YYYY-MM-DD'.
        """
        for pattern in self._DATE_PATTERNS:
            match = re.search(pattern, text)
            if match:
                year, month, day = match.groups()
                return f"{year}-{month.zfill(2)}-{day.zfill(2)}"

        if "昨天" in text or "昨日" in text:
            # "Today" words win over "yesterday" words when both are present.
            if "今天" not in text and "今日" not in text:
                return self._get_yesterday_date()

        # Explicit "today" and the no-date fallback both resolve to today.
        return self._get_current_date()

    def _fetch_page_text(self, url) -> str:
        """Download a page and return its visible text with whitespace collapsed.

        Args:
            url: Target URL (a ToolURL here; whatever requests.get accepts).

        Returns:
            The page text with script/style content removed and runs of
            spaces/newlines collapsed to single spaces.

        Raises:
            requests.RequestException: On network errors or a non-2xx status.
        """
        logger.Log("Info", f"请求URL: {url}")
        response = requests.get(url, headers=self._BROWSER_HEADERS, timeout=30, allow_redirects=True)
        response.raise_for_status()

        # Without a charset in the headers, requests assumes ISO-8859-1 for
        # text content, which garbles Chinese pages; use the detected encoding.
        if not response.encoding or response.encoding.lower() == "iso-8859-1":
            response.encoding = response.apparent_encoding

        soup = BeautifulSoup(response.text, 'html.parser')

        # Script and style bodies are not readable content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        raw_text = soup.get_text()
        stripped_lines = (line.strip() for line in raw_text.splitlines())
        pieces = (part.strip() for line in stripped_lines for part in line.split(" "))
        return ' '.join(piece for piece in pieces if piece)

    def _get_news_content(self, date: str) -> str:
        """Return the news transcript text for a date, using the file cache.

        This is exposed to the LLM as a tool, so it never raises: every
        failure is reported as a human-readable string instead.

        Args:
            date: Date string formatted as 'YYYY-MM-DD', e.g. '2025-11-19'.

        Returns:
            The news text, or an error/"not found" message.
        """
        try:
            # Strict parsing: the components end up in a cache file path and
            # in the request URL, and the argument comes from the model, so
            # anything that is not a real calendar date is rejected up front.
            try:
                parsed = datetime.strptime(date.strip(), "%Y-%m-%d")
            except ValueError:
                return "日期格式错误,请使用'年-月-日'格式,例如'2025-11-19'"

            # Month and day are used without leading zeros in both the cache
            # key and the URL.
            year = f"{parsed.year:04d}"
            month = str(parsed.month)
            day = str(parsed.day)

            cache_key = f"news_cache/{year}/{month}/{day}"
            # NOTE(review): the boolean passed to GetFile presumably controls
            # whether the file is created when missing - confirm.
            cached_file = ProjectConfig().GetFile(cache_key, False)
            if cached_file.Exists():
                cached_content = cached_file.LoadAsText()
                logger.Log("Info", f"从缓存加载新闻: {date}")
                return cached_content

            logger.Log("Info", f"从网页抓取新闻: {date}")
            url = ToolURL(get_target_web_page_url(year, month, day))
            content = self._fetch_page_text(url)

            # Very short pages are treated as "no transcript" and not cached.
            if len(content) < 100:
                return f"未能获取到{year}年{month}月{day}日的新闻内容"

            ProjectConfig().GetFile(cache_key, True).SaveAsText(content)
            logger.Log("Info", f"新闻内容已缓存: {date}")
            return content

        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            logger.Log("Error", f"获取新闻失败: {e}")
            return f"获取新闻时出错: {str(e)}"

    async def answer_question(self, query: str) -> str:
        """Run the agent workflow on a user question and return the answer text.

        Args:
            query: The user's question.

        Returns:
            The generated answer, or an error message if the run failed.
        """
        try:
            logger.Log("Info", "=" * 50)
            logger.Log("Info", f"用户提问: {query}")
            logger.Log("Info", f"使用模型: {self.llm.model}")
            logger.Log("Info", "=" * 50)

            # AgentWorkflow.run takes the question as `user_msg`.
            result = await self.workflow.run(user_msg=query)

            logger.Log("Info", f"Agent执行完成,结果类型: {type(result)}")

            response = getattr(result, 'response', None)
            if response is not None:
                # The response is a chat message; str() on it prepends the
                # role ("assistant: ..."), so read the text content directly.
                if hasattr(response, 'content'):
                    answer = str(response.content or "")
                else:
                    answer = str(response)
                logger.Log("Info", "从result.response提取答案")
            elif hasattr(result, 'message'):
                answer = str(result.message)
                logger.Log("Info", "从result.message提取答案")
            else:
                answer = str(result)
                logger.Log("Info", "直接转换result为字符串")

            logger.Log("Info", f"最终答案长度: {len(answer)} 字符")
            return answer

        except Exception as e:
            logger.Log("Error", f"回答问题失败: {e}")
            import traceback
            error_trace = traceback.format_exc()
            logger.Log("Error", f"详细错误:\n{error_trace}")
            return f"处理问题时出错: {str(e)}"

    async def close(self):
        """Close the async HTTP client if one was created."""
        if self.client:
            await self.client.aclose()
            self.client = None
|
||||||
|
|
||||||
|
class NewsAIPlugin(WPSAPI):
    """Chat plugin that answers questions about the daily news via NewsAIAgent."""

    # Markdown help shown when the command is invoked without a question.
    _HELP_TEXT = """# 📰 新闻AI智能问答使用帮助

**直接提问即可,智能体会自动:**
1. 识别你想查询的日期
2. 获取对应日期的新闻内容
3. 基于新闻内容回答你的问题

**支持的日期格式:**
- 今天、昨天、今日、昨日
- 2025年11月19日
- 2025-11-19
- 2025/11/19

**示例问题:**
- `今天有什么重要新闻?`
- `2025年11月17日有什么新闻?`
- `昨天的新闻中有关于经济的内容吗?`
- `今天习近平主席有什么活动?`
- `请总结今天新闻联播的主要内容`

**特性:**
✨ 智能理解问题意图
🤖 自动调用工具获取信息
💾 新闻内容自动缓存
🚀 基于LlamaIndex + Ollama"""

    def __init__(self):
        super().__init__()
        self.ai_agent = NewsAIAgent(OLLAMA_URL)

    @override
    def dependencies(self) -> List[Type]:
        """Declare the plugin types this plugin depends on."""
        return [WPSAPI]

    @override
    def is_enable_plugin(self) -> bool:
        """This plugin is always enabled."""
        return True

    def get_guide_title(self) -> str:
        """Title shown for this plugin in the guide."""
        return "新闻AI智能问答"

    def get_guide_subtitle(self) -> str:
        """Subtitle shown for this plugin in the guide."""
        return "基于AI的新闻联播智能问答系统"

    def get_guide_metadata(self) -> Dict[str, str]:
        """Key/value facts about the plugin shown in the guide."""
        return {
            # Report the model the agent is actually configured to use rather
            # than a hard-coded name that can drift from the configuration.
            "AI模型": str(OLLAMA_MODEL),
            "数据源": "每日新闻联播",
            "功能": "智能问答",
        }

    def collect_command_entries(self) -> Sequence[GuideEntry]:
        """Describe the commands this plugin offers for the guide."""
        return (
            {
                "title": "新闻",
                "identifier": "ask_news",
                "description": "询问新闻内容,支持自动识别日期或查询今天/昨天的新闻。",
                "metadata": {"别名": "news"},
                "icon": "🤖",
                "badge": "AI",
                "details": [
                    {
                        "type": "list",
                        "items": [
                            "支持日期格式:2024年11月19日、2024-11-19、2024/11/19",
                            "支持相对日期:今天、昨天、今日、昨日",
                            "自动使用AI分析新闻内容并回答问题",
                            "示例:问 今天有什么重要新闻?",
                            "示例:问 2024年11月19日 经济相关的新闻有哪些?",
                        ]
                    }
                ]
            },
        )

    def collect_guide_entries(self) -> Sequence[GuideEntry]:
        """Describe the plugin's features for the guide."""
        return (
            {
                "title": "智能问答",
                "description": (
                    "使用AI理解用户问题,从新闻内容中提取相关信息进行回答。"
                    "支持自然语言提问,可以询问特定主题、人物、事件等。"
                ),
                "icon": "💬",
            },
            {
                "title": "日期识别",
                "description": (
                    "自动从问题中识别日期,支持多种格式。"
                    "如果未指定日期,默认查询当天新闻。"
                ),
                "icon": "📅",
            },
            {
                "title": "内容抓取",
                "description": (
                    "自动从新闻网站抓取指定日期的新闻联播文字版内容。"
                ),
                "icon": "🌐",
            },
        )

    @override
    def wake_up(self) -> None:
        """Log that the plugin loaded and register its two command names."""
        logger.Log("Info", f"{ConsoleFrontColor.GREEN}NewsAIPlugin 新闻AI智能问答插件已加载{ConsoleFrontColor.RESET}")
        self.register_plugin("news")
        self.register_plugin("新闻")

    @override
    async def callback(self, message: str, chat_id: int, user_id: int) -> str|None:
        """Answer a user's news question, or show the help text if it is empty.

        Args:
            message: The text following the command; may be empty.
            chat_id: Passed through to send_markdown_message.
            user_id: Passed through to send_markdown_message.

        Returns:
            Whatever send_markdown_message returns for the reply that was sent.
        """
        try:
            # Empty or whitespace-only input: reply with usage help.
            if not message or not message.strip():
                return await self.send_markdown_message(self._HELP_TEXT, chat_id, user_id)

            answer = await self.ai_agent.answer_question(message)

            formatted_answer = (
                "📰 **新闻AI智能问答**\n"
                "\n"
                f"{answer}\n"
                "\n"
                "---\n"
                "*由 LlamaIndex + Ollama 驱动*"
            )
            return await self.send_markdown_message(formatted_answer, chat_id, user_id)

        except Exception as e:
            # Top-level handler boundary: log the full trace and tell the user.
            logger.Log("Error", f"新闻AI问答异常: {e}")
            import traceback
            error_detail = traceback.format_exc()
            logger.Log("Error", f"详细错误: {error_detail}")
            error_msg = (
                "❌ **处理问题时出错**\n"
                "\n"
                f"错误信息:{str(e)}\n"
                "\n"
                "请稍后重试或联系管理员。"
            )
            return await self.send_markdown_message(error_msg, chat_id, user_id)
|
||||||
0
Plugins/Others/__init__.py
Normal file
0
Plugins/Others/__init__.py
Normal file
@@ -7,7 +7,6 @@ from PWF.Convention.Runtime.Web import ToolURL
|
|||||||
from PWF.Convention.Runtime.String import LimitStringLength
|
from PWF.Convention.Runtime.String import LimitStringLength
|
||||||
from fastapi.responses import HTMLResponse
|
from fastapi.responses import HTMLResponse
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, TypedDict, override, Union
|
|
||||||
import httpx
|
import httpx
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|||||||
18
requirements.txt
Normal file
18
requirements.txt
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Web框架和服务器
|
||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
|
||||||
|
# HTTP客户端
|
||||||
|
httpx
|
||||||
|
requests
|
||||||
|
|
||||||
|
# 数据验证
|
||||||
|
pydantic
|
||||||
|
|
||||||
|
# HTML解析
|
||||||
|
beautifulsoup4
|
||||||
|
|
||||||
|
# AI框架 (LlamaIndex)
|
||||||
|
llama-index-core
|
||||||
|
llama-index-llms-ollama
|
||||||
|
|
||||||
Reference in New Issue
Block a user