{title}{badge_html}
+ +{description}
+ {metadata_html} + {tags_html} + {details_html} + {links_html} +From 84671c5e6b96c9be8e67e4914fb1090b8baea398 Mon Sep 17 00:00:00 2001 From: ninemine <1371605831@qq.com> Date: Wed, 19 Nov 2025 11:52:24 +0800 Subject: [PATCH] =?UTF-8?q?=E9=A6=96=E4=B8=AA=E5=8F=AF=E8=BF=90=E8=A1=8C?= =?UTF-8?q?=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitattributes | 2 + .gitignore | 188 ++++++ .gitmodules | 4 + PWF | 1 + Plugins/WPSAPI.py | 1491 +++++++++++++++++++++++++++++++++++++++++++ Plugins/__init__.py | 0 app.py | 3 + requirements.txt | 10 + 8 files changed, 1699 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .gitmodules create mode 160000 PWF create mode 100644 Plugins/WPSAPI.py create mode 100644 Plugins/__init__.py create mode 100644 app.py create mode 100644 requirements.txt diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e7828f --- /dev/null +++ b/.gitignore @@ -0,0 +1,188 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor.`.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore +# IDE +.vscode/ + +# Database +Assets/db.db +liubai_web.pid +Assets/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8ba6f4c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "PWF"] + path = PWF + url = http://www.liubai.site:3000/ninemine/PWF.git + branch = main diff --git a/PWF b/PWF new file mode 160000 index 0000000..89de330 --- /dev/null +++ b/PWF @@ -0,0 +1 @@ +Subproject commit 89de330e2d102d59c6732b017fee7ec8d04bc186 diff --git a/Plugins/WPSAPI.py b/Plugins/WPSAPI.py new file mode 100644 index 0000000..5c3dd50 --- /dev/null +++ b/Plugins/WPSAPI.py @@ -0,0 +1,1491 @@ +from PWF.Convention.Runtime.Config import * +from PWF.CoreModules.plugin_interface import PluginInterface +from PWF.CoreModules.flags import * +from PWF.Convention.Runtime.Architecture import Architecture +from PWF.Convention.Runtime.GlobalConfig import ProjectConfig +from PWF.Convention.Runtime.Web import ToolURL +from PWF.Convention.Runtime.String import LimitStringLength +from fastapi.responses import HTMLResponse +from dataclasses import dataclass, field +from typing import Any, Dict, Iterable, List, Optional, Sequence, TypedDict, override, Union +import httpx +import re +from datetime import datetime, timedelta +from llama_index.core.tools import FunctionTool +from llama_index.llms.ollama import Ollama +from llama_index.core.agent.workflow import AgentWorkflow +from llama_index.core.agent.workflow.react_agent import ReActAgent +from bs4 import BeautifulSoup + +logger: ProjectConfig = Architecture.Get(ProjectConfig) +MAIN_WEBHOOK_URL = logger.FindItem("main_webhook_url", "") +logger.SaveProperties() + +def get_target_web_page_url(year:str, month:str, day:str) -> str: + return fr"http://mrxwlb.com/{year}/{month}/{day}/{year}年{month}月{day}日新闻联播文字版/" + +ollama_url = "http://www.liubai.site:11434" + +class NewsAIAgent: + """新闻AI智能体 - 基于LlamaIndex和Ollama的工具调用智能体""" + + def __init__(self, ollama_url: str = "http://www.liubai.site:11434"): + self.ollama_url = ollama_url + self.client: Optional[httpx.AsyncClient] = None + self.llm = Ollama(model="qwen2.5:7b", base_url=ollama_url, request_timeout=600.0) + self.workflow: Optional[AgentWorkflow] = None + self._initialize_agent() + + def _initialize_agent(self): + """初始化智能体和工具""" + # 创建工具函数 + tools = [ + FunctionTool.from_defaults( + fn=self._get_current_date, + name="get_current_date", + description="获取当前日期,返回格式为'年-月-日',例如'2025-11-19'" + ), + FunctionTool.from_defaults( + fn=self._get_yesterday_date, + name="get_yesterday_date", + description="获取昨天的日期,返回格式为'年-月-日',例如'2025-11-18'" + ), + FunctionTool.from_defaults( + fn=self._get_news_content, + name="get_news_content", + description="获取指定日期的新闻联播文字内容。参数date格式为'年-月-日',例如'2025-11-19'。返回该日期的新闻内容文本。" + ), + FunctionTool.from_defaults( + fn=self._parse_date_from_text, + name="parse_date_from_text", + description="从文本中解析日期。支持格式:'2025年11月19日'、'2025-11-19'、'2025/11/19'、'今天'、'昨天'等。返回格式为'年-月-日'" + ), + ] + + # 系统提示词 - 更清晰明确的指令 + system_prompt = """你是一个新闻分析助手,专门回答关于新闻联播的问题。 + +【重要】你必须按照以下步骤使用工具: + +步骤1: 确定日期 +- 如果用户问"今天"或"今日",调用 get_current_date +- 如果用户问"昨天"或"昨日",调用 get_yesterday_date +- 如果用户提到具体日期(如"2025年11月17日"),调用 parse_date_from_text + +步骤2: 获取新闻内容 +- 拿到日期后,必须调用 get_news_content(date="YYYY-MM-DD") 获取新闻 +- 注意:date参数格式必须是 "年-月-日",例如 "2025-11-19" + +步骤3: 回答问题 +- 仔细阅读获取到的新闻内容 +- 基于新闻内容准确回答用户的问题 +- 如果新闻中没有相关信息,明确告知用户 + +【禁止】直接回答问题而不调用工具!你必须先获取新闻内容才能回答。""" + + # 创建ReActAgent + agent = ReActAgent( + llm=self.llm, + tools=tools, + verbose=True, + system_prompt=system_prompt, + ) + + # 创建AgentWorkflow + self.workflow = AgentWorkflow( + agents=[agent], + timeout=600.0, + ) + + async def _get_client(self) -> httpx.AsyncClient: + """获取HTTP客户端(懒加载)""" + if self.client is None: + self.client = httpx.AsyncClient(timeout=120.0) + return self.client + + def _get_current_date(self) -> str: + """获取当前日期 + + Returns: + 当前日期字符串,格式:年-月-日 + """ + now = datetime.now() + return f"{now.year}-{str(now.month).zfill(2)}-{str(now.day).zfill(2)}" + + def _get_yesterday_date(self) -> str: + """获取昨天日期 + + Returns: + 昨天日期字符串,格式:年-月-日 + """ + yesterday = datetime.now() - timedelta(days=1) + return f"{yesterday.year}-{str(yesterday.month).zfill(2)}-{str(yesterday.day).zfill(2)}" + + def _parse_date_from_text(self, text: str) -> str: + """从文本中解析日期 + + Args: + text: 包含日期信息的文本 + + Returns: + 日期字符串,格式:年-月-日 + """ + # 尝试匹配日期格式 + date_patterns = [ + r'(\d{4})年(\d{1,2})月(\d{1,2})日', + r'(\d{4})-(\d{1,2})-(\d{1,2})', + r'(\d{4})/(\d{1,2})/(\d{1,2})', + ] + + for pattern in date_patterns: + match = re.search(pattern, text) + if match: + year, month, day = match.groups() + return f"{year}-{month.zfill(2)}-{day.zfill(2)}" + + # 检查相对日期 + if "今天" in text or "今日" in text: + return self._get_current_date() + elif "昨天" in text or "昨日" in text: + return self._get_yesterday_date() + + # 默认返回今天 + return self._get_current_date() + + def _get_news_content(self, date: str) -> str: + """获取指定日期的新闻内容(带缓存) + + Args: + date: 日期字符串,格式:年-月-日,例如'2025-11-19' + + Returns: + 新闻文字内容 + """ + try: + # 解析日期 + parts = date.split('-') + if len(parts) != 3: + return f"日期格式错误,请使用'年-月-日'格式,例如'2025-11-19'" + + year, month, day = parts + # 去掉前导零(某些网站URL格式要求) + month = str(int(month)) + day = str(int(day)) + + # 检查缓存 + cache_key = f"news_cache/{year}/{month}/{day}" + cached_file = ProjectConfig().GetFile(cache_key, False) + + if cached_file.Exists(): + cached_content = cached_file.LoadAsText() + logger.Log("Info", f"从缓存加载新闻: {date}") + return cached_content + + # 如果没有缓存,则抓取网页 + logger.Log("Info", f"从网页抓取新闻: {date}") + url = get_target_web_page_url(year, month, day) + + # 使用同步方式获取(因为工具函数需要同步) + import requests + + # 直接使用中文URL,让requests自动处理编码 + url = f"http://mrxwlb.com/{year}/{month}/{day}/{year}年{month}月{day}日新闻联播文字版/" + + # 添加浏览器请求头,模拟真实浏览器访问 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Accept-Encoding': 'gzip, deflate', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + } + + logger.Log("Info", f"请求URL: {url}") + response = requests.get(url, headers=headers, timeout=30, allow_redirects=True) + response.raise_for_status() + + # 使用BeautifulSoup解析HTML + soup = BeautifulSoup(response.text, 'html.parser') + + # 移除script和style标签 + for script in soup(["script", "style"]): + script.decompose() + + # 获取文本内容 + content = soup.get_text() + # 清理空白字符 + lines = (line.strip() for line in content.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + content = ' '.join(chunk for chunk in chunks if chunk) + + if not content or len(content) < 100: + return f"未能获取到{year}年{month}月{day}日的新闻内容" + + # 保存到缓存 + ProjectConfig().GetFile(cache_key, True).SaveAsText(content) + logger.Log("Info", f"新闻内容已缓存: {date}") + + return content + + except Exception as e: + logger.Log("Error", f"获取新闻失败: {e}") + return f"获取新闻时出错: {str(e)}" + + async def answer_question(self, query: str) -> str: + """根据问题回答新闻内容 + + Args: + query: 用户问题 + + Returns: + AI生成的回答 + """ + try: + logger.Log("Info", f"="*50) + logger.Log("Info", f"用户提问: {query}") + logger.Log("Info", f"使用模型: {self.llm.model}") + logger.Log("Info", f"="*50) + + # 使用workflow运行agent(注意参数是user_msg) + result = await self.workflow.run(user_msg=query) + + logger.Log("Info", f"Agent执行完成,结果类型: {type(result)}") + + # 提取回答内容 + if hasattr(result, 'response'): + answer = str(result.response) + logger.Log("Info", f"从result.response提取答案") + elif hasattr(result, 'message'): + answer = str(result.message) + logger.Log("Info", f"从result.message提取答案") + else: + answer = str(result) + logger.Log("Info", f"直接转换result为字符串") + + logger.Log("Info", f"最终答案长度: {len(answer)} 字符") + + return answer + + except Exception as e: + logger.Log("Error", f"回答问题失败: {e}") + import traceback + error_trace = traceback.format_exc() + logger.Log("Error", f"详细错误:\n{error_trace}") + return f"处理问题时出错: {str(e)}" + + async def close(self): + """关闭客户端""" + if self.client: + await self.client.aclose() + self.client = None + +class GuideEntry(TypedDict, total=False): + """单条图鉴信息。""" + + title: str + identifier: str + description: str + category: str + metadata: Dict[str, str] + icon: str + badge: str + links: Sequence[Dict[str, str]] + tags: Sequence[str] + details: Sequence[Union[str, Dict[str, Any]]] + group: str + + +@dataclass(frozen=True) +class GuideSection: + """图鉴章节。""" + + title: str + entries: Sequence[GuideEntry] = field(default_factory=tuple) + description: str = "" + layout: str = "grid" + section_id: str | None = None + + +@dataclass(frozen=True) +class GuidePage: + """完整图鉴页面。""" + + title: str + sections: Sequence[GuideSection] = field(default_factory=tuple) + subtitle: str = "" + metadata: Dict[str, str] = field(default_factory=dict) + related_links: Dict[str, Sequence[Dict[str, str]]] = field(default_factory=dict) + + +def render_markdown_page(page: GuidePage) -> str: + """保留 Markdown 渲染(备用)。""" + + def _render_section(section: GuideSection) -> str: + lines: List[str] = [f"## {section.title}"] + if section.description: + lines.append(section.description) + if not section.entries: + lines.append("> 暂无内容。") + return "\n".join(lines) + for entry in section.entries: + title = entry.get("title", "未命名") + identifier = entry.get("identifier") + desc = entry.get("description", "") + category = entry.get("category") + metadata = entry.get("metadata", {}) + bullet = f"- **{title}**" + if identifier: + bullet += f"|`{identifier}`" + if category: + bullet += f"|{category}" + lines.append(bullet) + if desc: + lines.append(f" - {desc}") + for meta_key, meta_value in metadata.items(): + lines.append(f" - {meta_key}:{meta_value}") + return "\n".join(lines) + + lines: List[str] = [f"# {page.title}"] + if page.subtitle: + lines.append(page.subtitle) + if page.metadata: + lines.append("") + for key, value in page.metadata.items(): + lines.append(f"- {key}:{value}") + for section in page.sections: + lines.append("") + lines.append(_render_section(section)) + return "\n".join(lines) + + +def render_html_page(page: GuidePage) -> str: + """渲染 Apple Store 风格的 HTML 页面。""" + + def escape(text: Optional[str]) -> str: + if not text: + return "" + return ( + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) + + def render_metadata(metadata: Dict[str, str]) -> str: + if not metadata: + return "" + cards = [] + for key, value in metadata.items(): + cards.append( + f""" +
+ """ + ) + return f'' + + def render_links(links: Optional[Sequence[Dict[str, str]]]) -> str: + if not links: + return "" + items = [] + for link in links: + href = escape(link.get("href", "#")) + label = escape(link.get("label", "前往")) + items.append(f'{label}') + return "".join(items) + + def render_tags(tags: Optional[Sequence[str]]) -> str: + if not tags: + return "" + chips = "".join(f'{escape(tag)}' for tag in tags) + return f'' + + def render_details(details: Optional[Sequence[Union[str, Dict[str, Any]]]]) -> str: + if not details: + return "" + blocks: List[str] = [] + for detail in details: + if isinstance(detail, str): + blocks.append(f'{escape(detail)}
') + elif isinstance(detail, dict): + kind = detail.get("type") + if kind == "list": + items = "".join( + f'{identifier}' if identifier else ""
+ category_html = f'{category}' if category else ""
+ badge_html = f'{badge}' if badge else ""
+ icon_html = f'' if icon else ""
+ links_html = render_links(entry.get("links"))
+ tags_html = render_tags(entry.get("tags"))
+ details_html = render_details(entry.get("details"))
+ group = escape(entry.get("group"))
+ group_attr = f' data-group="{group}"' if group else ""
+ return f"""
+ {description}
+ {metadata_html} + {tags_html} + {details_html} + {links_html} +{escape(section.description)}
' + if section.description + else "" + ) + if not cards: + cards = '{escape(page.subtitle)}
' if page.subtitle else "" + + return f""" + + + + + +