[全量] 初始化项目代码、配置、文档及Agent协同harness

This commit is contained in:
2026-04-02 11:36:05 +08:00
parent 0553309cdf
commit 87e571d9ec
1133 changed files with 221948 additions and 0 deletions

View File

View File

@@ -0,0 +1,16 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta
from sqlalchemy.orm import sessionmaker
from configs import SQLALCHEMY_DATABASE_URI
import json
engine = create_engine(
SQLALCHEMY_DATABASE_URI,
json_serializer=lambda obj: json.dumps(obj, ensure_ascii=False),
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base: DeclarativeMeta = declarative_base()

View File

@@ -0,0 +1,13 @@
from datetime import datetime
from sqlalchemy import Column, DateTime, String, Integer
class BaseModel:
"""
基础模型
"""
id = Column(Integer, primary_key=True, index=True, comment="主键ID")
create_time = Column(DateTime, default=datetime.utcnow, comment="创建时间")
update_time = Column(DateTime, default=None, onupdate=datetime.utcnow, comment="更新时间")
create_by = Column(String, default=None, comment="创建者")
update_by = Column(String, default=None, comment="更新者")

View File

@@ -0,0 +1,17 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, func
from server.db.base import Base
class ConversationModel(Base):
"""
聊天记录模型
"""
__tablename__ = 'conversation'
id = Column(String(32), primary_key=True, comment='对话框ID')
name = Column(String(50), comment='对话框名称')
# chat/agent_chat等
chat_type = Column(String(50), comment='聊天类型')
create_time = Column(DateTime, default=func.now(), comment='创建时间')
def __repr__(self):
return f"<Conversation(id='{self.id}', name='{self.name}', chat_type='{self.chat_type}', create_time='{self.create_time}')>"

View File

@@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime, func
from server.db.base import Base
class KnowledgeBaseModel(Base):
"""
知识库模型
"""
__tablename__ = 'knowledge_base'
id = Column(Integer, primary_key=True, autoincrement=True, comment='知识库ID')
kb_name = Column(String(50), comment='知识库名称')
kb_info = Column(String(200), comment='知识库简介(用于Agent)')
vs_type = Column(String(50), comment='向量库类型')
embed_model = Column(String(50), comment='嵌入模型名称')
file_count = Column(Integer, default=0, comment='文件数量')
create_time = Column(DateTime, default=func.now(), comment='创建时间')
def __repr__(self):
return f"<KnowledgeBase(id='{self.id}', kb_name='{self.kb_name}',kb_intro='{self.kb_info} vs_type='{self.vs_type}', embed_model='{self.embed_model}', file_count='{self.file_count}', create_time='{self.create_time}')>"

View File

@@ -0,0 +1,40 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, Boolean, JSON, func
from server.db.base import Base
class KnowledgeFileModel(Base):
"""
知识文件模型
"""
__tablename__ = 'knowledge_file'
id = Column(Integer, primary_key=True, autoincrement=True, comment='知识文件ID')
file_name = Column(String(255), comment='文件名')
file_ext = Column(String(10), comment='文件扩展名')
kb_name = Column(String(50), comment='所属知识库名称')
document_loader_name = Column(String(50), comment='文档加载器名称')
text_splitter_name = Column(String(50), comment='文本分割器名称')
file_version = Column(Integer, default=1, comment='文件版本')
file_mtime = Column(Float, default=0.0, comment="文件修改时间")
file_size = Column(Integer, default=0, comment="文件大小")
custom_docs = Column(Boolean, default=False, comment="是否自定义docs")
docs_count = Column(Integer, default=0, comment="切分文档数量")
create_time = Column(DateTime, default=func.now(), comment='创建时间')
def __repr__(self):
return f"<KnowledgeFile(id='{self.id}', file_name='{self.file_name}', file_ext='{self.file_ext}', kb_name='{self.kb_name}', document_loader_name='{self.document_loader_name}', text_splitter_name='{self.text_splitter_name}', file_version='{self.file_version}', create_time='{self.create_time}')>"
class FileDocModel(Base):
"""
文件-向量库文档模型
"""
__tablename__ = 'file_doc'
id = Column(Integer, primary_key=True, autoincrement=True, comment='ID')
kb_name = Column(String(50), comment='知识库名称')
file_name = Column(String(255), comment='文件名称')
doc_id = Column(String(50), comment="向量库文档ID")
meta_data = Column(JSON, default={})
def __repr__(self):
return f"<FileDoc(id='{self.id}', kb_name='{self.kb_name}', file_name='{self.file_name}', doc_id='{self.doc_id}', metadata='{self.meta_data}')>"

View File

@@ -0,0 +1,28 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, Boolean, JSON, func
from server.db.base import Base
class SummaryChunkModel(Base):
"""
chunk summary模型用于存储file_doc中每个doc_id的chunk 片段,
数据来源:
用户输入: 用户上传文件可填写文件的描述生成的file_doc中的doc_id存入summary_chunk中
程序自动切分 对file_doc表meta_data字段信息中存储的页码信息按每页的页码切分自定义prompt生成总结文本将对应页码关联的doc_id存入summary_chunk中
后续任务:
矢量库构建: 对数据库表summary_chunk中summary_context创建索引构建矢量库meta_data为矢量库的元数据doc_ids
语义关联: 通过用户输入的描述,自动切分的总结文本,计算
语义相似度
"""
__tablename__ = 'summary_chunk'
id = Column(Integer, primary_key=True, autoincrement=True, comment='ID')
kb_name = Column(String(50), comment='知识库名称')
summary_context = Column(String(255), comment='总结文本')
summary_id = Column(String(255), comment='总结矢量id')
doc_ids = Column(String(1024), comment="向量库id关联列表")
meta_data = Column(JSON, default={})
def __repr__(self):
return (f"<SummaryChunk(id='{self.id}', kb_name='{self.kb_name}', summary_context='{self.summary_context}',"
f" doc_ids='{self.doc_ids}', metadata='{self.metadata}')>")

View File

@@ -0,0 +1,25 @@
from sqlalchemy import Column, Integer, String, DateTime, JSON, func
from server.db.base import Base
class MessageModel(Base):
"""
聊天记录模型
"""
__tablename__ = 'message'
id = Column(String(32), primary_key=True, comment='聊天记录ID')
conversation_id = Column(String(32), default=None, index=True, comment='对话框ID')
# chat/agent_chat等
chat_type = Column(String(50), comment='聊天类型')
query = Column(String(4096), comment='用户问题')
response = Column(String(4096), comment='模型回答')
# 记录知识库id等以便后续扩展
meta_data = Column(JSON, default={})
# 满分100 越高表示评价越好
feedback_score = Column(Integer, default=-1, comment='用户评分')
feedback_reason = Column(String(255), default="", comment='用户评分理由')
create_time = Column(DateTime, default=func.now(), comment='创建时间')
def __repr__(self):
return f"<message(id='{self.id}', conversation_id='{self.conversation_id}', chat_type='{self.chat_type}', query='{self.query}', response='{self.response}',meta_data='{self.meta_data}',feedback_score='{self.feedback_score}',feedback_reason='{self.feedback_reason}', create_time='{self.create_time}')>"

View File

@@ -0,0 +1,4 @@
from .conversation_repository import *
from .message_repository import *
from .knowledge_base_repository import *
from .knowledge_file_repository import *

View File

@@ -0,0 +1,16 @@
from server.db.session import with_session
import uuid
from server.db.models.conversation_model import ConversationModel
@with_session
def add_conversation_to_db(session, chat_type, name="", conversation_id=None):
"""
新增聊天记录
"""
if not conversation_id:
conversation_id = uuid.uuid4().hex
c = ConversationModel(id=conversation_id, chat_type=chat_type, name=name)
session.add(c)
return c.id

View File

@@ -0,0 +1,64 @@
from server.db.models.knowledge_base_model import KnowledgeBaseModel
from server.db.session import with_session
@with_session
def add_kb_to_db(session, kb_name, kb_info, vs_type, embed_model):
# 创建知识库实例
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name)).first()
if not kb:
kb = KnowledgeBaseModel(kb_name=kb_name, kb_info=kb_info, vs_type=vs_type, embed_model=embed_model)
session.add(kb)
else: # update kb with new vs_type and embed_model
kb.kb_info = kb_info
kb.vs_type = vs_type
kb.embed_model = embed_model
return True
@with_session
def list_kbs_from_db(session, min_file_count: int = -1):
kbs = session.query(KnowledgeBaseModel.kb_name).filter(KnowledgeBaseModel.file_count > min_file_count).all()
kbs = [kb[0] for kb in kbs]
return kbs
@with_session
def kb_exists(session, kb_name):
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name)).first()
status = True if kb else False
return status
@with_session
def load_kb_from_db(session, kb_name):
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name)).first()
if kb:
kb_name, vs_type, embed_model = kb.kb_name, kb.vs_type, kb.embed_model
else:
kb_name, vs_type, embed_model = None, None, None
return kb_name, vs_type, embed_model
@with_session
def delete_kb_from_db(session, kb_name):
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name)).first()
if kb:
session.delete(kb)
return True
@with_session
def get_kb_detail(session, kb_name: str) -> dict:
kb: KnowledgeBaseModel = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name)).first()
if kb:
return {
"kb_name": kb.kb_name,
"kb_info": kb.kb_info,
"vs_type": kb.vs_type,
"embed_model": kb.embed_model,
"file_count": kb.file_count,
"create_time": kb.create_time,
}
else:
return {}

View File

@@ -0,0 +1,198 @@
from server.db.models.knowledge_base_model import KnowledgeBaseModel
from server.db.models.knowledge_file_model import KnowledgeFileModel, FileDocModel
from server.db.session import with_session
from server.knowledge_base.utils import KnowledgeFile
from typing import List, Dict
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
kb_name: str,
file_name: str,
) -> List[int]:
'''
列出某知识库某文件对应的所有Document的id。
返回形式:[str, ...]
'''
doc_ids = session.query(FileDocModel.doc_id).filter_by(kb_name=kb_name, file_name=file_name).all()
return [int(_id[0]) for _id in doc_ids]
@with_session
def list_docs_from_db(session,
kb_name: str,
file_name: str = None,
metadata: Dict = {},
) -> List[Dict]:
'''
列出某知识库某文件对应的所有Document。
返回形式:[{"id": str, "metadata": dict}, ...]
'''
docs = session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(kb_name))
if file_name:
docs = docs.filter(FileDocModel.file_name.ilike(file_name))
for k, v in metadata.items():
docs = docs.filter(FileDocModel.meta_data[k].as_string() == str(v))
return [{"id": x.doc_id, "metadata": x.metadata} for x in docs.all()]
@with_session
def delete_docs_from_db(session,
kb_name: str,
file_name: str = None,
) -> List[Dict]:
'''
删除某知识库某文件对应的所有Document并返回被删除的Document。
返回形式:[{"id": str, "metadata": dict}, ...]
'''
docs = list_docs_from_db(kb_name=kb_name, file_name=file_name)
query = session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(kb_name))
if file_name:
query = query.filter(FileDocModel.file_name.ilike(file_name))
query.delete(synchronize_session=False)
session.commit()
return docs
@with_session
def add_docs_to_db(session,
kb_name: str,
file_name: str,
doc_infos: List[Dict]):
'''
将某知识库某文件对应的所有Document信息添加到数据库。
doc_infos形式[{"id": str, "metadata": dict}, ...]
'''
# ! 这里会出现doc_infos为None的情况需要进一步排查
if doc_infos is None:
print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
return False
for d in doc_infos:
obj = FileDocModel(
kb_name=kb_name,
file_name=file_name,
doc_id=d["id"],
meta_data=d["metadata"],
)
session.add(obj)
return True
@with_session
def count_files_from_db(session, kb_name: str) -> int:
return session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(kb_name)).count()
@with_session
def list_files_from_db(session, kb_name):
files = session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(kb_name)).all()
docs = [f.file_name for f in files]
return docs
@with_session
def add_file_to_db(session,
kb_file: KnowledgeFile,
docs_count: int = 0,
custom_docs: bool = False,
doc_infos: List[Dict] = [], # 形式:[{"id": str, "metadata": dict}, ...]
):
kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
if kb:
# 如果已经存在该文件,则更新文件信息与版本号
existing_file: KnowledgeFileModel = (session.query(KnowledgeFileModel)
.filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
KnowledgeFileModel.file_name.ilike(kb_file.filename))
.first())
mtime = kb_file.get_mtime()
size = kb_file.get_size()
if existing_file:
existing_file.file_mtime = mtime
existing_file.file_size = size
existing_file.docs_count = docs_count
existing_file.custom_docs = custom_docs
existing_file.file_version += 1
# 否则,添加新文件
else:
new_file = KnowledgeFileModel(
file_name=kb_file.filename,
file_ext=kb_file.ext,
kb_name=kb_file.kb_name,
document_loader_name=kb_file.document_loader_name,
text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
file_mtime=mtime,
file_size=size,
docs_count=docs_count,
custom_docs=custom_docs,
)
kb.file_count += 1
session.add(new_file)
add_docs_to_db(kb_name=kb_file.kb_name, file_name=kb_file.filename, doc_infos=doc_infos)
return True
@with_session
def delete_file_from_db(session, kb_file: KnowledgeFile):
existing_file = (session.query(KnowledgeFileModel)
.filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
.first())
if existing_file:
session.delete(existing_file)
delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
session.commit()
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_file.kb_name)).first()
if kb:
kb.file_count -= 1
session.commit()
return True
@with_session
def delete_files_from_db(session, knowledge_base_name: str):
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(
synchronize_session=False)
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(
synchronize_session=False)
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name)).first()
if kb:
kb.file_count = 0
session.commit()
return True
@with_session
def file_exists_in_db(session, kb_file: KnowledgeFile):
existing_file = (session.query(KnowledgeFileModel)
.filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
.first())
return True if existing_file else False
@with_session
def get_file_detail(session, kb_name: str, filename: str) -> dict:
file: KnowledgeFileModel = (session.query(KnowledgeFileModel)
.filter(KnowledgeFileModel.file_name.ilike(filename),
KnowledgeFileModel.kb_name.ilike(kb_name))
.first())
if file:
return {
"kb_name": file.kb_name,
"file_name": file.file_name,
"file_ext": file.file_ext,
"file_version": file.file_version,
"document_loader": file.document_loader_name,
"text_splitter": file.text_splitter_name,
"create_time": file.create_time,
"file_mtime": file.file_mtime,
"file_size": file.file_size,
"custom_docs": file.custom_docs,
"docs_count": file.docs_count,
}
else:
return {}

View File

@@ -0,0 +1,66 @@
from server.db.models.knowledge_metadata_model import SummaryChunkModel
from server.db.session import with_session
from typing import List, Dict
@with_session
def list_summary_from_db(session,
kb_name: str,
metadata: Dict = {},
) -> List[Dict]:
'''
列出某知识库chunk summary。
返回形式:[{"id": str, "summary_context": str, "doc_ids": str}, ...]
'''
docs = session.query(SummaryChunkModel).filter(SummaryChunkModel.kb_name.ilike(kb_name))
for k, v in metadata.items():
docs = docs.filter(SummaryChunkModel.meta_data[k].as_string() == str(v))
return [{"id": x.id,
"summary_context": x.summary_context,
"summary_id": x.summary_id,
"doc_ids": x.doc_ids,
"metadata": x.metadata} for x in docs.all()]
@with_session
def delete_summary_from_db(session,
kb_name: str
) -> List[Dict]:
'''
删除知识库chunk summary并返回被删除的Dchunk summary。
返回形式:[{"id": str, "summary_context": str, "doc_ids": str}, ...]
'''
docs = list_summary_from_db(kb_name=kb_name)
query = session.query(SummaryChunkModel).filter(SummaryChunkModel.kb_name.ilike(kb_name))
query.delete(synchronize_session=False)
session.commit()
return docs
@with_session
def add_summary_to_db(session,
kb_name: str,
summary_infos: List[Dict]):
'''
将总结信息添加到数据库。
summary_infos形式[{"summary_context": str, "doc_ids": str}, ...]
'''
for summary in summary_infos:
obj = SummaryChunkModel(
kb_name=kb_name,
summary_context=summary["summary_context"],
summary_id=summary["summary_id"],
doc_ids=summary["doc_ids"],
meta_data=summary["metadata"],
)
session.add(obj)
session.commit()
return True
@with_session
def count_summary_from_db(session, kb_name: str) -> int:
return session.query(SummaryChunkModel).filter(SummaryChunkModel.kb_name.ilike(kb_name)).count()

View File

@@ -0,0 +1,72 @@
from server.db.session import with_session
from typing import Dict, List
import uuid
from server.db.models.message_model import MessageModel
@with_session
def add_message_to_db(session, conversation_id: str, chat_type, query, response="", message_id=None,
metadata: Dict = {}):
"""
新增聊天记录
"""
if not message_id:
message_id = uuid.uuid4().hex
m = MessageModel(id=message_id, chat_type=chat_type, query=query, response=response,
conversation_id=conversation_id,
meta_data=metadata)
session.add(m)
session.commit()
return m.id
@with_session
def update_message(session, message_id, response: str = None, metadata: Dict = None):
"""
更新已有的聊天记录
"""
m = get_message_by_id(message_id)
if m is not None:
if response is not None:
m.response = response
if isinstance(metadata, dict):
m.meta_data = metadata
session.add(m)
session.commit()
return m.id
@with_session
def get_message_by_id(session, message_id) -> MessageModel:
"""
查询聊天记录
"""
m = session.query(MessageModel).filter_by(id=message_id).first()
return m
@with_session
def feedback_message_to_db(session, message_id, feedback_score, feedback_reason):
"""
反馈聊天记录
"""
m = session.query(MessageModel).filter_by(id=message_id).first()
if m:
m.feedback_score = feedback_score
m.feedback_reason = feedback_reason
session.commit()
return m.id
@with_session
def filter_message(session, conversation_id: str, limit: int = 10):
messages = (session.query(MessageModel).filter_by(conversation_id=conversation_id).
# 用户最新的query 也会插入到db忽略这个message record
filter(MessageModel.response != '').
# 返回最近的limit 条记录
order_by(MessageModel.create_time.desc()).limit(limit).all())
# 直接返回 List[MessageModel] 报错
data = []
for m in messages:
data.append({"query": m.query, "response": m.response})
return data

View File

@@ -0,0 +1,46 @@
from functools import wraps
from contextlib import contextmanager
from server.db.base import SessionLocal
from sqlalchemy.orm import Session
@contextmanager
def session_scope() -> Session:
"""上下文管理器用于自动获取 Session, 避免错误"""
session = SessionLocal()
try:
yield session
session.commit()
except:
session.rollback()
raise
finally:
session.close()
def with_session(f):
@wraps(f)
def wrapper(*args, **kwargs):
with session_scope() as session:
try:
result = f(session, *args, **kwargs)
session.commit()
return result
except:
session.rollback()
raise
return wrapper
def get_db() -> SessionLocal:
db = SessionLocal()
try:
yield db
finally:
db.close()
def get_db0() -> SessionLocal:
db = SessionLocal()
return db