[全量] 初始化项目代码、配置、文档及Agent协同harness
This commit is contained in:
4
langchain-chat/server/db/repository/__init__.py
Normal file
4
langchain-chat/server/db/repository/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from .conversation_repository import *
|
||||
from .message_repository import *
|
||||
from .knowledge_base_repository import *
|
||||
from .knowledge_file_repository import *
|
||||
@@ -0,0 +1,16 @@
|
||||
from server.db.session import with_session
|
||||
import uuid
|
||||
from server.db.models.conversation_model import ConversationModel
|
||||
|
||||
|
||||
@with_session
def add_conversation_to_db(session, chat_type, name="", conversation_id=None):
    """
    Create a conversation record and return its id.

    A random hex UUID is generated when ``conversation_id`` is empty or None.
    The record is only added to ``session``; this function issues no explicit
    commit.
    """
    record_id = conversation_id or uuid.uuid4().hex
    conversation = ConversationModel(id=record_id, chat_type=chat_type, name=name)
    session.add(conversation)
    return conversation.id
|
||||
@@ -0,0 +1,64 @@
|
||||
from server.db.models.knowledge_base_model import KnowledgeBaseModel
|
||||
from server.db.session import with_session
|
||||
|
||||
|
||||
@with_session
def add_kb_to_db(session, kb_name, kb_info, vs_type, embed_model):
    """
    Register a knowledge base, or refresh the settings of an existing one.

    The existing row is looked up with an ``ilike`` match on ``kb_name``.
    When a row is found, its ``kb_info``, ``vs_type`` and ``embed_model`` are
    overwritten; otherwise a new row is added. Always returns True.
    """
    existing = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_name))
        .first()
    )
    if existing:
        # Already registered: update the description, vector store type and
        # embedding model in place.
        existing.kb_info = kb_info
        existing.vs_type = vs_type
        existing.embed_model = embed_model
        return True

    session.add(
        KnowledgeBaseModel(kb_name=kb_name, kb_info=kb_info, vs_type=vs_type, embed_model=embed_model)
    )
    return True
|
||||
|
||||
|
||||
@with_session
def list_kbs_from_db(session, min_file_count: int = -1):
    """
    Return the names of knowledge bases whose ``file_count`` is strictly
    greater than ``min_file_count``.
    """
    name_rows = (
        session.query(KnowledgeBaseModel.kb_name)
        .filter(KnowledgeBaseModel.file_count > min_file_count)
        .all()
    )
    # Each row is a one-element tuple; unwrap it to the bare name.
    return [row[0] for row in name_rows]
|
||||
|
||||
|
||||
@with_session
def kb_exists(session, kb_name):
    """Return True when a knowledge base row matches ``kb_name`` (``ilike`` match)."""
    match = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_name))
        .first()
    )
    return bool(match)
|
||||
|
||||
|
||||
@with_session
def load_kb_from_db(session, kb_name):
    """
    Look up a knowledge base by name (``ilike`` match).

    Returns a ``(kb_name, vs_type, embed_model)`` tuple taken from the stored
    row, or ``(None, None, None)`` when no row matches.
    """
    row = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_name))
        .first()
    )
    if not row:
        return None, None, None
    return row.kb_name, row.vs_type, row.embed_model
|
||||
|
||||
|
||||
@with_session
def delete_kb_from_db(session, kb_name):
    """
    Delete the knowledge base row matching ``kb_name`` (``ilike`` match), if any.

    Always returns True, whether or not a row was found.
    """
    lookup = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(kb_name))
    target = lookup.first()
    if target:
        session.delete(target)
    return True
|
||||
|
||||
|
||||
@with_session
def get_kb_detail(session, kb_name: str) -> dict:
    """
    Return the stored attributes of a knowledge base as a dict.

    The row is found with an ``ilike`` match on ``kb_name``. An empty dict is
    returned when no row matches.
    """
    row: KnowledgeBaseModel = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_name))
        .first()
    )
    if not row:
        return {}

    detail = {
        "kb_name": row.kb_name,
        "kb_info": row.kb_info,
        "vs_type": row.vs_type,
        "embed_model": row.embed_model,
        "file_count": row.file_count,
        "create_time": row.create_time,
    }
    return detail
|
||||
198
langchain-chat/server/db/repository/knowledge_file_repository.py
Normal file
198
langchain-chat/server/db/repository/knowledge_file_repository.py
Normal file
@@ -0,0 +1,198 @@
|
||||
from server.db.models.knowledge_base_model import KnowledgeBaseModel
|
||||
from server.db.models.knowledge_file_model import KnowledgeFileModel, FileDocModel
|
||||
from server.db.session import with_session
|
||||
from server.knowledge_base.utils import KnowledgeFile
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
                                                   kb_name: str,
                                                   file_name: str,
                                                   ) -> List[int]:
    """
    List the ids of every Document stored for one file of a knowledge base.

    Rows are selected by exact ``kb_name`` / ``file_name`` equality, and each
    stored ``doc_id`` is converted with ``int()``.
    Return shape: [int, ...]
    """
    id_rows = (
        session.query(FileDocModel.doc_id)
        .filter_by(kb_name=kb_name, file_name=file_name)
        .all()
    )
    numeric_ids = []
    for row in id_rows:
        # Each row is a one-element tuple holding the doc_id.
        numeric_ids.append(int(row[0]))
    return numeric_ids
|
||||
|
||||
|
||||
@with_session
def list_docs_from_db(session,
                      kb_name: str,
                      file_name: str = None,
                      metadata: Dict = None,
                      ) -> List[Dict]:
    """
    List the Documents recorded for a knowledge base, optionally narrowed
    to one file and/or to given metadata values.

    Args:
        kb_name: knowledge base name, matched with ``ilike``.
        file_name: optional file name, matched with ``ilike``; skipped when empty.
        metadata: optional ``{key: value}`` pairs; a row is kept only when
            ``meta_data[key]`` rendered as a string equals ``str(value)`` for
            every pair. ``None`` (the default) means no metadata filtering.

    Return shape: [{"id": str, "metadata": dict}, ...]
    """
    docs = session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(kb_name))
    if file_name:
        docs = docs.filter(FileDocModel.file_name.ilike(file_name))
    for key, value in (metadata or {}).items():
        docs = docs.filter(FileDocModel.meta_data[key].as_string() == str(value))

    # The JSON column is ``meta_data``. ``x.metadata`` would return the
    # SQLAlchemy MetaData registry shared by the declarative base, not the
    # per-row metadata.
    return [{"id": x.doc_id, "metadata": x.meta_data} for x in docs.all()]
|
||||
|
||||
|
||||
@with_session
def delete_docs_from_db(session,
                        kb_name: str,
                        file_name: str = None,
                        ) -> List[Dict]:
    """
    Delete the Documents recorded for a knowledge base (optionally for a
    single file) and return the Documents that were deleted.

    Return shape: [{"id": str, "metadata": dict}, ...]
    """
    # Capture what is about to be removed before issuing the bulk delete.
    removed = list_docs_from_db(kb_name=kb_name, file_name=file_name)

    criteria = [FileDocModel.kb_name.ilike(kb_name)]
    if file_name:
        criteria.append(FileDocModel.file_name.ilike(file_name))
    session.query(FileDocModel).filter(*criteria).delete(synchronize_session=False)
    session.commit()
    return removed
|
||||
|
||||
|
||||
@with_session
def add_docs_to_db(session,
                   kb_name: str,
                   file_name: str,
                   doc_infos: List[Dict]):
    """
    Record the Document entries belonging to one file of a knowledge base.

    doc_infos shape: [{"id": str, "metadata": dict}, ...]

    Returns False (after printing a message) when ``doc_infos`` is None,
    otherwise True.
    """
    # NOTE: doc_infos has been observed to arrive as None; the root cause
    # still needs investigation.
    if doc_infos is None:
        print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
        return False

    for info in doc_infos:
        session.add(
            FileDocModel(
                kb_name=kb_name,
                file_name=file_name,
                doc_id=info["id"],
                meta_data=info["metadata"],
            )
        )
    return True
|
||||
|
||||
|
||||
@with_session
def count_files_from_db(session, kb_name: str) -> int:
    """Count the file rows whose ``kb_name`` matches (``ilike``) the given name."""
    matching_files = session.query(KnowledgeFileModel).filter(
        KnowledgeFileModel.kb_name.ilike(kb_name)
    )
    return matching_files.count()
|
||||
|
||||
|
||||
@with_session
def list_files_from_db(session, kb_name):
    """Return the ``file_name`` of every file row matching ``kb_name`` (``ilike`` match)."""
    file_rows = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.kb_name.ilike(kb_name))
        .all()
    )
    names = []
    for row in file_rows:
        names.append(row.file_name)
    return names
|
||||
|
||||
|
||||
@with_session
def add_file_to_db(session,
                   kb_file: KnowledgeFile,
                   docs_count: int = 0,
                   custom_docs: bool = False,
                   doc_infos: List[Dict] = [],  # shape: [{"id": str, "metadata": dict}, ...]
                   ):
    """
    Record a knowledge file, together with its Document entries.

    The owning knowledge base is looked up by exact ``kb_name``; when it does
    not exist nothing is written. If a file row already exists (``ilike`` match
    on knowledge base name and file name) its mtime, size, docs count and
    custom-docs flag are refreshed and ``file_version`` is incremented.
    Otherwise a new file row is added and the knowledge base ``file_count``
    is incremented. Always returns True.
    """
    kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
    if not kb:
        return True

    record: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
                KnowledgeFileModel.file_name.ilike(kb_file.filename))
        .first()
    )
    mtime = kb_file.get_mtime()
    size = kb_file.get_size()

    if not record:
        # First time this file is seen: create its row and bump the counter.
        record = KnowledgeFileModel(
            file_name=kb_file.filename,
            file_ext=kb_file.ext,
            kb_name=kb_file.kb_name,
            document_loader_name=kb_file.document_loader_name,
            text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
            file_mtime=mtime,
            file_size=size,
            docs_count=docs_count,
            custom_docs=custom_docs,
        )
        kb.file_count += 1
        session.add(record)
    else:
        # The file is already recorded: refresh its info and version number.
        record.file_mtime = mtime
        record.file_size = size
        record.docs_count = docs_count
        record.custom_docs = custom_docs
        record.file_version += 1

    add_docs_to_db(kb_name=kb_file.kb_name, file_name=kb_file.filename, doc_infos=doc_infos)
    return True
|
||||
|
||||
|
||||
@with_session
def delete_file_from_db(session, kb_file: KnowledgeFile):
    """
    Remove a knowledge file row, its Document entries, and decrement the
    owning knowledge base's ``file_count``.

    Nothing is changed when the file row does not exist. Always returns True.
    """
    file_row = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
                KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
        .first()
    )
    if not file_row:
        return True

    session.delete(file_row)
    delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
    session.commit()

    owner = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_file.kb_name))
        .first()
    )
    if owner:
        owner.file_count -= 1
        session.commit()
    return True
|
||||
|
||||
|
||||
@with_session
def delete_files_from_db(session, knowledge_base_name: str):
    """
    Remove every file row and every Document row of a knowledge base, then
    reset the knowledge base's ``file_count`` to 0 when the knowledge base exists.

    All lookups use an ``ilike`` match on the name. Always returns True.
    """
    # Bulk-delete file rows first, then document rows.
    for model in (KnowledgeFileModel, FileDocModel):
        (session.query(model)
         .filter(model.kb_name.ilike(knowledge_base_name))
         .delete(synchronize_session=False))

    owner = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name))
        .first()
    )
    if owner:
        owner.file_count = 0

    session.commit()
    return True
|
||||
|
||||
|
||||
@with_session
def file_exists_in_db(session, kb_file: KnowledgeFile):
    """
    Return True when a file row matches both the file name and the knowledge
    base name of ``kb_file`` (``ilike`` match), False otherwise.
    """
    lookup = session.query(KnowledgeFileModel).filter(
        KnowledgeFileModel.file_name.ilike(kb_file.filename),
        KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
    )
    return bool(lookup.first())
|
||||
|
||||
|
||||
@with_session
def get_file_detail(session, kb_name: str, filename: str) -> dict:
    """
    Return the stored attributes of one knowledge file as a dict.

    The row is found with an ``ilike`` match on both file name and knowledge
    base name. An empty dict is returned when no row matches.
    """
    record: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.file_name.ilike(filename),
                KnowledgeFileModel.kb_name.ilike(kb_name))
        .first()
    )
    if not record:
        return {}

    detail = {
        "kb_name": record.kb_name,
        "file_name": record.file_name,
        "file_ext": record.file_ext,
        "file_version": record.file_version,
        "document_loader": record.document_loader_name,
        "text_splitter": record.text_splitter_name,
        "create_time": record.create_time,
        "file_mtime": record.file_mtime,
        "file_size": record.file_size,
        "custom_docs": record.custom_docs,
        "docs_count": record.docs_count,
    }
    return detail
|
||||
@@ -0,0 +1,66 @@
|
||||
from server.db.models.knowledge_metadata_model import SummaryChunkModel
|
||||
from server.db.session import with_session
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
@with_session
def list_summary_from_db(session,
                         kb_name: str,
                         metadata: Dict = None,
                         ) -> List[Dict]:
    """
    List the chunk summaries of a knowledge base.

    Args:
        kb_name: knowledge base name, matched with ``ilike``.
        metadata: optional ``{key: value}`` pairs; a row is kept only when
            ``meta_data[key]`` rendered as a string equals ``str(value)`` for
            every pair. ``None`` (the default) means no metadata filtering.

    Return shape:
        [{"id": ..., "summary_context": str, "summary_id": ...,
          "doc_ids": str, "metadata": dict}, ...]
    """
    docs = session.query(SummaryChunkModel).filter(SummaryChunkModel.kb_name.ilike(kb_name))

    for key, value in (metadata or {}).items():
        docs = docs.filter(SummaryChunkModel.meta_data[key].as_string() == str(value))

    # The JSON column is ``meta_data``. ``x.metadata`` would return the
    # SQLAlchemy MetaData registry shared by the declarative base, not the
    # per-row metadata.
    return [{"id": x.id,
             "summary_context": x.summary_context,
             "summary_id": x.summary_id,
             "doc_ids": x.doc_ids,
             "metadata": x.meta_data} for x in docs.all()]
|
||||
|
||||
|
||||
@with_session
def delete_summary_from_db(session,
                           kb_name: str
                           ) -> List[Dict]:
    """
    Delete the chunk summaries of a knowledge base and return the summaries
    that were deleted (as produced by ``list_summary_from_db``).
    """
    # Capture the rows before the bulk delete removes them.
    removed = list_summary_from_db(kb_name=kb_name)
    (session.query(SummaryChunkModel)
     .filter(SummaryChunkModel.kb_name.ilike(kb_name))
     .delete(synchronize_session=False))
    session.commit()
    return removed
|
||||
|
||||
|
||||
@with_session
def add_summary_to_db(session,
                      kb_name: str,
                      summary_infos: List[Dict]):
    """
    Store chunk summaries for a knowledge base.

    Each entry of ``summary_infos`` must provide the keys
    "summary_context", "summary_id", "doc_ids" and "metadata".
    Commits the session and returns True.
    """
    for info in summary_infos:
        session.add(
            SummaryChunkModel(
                kb_name=kb_name,
                summary_context=info["summary_context"],
                summary_id=info["summary_id"],
                doc_ids=info["doc_ids"],
                meta_data=info["metadata"],
            )
        )

    session.commit()
    return True
|
||||
|
||||
|
||||
@with_session
def count_summary_from_db(session, kb_name: str) -> int:
    """Count the chunk summary rows whose ``kb_name`` matches (``ilike``) the given name."""
    matching_summaries = session.query(SummaryChunkModel).filter(
        SummaryChunkModel.kb_name.ilike(kb_name)
    )
    return matching_summaries.count()
|
||||
72
langchain-chat/server/db/repository/message_repository.py
Normal file
72
langchain-chat/server/db/repository/message_repository.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from server.db.session import with_session
|
||||
from typing import Dict, List
|
||||
import uuid
|
||||
from server.db.models.message_model import MessageModel
|
||||
|
||||
|
||||
@with_session
def add_message_to_db(session, conversation_id: str, chat_type, query, response="", message_id=None,
                      metadata: Dict = None):
    """
    Create a chat message record and return its id.

    Args:
        conversation_id: id of the conversation the message belongs to.
        chat_type: chat type stored on the record.
        query: the user query text.
        response: the response text; empty by default.
        message_id: explicit id; a random hex UUID is generated when empty.
        metadata: dict stored in the ``meta_data`` column. When omitted, a
            fresh empty dict is used for each call.
    """
    if not message_id:
        message_id = uuid.uuid4().hex
    # Build a new dict per call: a shared ``{}`` default would be the same
    # object on every record created without explicit metadata.
    meta = {} if metadata is None else metadata
    m = MessageModel(id=message_id, chat_type=chat_type, query=query, response=response,
                     conversation_id=conversation_id,
                     meta_data=meta)
    session.add(m)
    session.commit()
    return m.id
|
||||
|
||||
|
||||
@with_session
def update_message(session, message_id, response: str = None, metadata: Dict = None):
    """
    Update an existing chat message record.

    Args:
        message_id: id of the message to update.
        response: new response text; left unchanged when None.
        metadata: new metadata; applied only when it is a dict.

    Returns:
        The message id when the message exists, otherwise None.
    """
    # Load the row through this function's own session (the same way
    # feedback_message_to_db does) rather than through a separately decorated
    # helper, so the object being modified belongs to the session that commits.
    m = session.query(MessageModel).filter_by(id=message_id).first()
    if m is None:
        return None

    if response is not None:
        m.response = response
    if isinstance(metadata, dict):
        m.meta_data = metadata
    session.add(m)
    session.commit()
    return m.id
|
||||
|
||||
|
||||
@with_session
def get_message_by_id(session, message_id) -> MessageModel:
    """
    Fetch a chat message record by its id.

    Returns the first matching row, or None when there is none.
    """
    return session.query(MessageModel).filter_by(id=message_id).first()
|
||||
|
||||
|
||||
@with_session
def feedback_message_to_db(session, message_id, feedback_score, feedback_reason):
    """
    Store user feedback on a chat message record.

    Args:
        message_id: id of the message being rated.
        feedback_score: score stored in ``feedback_score``.
        feedback_reason: text stored in ``feedback_reason``.

    Returns:
        The message id when the message exists, otherwise None.
    """
    m = session.query(MessageModel).filter_by(id=message_id).first()
    if m is None:
        # Unknown message id: nothing to update, and there is no id to return.
        return None

    m.feedback_score = feedback_score
    m.feedback_reason = feedback_reason
    session.commit()
    return m.id
|
||||
|
||||
|
||||
@with_session
def filter_message(session, conversation_id: str, limit: int = 10):
    """
    Return the most recent answered messages of a conversation.

    Result shape: [{"query": ..., "response": ...}, ...], ordered newest
    first and capped at ``limit`` entries.
    """
    history = session.query(MessageModel).filter_by(conversation_id=conversation_id)
    # The user's latest query is also inserted into the db; ignore that
    # record, which still has an empty response.
    history = history.filter(MessageModel.response != '')
    # Keep only the most recent ``limit`` records.
    history = history.order_by(MessageModel.create_time.desc()).limit(limit)
    # Plain dicts are returned because returning List[MessageModel] directly
    # raised an error.
    return [{"query": row.query, "response": row.response} for row in history.all()]
|
||||
Reference in New Issue
Block a user