Files
gangyan/langchain-chat/server/chat/KgoSearchAPIWrapper.py

212 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import urllib
from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator
import requests
from typing import Dict, List
from configs import kgo_search_url, kgo_professional_search_url
from configs import LLM_MODELS, TEMPERATURE, MAX_TOKENS
from server.chat.policy_fun_iast import get_llm_model_response
from datetime import datetime
import json
from configs.basic_config import *
class KgoSearchAPIWrapper(BaseModel):
    """Wrapper around the KGO search HTTP endpoints configured in ``configs``.

    Two endpoints are used: the plain search (``kgo_search_url``) and the
    "professional" expression search (``kgo_professional_search_url``).
    Both are expected to answer with JSON of the form
    ``{"code": 10000, "results": {"datas": [...]}}`` on success.
    """

    # Extra query parameters merged into every search request.
    search_kwargs: dict = Field(default_factory=dict)
    # Seconds to wait for the search service before giving up; without a
    # timeout ``requests`` would block forever on a stalled connection.
    request_timeout: float = 30.0
    # Maps a resource-type name (as found in a result's "resource_type") to
    # the numeric code used in detail-page links.
    # NOTE(review): the same codes appear to be valid ``kgo_search_type``
    # values (the script entry point passes "1000") -- confirm with the API.
    search_map: Dict[str, str] = {
        "全部": "1000",
        "期刊论文": "1002",
        "学位论文": "1003",
        "会议论文": "1004",
        "政策": "1005",
        "成果": "1006",
        "科技成果": "1006",
        "项目": "1007",
        "报告": "1010",
        "图书": "1011",
        "外文期刊论文": "1013",
        "外文资料": "1013",
        "期刊": "1015",
        "专利": "3001",
        "新闻": "4001",
        "数据集": "4004",
        "视频": "4005",
        "统计数据": "6001",
    }

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @staticmethod
    def _extract_datas(payload) -> List[dict]:
        """Return ``payload["results"]["datas"]`` of a successful response.

        Any other shape (wrong status code, missing or null keys, non-dict
        payload) yields an empty list instead of raising.
        """
        if not isinstance(payload, dict) or payload.get("code") != 10000:
            return []
        results = payload.get("results")
        if not isinstance(results, dict):
            return []
        return results.get("datas") or []

    def _build_link(self, result: dict) -> str:
        """Build the detail-page URL for one search hit.

        Returns an empty string when the hit has no ``source_table_name`` or
        no ``resource_type``. An unknown resource type or a missing ``_id``
        produces an empty path segment rather than an exception.
        """
        source_table_name = result.get("source_table_name", "")
        resource_type = result.get("resource_type", "")
        if not (source_table_name and resource_type):
            return ""
        type_code = self.search_map.get(resource_type, "")
        record_id = result.get("_id", "")
        return f"https://kgo.ckcest.cn/kgo/detail/{type_code}/{source_table_name}/{record_id}.html"

    def _kgo_search_results(self, kgo_search_type: str, search_text: str) -> List[dict]:
        """Query the plain search endpoint and return the raw hit list.

        Args:
            kgo_search_type: Value sent as the ``type`` query parameter.
            search_text: Value sent as the ``text`` query parameter.

        Returns:
            The list under ``results.datas`` when the response code is
            10000, otherwise an empty list.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
        """
        params = {
            "type": kgo_search_type,
            "model": 20,
            "text": search_text,
            **self.search_kwargs,
        }
        response = requests.get(
            kgo_search_url,
            params=params,  # type: ignore
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return self._extract_datas(response.json())

    def _kgo_professional_search_results(self, kgo_search_type: str, search_text: str, origin_query: str) -> List[dict]:
        """Query the professional (expression) search endpoint.

        The LLM is first asked, via the ``kgo_get_year`` prompt, for a
        publication-time expression derived from ``origin_query``; that
        expression is AND-ed with ``theme=<search_text>``.

        Args:
            kgo_search_type: Placed verbatim (not URL-encoded) in the
                ``type`` query parameter so that commas stay literal.
            search_text: Theme text to search for.
            origin_query: The user's original question, given to the LLM.

        Returns:
            The list under ``results.datas`` on success; an empty list when
            the request fails or the response has no usable data.
        """
        # Imported here because a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule is loaded.
        from urllib.parse import urlencode

        year = get_llm_model_response(
            strategy_name="kgo_get_year",
            llm_model_name=LLM_MODELS[0],
            template_prompt_name="kgo_get_year",
            prompt_param_dict={
                "query": origin_query,
                "year": datetime.now().strftime("%Y"),
            },
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )
        logger.info(f'KGO资源发布时间表达式{year}')

        # Only append the time clause when the LLM actually produced one;
        # an empty clause would leave a dangling " AND " in the expression.
        express = "theme=" + search_text
        year_expr = year.strip() if isinstance(year, str) else ""
        if year_expr:
            express += " AND " + year_expr

        params = {
            "model": 20,
            "express": express,
            **self.search_kwargs,
        }
        # Encode everything except ``type``, which is appended raw.
        query_params = urlencode(params)
        url = f"{kgo_professional_search_url}?type={kgo_search_type}&{query_params}"
        logger.info(f'KGO专业检索请求URL: {url}')
        try:
            response = requests.get(url, timeout=self.request_timeout)
            response.raise_for_status()
            return self._extract_datas(response.json())
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on older ``requests`` versions.
            logger.error(f"请求异常: {e}")
        # Any failure falls through to "no results".
        return []

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Pass the incoming field values through unchanged (no checks are performed)."""
        return values

    def run(self, text: str, kgo_search_type: str) -> str:
        """Run a plain search and return the hit texts joined by spaces.

        Returns the fixed string ``"No good Kgo Search Result was found"``
        when the search yields nothing.
        """
        results = self._kgo_search_results(search_text=text, kgo_search_type=kgo_search_type)
        if not results:
            return "No good Kgo Search Result was found"
        # "snippet" stays the first choice for backward compatibility; the
        # other methods of this class read the text from "AB", so fall back
        # to it instead of raising KeyError.
        snippets = [str(result.get("snippet") or result.get("AB", "")) for result in results]
        return " ".join(snippets)

    def results(self, text: str, kgo_search_type: str, origin_query: str, *, max_results: int = 5) -> List[Dict]:
        """Run a professional search and return metadata for the top hits.

        Args:
            text: The theme text to search for.
            kgo_search_type: Value for the ``type`` query parameter.
            origin_query: The user's original question (used to derive the
                publication-time filter).
            max_results: Maximum number of hits to return (default 5).

        Returns:
            A list of dictionaries with the keys ``snippet``, ``title``,
            ``link``, ``author``, ``keywords``, ``publish_year`` and
            ``resource_type``; or a single ``{"Result": ...}`` entry when
            nothing was found.
        """
        results = self._kgo_professional_search_results(
            search_text=text, kgo_search_type=kgo_search_type, origin_query=origin_query
        )
        if not results:
            return [{"Result": "No good Kgo Search Result was found"}]

        metadata_results = []
        for result in results[:max_results]:
            metadata_results.append({
                "snippet": result.get("AB", ""),
                "title": result.get("title", ""),
                "link": self._build_link(result),
                # ``or []`` guards against an explicit null in the payload.
                "author": ",".join(map(str, result.get("author") or [])),
                "keywords": ",".join(map(str, result.get("keywords") or [])),
                "publish_year": result.get("publish_year", ""),
                "resource_type": result.get("resource_type", ""),
            })
        return metadata_results

    def results_normal(self, text: str, kgo_search_type: str) -> List[Dict]:
        """Run a plain search and return metadata for every hit.

        Returns:
            A list of dictionaries with the keys ``snippet``,
            ``publish_year``, ``resource_type``, ``title``, ``link`` and
            ``keywords`` (``keywords`` is passed through as received); or a
            single ``{"Result": ...}`` entry when nothing was found.
        """
        results = self._kgo_search_results(search_text=text, kgo_search_type=kgo_search_type)
        if not results:
            return [{"Result": "No good Kgo Search Result was found"}]
        return [
            {
                "snippet": result.get("AB", ""),
                "publish_year": result.get("publish_year", ""),
                "resource_type": result.get("resource_type", ""),
                "title": result.get("title", ""),
                "link": self._build_link(result),
                "keywords": result.get("keywords", ""),
            }
            for result in results
        ]
if __name__ == '__main__':
    # Manual smoke test: performs a real professional search (network and
    # LLM call) over all resource types ("1000") and prints the metadata.
    query = "物理气相沉积"
    search = KgoSearchAPIWrapper()
    # ``results`` requires ``origin_query``; the query doubles as the
    # original user question here.
    search_results = search.results(query, "1000", origin_query=query)
    print(search_results)