[全量] 初始化项目代码、配置、文档及Agent协同harness

This commit is contained in:
2026-04-02 11:36:05 +08:00
parent 0553309cdf
commit 87e571d9ec
1133 changed files with 221948 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
# Demo script: read a local HTML report as raw text with LangChain's
# TextLoader, then split that text into elements with unstructured's HTML
# partitioner, printing the result of each stage.
from langchain_community.document_loaders import TextLoader
from unstructured.partition.html import partition_html

data_path = './人工智能发展月报.html'

# Without an explicit encoding the file is decoded with the platform default,
# which fails for non-ASCII content on systems whose locale encoding does not
# match the file. autodetect_encoding=True keeps the default attempt first and
# only falls back to encoding detection when that attempt cannot decode.
loader = TextLoader(data_path, autodetect_encoding=True)
data = loader.load()
print(data)

# Partition the raw HTML held by the first (and, for this loader, presumably
# only) loaded document.
rst = partition_html(text=data[0].page_content)
print("\n\n".join(str(el) for el in rst))

View File

@@ -0,0 +1,21 @@
import sys
from pathlib import Path
from pprint import pprint

# Directory three levels above this file; it is appended to sys.path,
# presumably so that project modules such as `document_loaders` resolve.
root_path = Path(__file__).parent.parent.parent
sys.path.append(str(root_path))

# Sample inputs, keyed by file name and stored as string paths.
test_files = {
    "ocr_test.jpg": str(root_path.joinpath("tests", "samples", "ocr_test.jpg")),
}
def test_rapidocrloader():
    """Run RapidOCRLoader on the sample image and sanity-check its output.

    Passes when ``load()`` returns a non-empty list whose first element
    exposes a string ``page_content``.
    """
    sample_image = test_files["ocr_test.jpg"]

    # Imported inside the test -- presumably so the module-level sys.path
    # setup has already run by the time the import is resolved.
    from document_loaders import RapidOCRLoader

    documents = RapidOCRLoader(sample_image).load()
    pprint(documents)

    assert isinstance(documents, list)
    assert len(documents) > 0
    assert isinstance(documents[0].page_content, str)

View File

@@ -0,0 +1,21 @@
import sys
from pathlib import Path
from pprint import pprint

# Directory three levels above this file; it is appended to sys.path,
# presumably so that project modules such as `document_loaders` resolve.
root_path = Path(__file__).parent.parent.parent
sys.path.append(str(root_path))

# Sample inputs, keyed by file name and stored as string paths.
test_files = {
    "ocr_test.pdf": str(root_path.joinpath("tests", "samples", "ocr_test.pdf")),
}
def test_rapidocrpdfloader():
    """Run RapidOCRPDFLoader on the sample PDF and sanity-check its output.

    Passes when ``load()`` returns a non-empty list whose first element
    exposes a string ``page_content``.
    """
    sample_pdf = test_files["ocr_test.pdf"]

    # Imported inside the test -- presumably so the module-level sys.path
    # setup has already run by the time the import is resolved.
    from document_loaders import RapidOCRPDFLoader

    documents = RapidOCRPDFLoader(sample_pdf).load()
    pprint(documents)

    assert isinstance(documents, list)
    assert len(documents) > 0
    assert isinstance(documents[0].page_content, str)

File diff suppressed because one or more lines are too long