Files
gangyan/langchain-chat/tests/document_loader/test_html.py

10 lines
318 B
Python

# Smoke script: load a local HTML report as raw text, split it into
# structured elements with unstructured's HTML partitioner, and print both
# the loaded documents and the partitioned elements.
from langchain_community.document_loaders import TextLoader
from unstructured.partition.html import partition_html

# Relative path: resolved against the current working directory, so run the
# script from the directory that contains the report.
data_path = './人工智能发展月报.html'

# With no explicit encoding the loader decodes the file using the platform
# default, which is not UTF-8 everywhere (e.g. some Windows locales).
# autodetect_encoding=True makes the loader fall back to detected encodings
# instead of aborting when that first decode attempt fails.
loader = TextLoader(data_path, autodetect_encoding=True)
data = loader.load()
print(data)

# Only the first loaded document is partitioned; its page_content is passed
# to the partitioner as the raw HTML text.
rst = partition_html(text=data[0].page_content)

# One element per paragraph, separated by blank lines.
print("\n\n".join(str(el) for el in rst))