Files

10 lines
318 B
Python
Raw Permalink Normal View History

# Load a local HTML report as raw text, split it into structured elements
# with `unstructured`, and print both the raw document and the elements.
from langchain_community.document_loaders import TextLoader
from unstructured.partition.html import partition_html

# Path of the HTML report to parse, relative to the working directory.
data_path = './人工智能发展月报.html'

# The report contains non-ASCII (Chinese) text, so do not rely on the
# platform's default locale encoding: try UTF-8 first and, if decoding fails,
# let the loader fall back to detecting the file's actual encoding.
loader = TextLoader(data_path, encoding="utf-8", autodetect_encoding=True)
data = loader.load()
print(data)

# The loaded document's `page_content` is the raw HTML markup;
# `partition_html` turns it into a sequence of elements.
rst = partition_html(text=data[0].page_content)

# Print the string form of each element, separated by a blank line.
print("\n\n".join(str(el) for el in rst))