# Original listing metadata: Python, 10 lines, 318 B
# Load a local HTML report as raw text, split it into elements with
# `unstructured`, and print both the loaded documents and the elements.
from langchain_community.document_loaders import TextLoader
from unstructured.partition.html import partition_html

data_path = './人工智能发展月报.html'

# Read the file as plain text. UTF-8 is tried first rather than the
# platform's locale encoding (which breaks on non-UTF-8 locales for a file
# with Chinese content); if UTF-8 decoding fails, the loader falls back to
# encoding detection.
loader = TextLoader(data_path, encoding="utf-8", autodetect_encoding=True)
data = loader.load()
print(data)

# Parse the text content of the first loaded document as HTML.
rst = partition_html(text=data[0].page_content)

# Print the string form of each element, separated by a blank line.
print("\n\n".join(str(el) for el in rst))