# Load a local HTML report as raw text, then split it into structured
# elements with `unstructured` and print both the raw and parsed forms.
from langchain_community.document_loaders import TextLoader
from unstructured.partition.html import partition_html

data_path = './人工智能发展月报.html'

# Read the file into LangChain documents. The platform default encoding is
# tried first (same as before); `autodetect_encoding=True` only kicks in when
# that decode raises UnicodeDecodeError, so machines whose locale encoding
# differs from the file's encoding can still load it.
# NOTE(review): the fallback path relies on the optional `chardet` package.
loader = TextLoader(data_path, autodetect_encoding=True)
data = loader.load()
print(data)

# Parse the HTML markup held by the first loaded document into elements and
# print their text forms separated by blank lines.
rst = partition_html(text=data[0].page_content)
print("\n\n".join(str(el) for el in rst))