import multiprocessing import re import time import networkx as nx import numpy as np from textrank4zh import TextRank4Keyword, TextRank4Sentence from joblib import Parallel, delayed, parallel_backend import logging nx.from_numpy_matrix = nx.from_numpy_array logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') def process_text_segment(text_segment, num_sentences): tr4w = TextRank4Keyword() tr4w.analyze(text=text_segment, lower=True, window=5) keywords = [(item.word, item.weight) for item in tr4w.get_keywords(30, word_min_len=4)] tr4s = TextRank4Sentence() tr4s.analyze(text=text_segment, lower=True, source='all_filters') summaries = [item.sentence for item in tr4s.get_key_sentences(num=num_sentences)] return keywords, summaries def split_text_by_sentences(text, n_parts): """Split the text into n_parts based on sentences using regular expressions.""" sentences = re.split(r'(?