[前端+RAG] 异步上传+前端轮询自动刷新导读;PDF阅读模式合并行消除留白
This commit is contained in:
@@ -957,22 +957,33 @@ class FileConverter:
|
||||
|
||||
if text.strip():
|
||||
any_text = True
|
||||
# 按行处理文本,识别标题
|
||||
lines = text.split('\n')
|
||||
# 合并连续非空行为段落,空行分段,标题行独立
|
||||
current_para = []
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
# 空行 → 结束当前段落
|
||||
if current_para:
|
||||
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||
current_para = []
|
||||
continue
|
||||
# 简单的标题检测:短行 + 无标点结尾
|
||||
is_heading = (len(line) < 40 and not line.endswith(('。', ',', ';', '、', ':', ',', '.', ';'))
|
||||
and not line.startswith(('(', '('))
|
||||
and re.match(r'^[一二三四五六七八九十\d]+[、..]', line))
|
||||
# 标题检测
|
||||
is_heading = (len(stripped) < 30
|
||||
and not stripped.endswith(('。', ',', ';', '、', ':', ',', '.', ';'))
|
||||
and not stripped.startswith(('(', '('))
|
||||
and re.match(r'^[一二三四五六七八九十\d]+[、..]', stripped))
|
||||
if is_heading:
|
||||
escaped = self._escape_html(line)
|
||||
page_parts.append(f'<h3>{escaped}</h3>')
|
||||
# 先输出累积的段落
|
||||
if current_para:
|
||||
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||
current_para = []
|
||||
page_parts.append(f'<h3>{self._escape_html(stripped)}</h3>')
|
||||
else:
|
||||
escaped = self._escape_html(line)
|
||||
page_parts.append(f'<p>{escaped}</p>')
|
||||
current_para.append(stripped)
|
||||
# 输出最后一个段落
|
||||
if current_para:
|
||||
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||
|
||||
# 渲染表格
|
||||
for table in tables:
|
||||
|
||||
Reference in New Issue
Block a user