资源
正文
其他
从官网获取的 PDF 中拷贝文字时，原文换行的位置会留下多余的空格，手动删除实在太麻烦了！可以用下面这段 Python 代码，一次性移除指定目录下所有 .md 文件中汉字之间的空格：
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| import os import re
# Whitespace run that sits directly between two CJK ideographs
# (U+4E00-U+9FFF). The neighbours are checked with lookbehind/lookahead so
# they are NOT consumed by the match; otherwise the right-hand character of
# one match could not serve as the left-hand character of the next, and
# "中 文 字" would only be collapsed to "中文 字".
_CJK_GAP = re.compile(r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])')


def remove_spaces_between_chinese(text):
    """Remove every whitespace run located between two Chinese characters.

    A "Chinese character" here is any code point in U+4E00-U+9FFF. Whitespace
    next to anything else (Latin letters, digits, punctuation) is left alone.
    Note that ``\\s`` also matches line breaks, so a newline separating two
    Chinese characters is removed as well.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with the matching whitespace runs deleted.
    """
    return _CJK_GAP.sub('', text)
def process_md_files(directory):
    """Clean every ``.md`` file found directly inside ``directory``.

    Each file is read as UTF-8 and passed through
    ``remove_spaces_between_chinese``. A file is rewritten in place (UTF-8)
    only when the cleaned text differs from what was read, and its name is
    then printed.

    Args:
        directory: Path of the folder to scan. Sub-folders are not entered,
            because only the names returned by ``os.listdir`` are examined.
    """
    markdown_names = [name for name in os.listdir(directory) if name.endswith(".md")]

    for filename in markdown_names:
        target = os.path.join(directory, filename)
        with open(target, 'r', encoding='utf-8') as source:
            original = source.read()

        cleaned = remove_spaces_between_chinese(original)
        if cleaned == original:
            # Nothing changed: leave the file on disk untouched.
            continue

        with open(target, 'w', encoding='utf-8') as sink:
            sink.write(cleaned)
        # NOTE(review): assumed to report only files that were rewritten;
        # confirm against the original script's indentation.
        print(f"Processed: {filename}")
# Placeholder value: point this at the folder that contains the .md files
# before running the script.
directory_path = "your_directory_path_here"
process_md_files(directory_path)
|