资源

正文

其他

从官网下载的 PDF 中复制文字时，原文换行的位置会残留空格，手动逐个删除非常麻烦。可以用下面这段 Python 代码，一次性移除指定文件夹内所有 .md 文件里中文字符之间的空白：

import os
import re

# A run of whitespace that sits directly between two CJK Unified Ideographs
# (U+4E00-U+9FFF).  Lookbehind/lookahead are used instead of capture groups so
# the neighbouring characters are not consumed by the match; otherwise a
# character could only border one gap and "中 文 字" would become "中文 字".
_CJK_GAP_PATTERN = re.compile(r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])')


def remove_spaces_between_chinese(text: str) -> str:
    """Return *text* with whitespace between adjacent Chinese characters removed.

    Only whitespace that has a character in the range U+4E00-U+9FFF on both
    sides is removed; whitespace next to Latin letters, digits or punctuation
    (including full-width CJK punctuation, which lies outside that range) is
    left untouched.

    Note that ``\\s`` also matches line breaks, so a newline (or blank line)
    whose neighbours are both Chinese characters is removed as well.

    Args:
        text: The string to clean up.

    Returns:
        A new string with the matching whitespace runs deleted.
    """
    return _CJK_GAP_PATTERN.sub('', text)

def process_md_files(directory):
    """Clean every ``.md`` file directly inside *directory* in place.

    Each file is read as UTF-8, passed through
    ``remove_spaces_between_chinese`` and written back only when the content
    actually changed.  The name of every rewritten file is printed.
    Sub-directories are not descended into.

    Args:
        directory: Path of the folder whose ``.md`` files should be processed.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be
            read/written (e.g. ``FileNotFoundError``, ``PermissionError``).
        UnicodeDecodeError: If a ``.md`` file is not valid UTF-8.
    """
    for filename in os.listdir(directory):
        if not filename.endswith(".md"):
            continue

        file_path = os.path.join(directory, filename)
        # os.listdir also returns directories; one named "something.md"
        # would make open() fail, so only regular files are processed.
        if not os.path.isfile(file_path):
            continue

        # newline='' turns off newline translation, so the file's original
        # line endings (LF or CRLF) survive the read/write round trip.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            content = file.read()

        # Remove the whitespace between Chinese characters.
        updated_content = remove_spaces_between_chinese(content)

        # Write back only if something changed, leaving clean files untouched.
        if content != updated_content:
            with open(file_path, 'w', encoding='utf-8', newline='') as file:
                file.write(updated_content)
            print(f"Processed: {filename}")

# Folder whose .md files will be cleaned in place; replace the placeholder
# with a real path before running the script.
directory_path = "your_directory_path_here"
process_md_files(directory=directory_path)