python脚本提取flutter项目的中文字符生成表格文件
- 2020 年 3 月 6 日
- 筆記
思路:获取文件内连续的中文字符(筛选规则:引号内的字符串如果以 \u4e00 到 \u9fa5 范围内的中文字符开头,则提取引号内的内容作为目标文本;(.*?) 为非贪婪模式)。
用法:运行 main.py,在当前工作目录生成 result.xls(Excel)文件。
library requirements
pip3 install xlwt
main.py
import datetime
import os
import re

try:
    import xlwt
except ImportError:
    # xlwt is only needed by ExeclHelper; scanning with FileRead still works
    # without it. ExeclHelper raises a clear ImportError when it is missing.
    xlwt = None


def _require_xlwt():
    """Raise ImportError with an install hint if xlwt is unavailable."""
    if xlwt is None:
        raise ImportError("xlwt is required to write the workbook: pip3 install xlwt")


class FileRead:
    """Collect quoted string literals that start with a Chinese character.

    A literal is captured when the character right after the opening quote is
    in the range U+4E00..U+9FA5. The capture runs non-greedily up to the next
    quote of the same kind on the same line ('.' does not match a newline).
    """

    _SINGLE_QUOTED = re.compile(r"'([\u4e00-\u9fa5].*?)'")
    _DOUBLE_QUOTED = re.compile(r'"([\u4e00-\u9fa5].*?)"')

    def __init__(self, root_dir=r"C:\Users\Administrator\Desktop\flutter\i2-school-app\lib"):
        """Remember the directory scanned when find_file gets an empty path.

        Args:
            root_dir: Directory to scan by default.
        """
        self.root_dir = root_dir

    def find_file(self, dir_path):
        """Recursively list every regular file below a directory.

        Args:
            dir_path: Directory to scan; an empty value means self.root_dir.

        Returns:
            A list of file paths. Entries of each directory are visited in
            sorted name order so the result is deterministic.
        """
        path = dir_path if dir_path else self.root_dir
        found = []
        for name in sorted(os.listdir(path)):
            full_path = os.path.join(path, name)
            if os.path.isfile(full_path):
                found.append(full_path)
            elif os.path.isdir(full_path):
                # Recurse only into real directories; anything else (for
                # example a broken symlink) is skipped instead of crashing.
                found.extend(self.find_file(full_path))
        return found

    def find_chinese_in_file(self, file):
        """Return the set of Chinese-leading string literals found in a file.

        Args:
            file: Path of a UTF-8 encoded text file.

        Returns:
            A set with the contents (quotes excluded) of every single- or
            double-quoted literal whose first character is Chinese.
        """
        with open(file, mode='r', encoding='UTF-8') as handle:
            content = handle.read()
        # Every match starts with a CJK character, so none can be blank and
        # no whitespace-only filtering is needed afterwards.
        matches = set(self._SINGLE_QUOTED.findall(content))
        matches.update(self._DOUBLE_QUOTED.findall(content))
        return matches


class ExeclHelper:
    """Write the collected texts into an .xls translation sheet via xlwt."""

    def set_style(self, name, height, bold=False):
        """Build an xlwt cell style with the given font settings.

        Args:
            name: Font name, e.g. 'Times New Roman'.
            height: Value assigned to the xlwt font height.
            bold: Whether the font is bold.

        Returns:
            An xlwt.XFStyle whose font uses colour index 4.
        """
        _require_xlwt()
        font = xlwt.Font()
        font.name = name
        font.bold = bold
        font.color_index = 4
        font.height = height
        style = xlwt.XFStyle()
        style.font = font
        return style

    def write_execl(self, result, path="result.xls"):
        """Write the texts into the first column of a 'translate' sheet.

        Row 0 holds the headers 'chinese' and 'english'; each text goes into
        column 0 of the following rows, leaving column 1 empty.

        Args:
            result: Iterable of strings to write, one per row.
            path: Output workbook path.
        """
        _require_xlwt()
        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet(u'translate', cell_overwrite_ok=True)

        # One style object is enough for all header cells.
        header_style = self.set_style('Times New Roman', 220, True)
        for col, title in enumerate([u'chinese', u'english']):
            sheet.write(0, col, title, header_style)

        for row, text in enumerate(result, start=1):
            sheet.write(row, 0, text)

        # Same width for both columns.
        sheet.col(0).width = 256 * 40
        sheet.col(1).width = 256 * 40

        workbook.save(path)


def main():
    """Scan the default root directory and write result.xls."""
    print(datetime.datetime.now())
    file_read = FileRead()

    # Keep first-seen order but drop texts repeated across files, so each
    # string appears only once in the sheet.
    seen = set()
    texts = []
    for file_path in file_read.find_file(""):
        for text in sorted(file_read.find_chinese_in_file(file_path)):
            if text not in seen:
                seen.add(text)
                texts.append(text)

    ExeclHelper().write_execl(texts)
    print(datetime.datetime.now())


if __name__ == '__main__':
    main()