formatting.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. #
  2. # File : formatting.py
  3. # This file is part of RT-Thread RTOS
  4. # COPYRIGHT (C) 2006 - 2018, RT-Thread Development Team
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License along
  17. # with this program; if not, write to the Free Software Foundation, Inc.,
  18. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19. #
  20. # Change Logs:
  21. # Date Author Notes
  22. # 2021-03-02 Meco Man The first version
  23. # 2021-03-04 Meco Man 增加统一转换成UTF-8编码格式功能
  24. #本文件会自动对指定路径下的所有文件包括子文件夹的文件(仅针对.c.h)进行扫描
  25. # 1)将源文件编码统一为UTF-8;
  26. # 2)将TAB键替换为空格;
  27. # 3)将每行末尾多余的空格删除,并统一换行符为'\n';
  28. #使用时只需要双击本文件,输入要扫描的文件夹路径即可
  29. #不能保证100%全部成功转换为UTF-8,有一些编码特殊或识别不准确会在终端打印信息,需人工转换
  30. #欢迎对本文件的功能继续做出补充,欢迎提交PR
  31. import os
  32. import chardet
  33. #用空格代替TAB键
  34. #这里并不是简单的将TAB替换成4个空格
  35. #空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能
  36. def tab2spaces(line):
  37. list_str = list(line) #字符串打散成列表,放边操作
  38. i = list_str.count('\t')
  39. while i > 0:
  40. ptr = list_str.index('\t')
  41. del list_str[ptr]
  42. space_need_to_insert = 4 - (ptr%4)
  43. j = 0
  44. while j < space_need_to_insert:
  45. list_str.insert(ptr,' ')
  46. j = j+1
  47. i = i-1
  48. line = ''.join(list_str) #列表恢复成字符串
  49. return line
  50. #删除每行末尾多余的空格 统一使用\n作为结尾
  51. def formattail(line):
  52. line = line.rstrip()
  53. line = line + '\n'
  54. return line
  55. #对单个文件进行格式整理
  56. def format_codes(filename):
  57. try:
  58. file=open(filename,'r',encoding = 'utf-8')
  59. file_temp=open('temp','w',encoding = 'utf-8')
  60. for line in file:
  61. line = tab2spaces(line)
  62. line = formattail(line)
  63. file_temp.write(line)
  64. file_temp.close()
  65. file.close()
  66. os.remove(filename)
  67. os.rename('temp',filename)
  68. def get_encode_info(file):
  69. with open(file, 'rb') as f:
  70. code = chardet.detect(f.read())['encoding']
  71. #charde库有一定几率对当前文件的编码识别不准确
  72. if code == 'EUC-JP': #容易将含着少量中文的英文字符文档识别为日语编码格式
  73. code = 'GB2312'
  74. elif code == 'ISO-8859-1': #部分文件GB2312码会被识别成ISO-8859-1
  75. code = 'GB2312'
  76. if not (code == 'ascii' or code == 'utf-8' or code == 'GB2312' #编码识别正确
  77. or code == 'Windows-1252'): # Windows-1252 是由于意法半导体是法国企业's的'是法语的'导致的
  78. if code != None:
  79. print('未处理,需人工确认:'+code+':'+file) #需要人工确认
  80. code = None
  81. return code
  82. #将单个文件转为UTF-8编码
  83. def conver_to_utf_8 (path):
  84. try:
  85. info = get_encode_info(path)
  86. if info == None:
  87. return 0 #0 失败
  88. file=open(path,'rb+')
  89. data = file.read()
  90. string = data.decode(info)
  91. utf = string.encode('utf-8')
  92. file.seek(0)
  93. file.write(utf)
  94. file.close()
  95. return 1 #1成功
  96. except UnicodeDecodeError:
  97. print("UnicodeDecodeError未处理,需人工确认"+path)
  98. return 0
  99. except UnicodeEncodeError:
  100. print("UnicodeEncodeError未处理,需人工确认"+path)
  101. return 0
  102. # 递归扫描目录下的所有文件
  103. def traversalallfile(path):
  104. filelist=os.listdir(path)
  105. for file in filelist:
  106. filepath=os.path.join(path,file)
  107. if os.path.isdir(filepath):
  108. traversalallfile(filepath)
  109. elif os.path.isfile(filepath):
  110. if filepath.endswith(".c") == True or filepath.endswith(".h") == True: #只处理.c和.h文件
  111. if conver_to_utf_8(filepath) == 1: #先把这个文件转为UTF-8编码,1成功
  112. format_codes(filepath) #再对这个文件进行格式整理
  113. def formatfiles():
  114. workpath = input('enter work path: ')
  115. traversalallfile(workpath)
  116. if __name__ == '__main__':
  117. formatfiles()