245 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			245 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
| #! /usr/bin/python
 | |
| 
 | |
| # Copyright (C) 2009-2012, International Business Machines Corporation, Google and Others.
 | |
| # All rights reserved.
 | |
| 
 | |
| #
 | |
| #  Script to check and fix svn property settings for CLDR source files.
 | |
| #  This script is a modified version of ICU's icu-svnprops-check.py.
 | |
| #  Also check for the correct line endings on files with svn:eol-style = native
 | |
| #
 | |
| #  THIS SCRIPT DOES NOT WORK ON WINDOWS
 | |
| #     It only works correctly on platforms where the native line ending is a plain \n
 | |
| #
 | |
| #  usage:
 | |
| #     cldr-svnprops-check.py  [options]
 | |
| #
 | |
| #  options:
 | |
| #     -f | --fix     Fix any problems that are found
 | |
| #     -h | --help    Print a usage line and exit.
 | |
| #
 | |
| #  The tool operates recursively on the directory from which it is run.
 | |
| #  Only files from the svn repository are checked.
 | |
| #  No changes are made to the repository; only the working copy will be altered.
 | |
| 
 | |
| import sys
 | |
| import os
 | |
| import os.path
 | |
| import re
 | |
| import getopt
 | |
| 
 | |
| #
 | |
| #  svn autoprops definitions.
 | |
| #      Copy and paste here the ICU recommended auto-props from
 | |
| #      http://icu-project.org/docs/subversion_howto/index.html
 | |
| #
 | |
| #  This program will parse this autoprops string, and verify that files in
 | |
| #  the repository have the recommended properties set.
 | |
| #
 | |
| svn_auto_props = """
 | |
| ### Section for configuring automatic properties.
 | |
| [auto-props]
 | |
| ### The format of the entries is:
 | |
| ###   file-name-pattern = propname[=value][;propname[=value]...]
 | |
| ### The file-name-pattern can contain wildcards (such as '*' and
 | |
| ### '?').  All entries which match will be applied to the file.
 | |
| ### Note that auto-props functionality must be enabled, which
 | |
| ### is typically done by setting the 'enable-auto-props' option.
 | |
| *.c = svn:eol-style=native
 | |
| *.cc = svn:eol-style=native
 | |
| *.cpp = svn:eol-style=native
 | |
| *.h = svn:eol-style=native
 | |
| *.rc = svn:eol-style=native
 | |
| *.dsp = svn:eol-style=native
 | |
| *.dsw = svn:eol-style=native
 | |
| *.sln = svn:eol-style=native
 | |
| *.vcproj = svn:eol-style=native
 | |
| configure = svn:eol-style=native;svn:executable
 | |
| *.sh = svn:eol-style=native;svn:executable
 | |
| *.pl = svn:eol-style=native;svn:executable
 | |
| *.py = svn:eol-style=native;svn:executable
 | |
| *.txt = svn:mime-type=text/plain;svn:eol-style=native
 | |
| *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
 | |
| *.ucm = svn:eol-style=native
 | |
| *.html = svn:eol-style=native;svn:mime-type=text/html
 | |
| *.htm = svn:eol-style=native;svn:mime-type=text/html
 | |
| *.xml = svn:eol-style=native
 | |
| Makefile = svn:eol-style=native
 | |
| *.in = svn:eol-style=native
 | |
| *.mak = svn:eol-style=native
 | |
| *.mk = svn:eol-style=native
 | |
| *.png = svn:mime-type=image/png
 | |
| *.jpeg = svn:mime-type=image/jpeg
 | |
| *.jpg = svn:mime-type=image/jpeg
 | |
| *.bin = svn:mime-type=application/octet-stream
 | |
| *.brk = svn:mime-type=application/octet-stream
 | |
| *.cnv = svn:mime-type=application/octet-stream
 | |
| *.dat = svn:mime-type=application/octet-stream
 | |
| *.icu = svn:mime-type=application/octet-stream
 | |
| *.res = svn:mime-type=application/octet-stream
 | |
| *.spp = svn:mime-type=application/octet-stream
 | |
| # new additions 2007-dec-5 srl
 | |
| *.rtf = mime-type=text/rtf
 | |
| *.pdf = mime-type=application/pdf
 | |
| # changed 2008-04-08: modified .txt, above, adding mime-type
 | |
| # changed 2010-11-09: modified .java, adding mime-type
 | |
| # Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
 | |
| """
 | |
| 
 | |
| 
 | |
# file_types:  The parsed form of the svn auto-props specification.
#              A list with one entry per file type - .cc, .cpp, .txt, etc.
#              Each entry is a (type, proplist) tuple.
#              "type" is a regular expression string that will match a file name.
#              "proplist" is a list with one (prop name, prop value) tuple per
#              property.  A property given without a value, e.g. svn:executable,
#              has the empty string as its value.
file_types = []


#
#  Translate one auto-props file name pattern into a regular expression string.
#      e.g.  "*.cpp"  ==>  ".*\.cpp$"
#  '*' matches any run of characters and '?' matches any single character;
#  every other character is escaped so that it only matches itself.
#
def _auto_props_pattern_to_regex(pattern):
    pieces = []
    for ch in pattern:
        if ch == "*":
            pieces.append(".*")
        elif ch == "?":
            pieces.append(".")
        else:
            pieces.append(re.escape(ch))
    return "".join(pieces) + "$"


#
#  Parse an auto-props specification into (type, proplist) entries.
#    auto_props:  the specification text.  Defaults to the module's svn_auto_props.
#    result:      the list that parsed entries are appended to.  Defaults to the
#                 module's file_types list.
#    return:      the list the entries were appended to.
#
#  Blank lines, comment lines and the [auto-props] header are skipped.  A line
#  that does not look like "<file-type> = ..." is reported and skipped.
#
def parse_auto_props(auto_props=None, result=None):
    if auto_props is None:
        auto_props = svn_auto_props
    if result is None:
        result = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)
        file_type = _auto_props_pattern_to_regex(file_type.strip())

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # split on ';' into a list of properties.  The negative lookahead and lookbehind
        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' leaves an empty item; there is no
                # property to check for it.
                continue
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        result.append((file_type, proplist))
    return result
 | |
| 
 | |
|         
 | |
#
#  Run a shell command and return what it wrote to standard output.
#    cmd:     the command line; it is run through the shell by os.popen().
#    return:  the command's standard output, as one string.
#
#  If the command fails, a message is written to stderr and the script exits
#  with a non-zero status.
#
def runCommand(cmd):
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" ' + cmd + ' " failed.  Exiting.\n')
        # close() returns a wait()-style status with the child's exit code in
        # the high byte.  Handing that straight to sys.exit() would turn an
        # exit code of 1 (status 256) into a process exit status of 0, so
        # decode it first and never exit with 0 on failure.
        if os.WIFEXITED(exit_status):
            exit_code = os.WEXITSTATUS(exit_status)
        else:
            exit_code = 1
        sys.exit(exit_code or 1)
    return output_text
 | |
| 
 | |
| 
 | |
#
#  Print a one line summary of the command line syntax to standard output.
#
def usage():
    # A single parenthesized argument prints identically under Python 2 and 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
 | |
| 
 | |
|     
 | |
#
#  Report whether a byte string can be decoded with the given encoding.
#
def _decodes_as(data, encoding):
    try:
        data.decode(encoding)
    except UnicodeDecodeError:
        return False
    return True


#
#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
#  A warning is printed for a file that is neither ASCII nor UTF-8, and a note
#  is printed for a UTF-8 file that starts with a byte order mark.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes so that the checks below do not depend on the platform's
    # default text encoding.
    with open(file_name, "rb") as f:
        data = f.read()

    if _decodes_as(data, "ascii"):
        # pure ASCII (this includes an empty file).
        return base_mime_type

    if not _decodes_as(data, "UTF-8"):
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare all three bytes of the byte order mark: 0xEF on its own is also
    # the lead byte of every character from U+F000 to U+FFFF.
    if data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'
 | |
| 
 | |
| 
 | |
#
#  Quote a string so that a POSIX shell passes it to a command as one literal
#  argument, whatever characters it contains.
#
def _shell_quote(arg):
    return "'" + arg.replace("'", "'\"'\"'") + "'"


#
#  Report whether a file contains any DOS (CR LF) line ending.
#
def _has_dos_line_endings(file_name):
    with open(file_name, "rb") as f:
        return b"\r\n" in f.read()


#
#  Rewrite a file in place with its DOS (CR LF) line endings turned into plain LF.
#
def _remove_dos_line_endings(file_name):
    with open(file_name, "rb") as f:
        data = f.read()
    with open(file_name, "wb") as f:
        f.write(re.sub(br"\r+\n", b"\n", data))


#
#  Check, and optionally fix, the svn properties and line endings of every
#  file that "svn ls -R" lists below the current directory.
#    argv:  the command line arguments, without the program name.
#
#  Each needed change is printed as an "svn propset" command; the commands are
#  only executed when -f / --fix was given.  Exits with status 2 on a command
#  line error.
#
def main(argv):
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception names the offending option; argv[0] need not be it.
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # auto-props patterns describe the file name, not its path: match on
        # the base name so that entries without a leading wildcard, such as
        # "Makefile" and "configure", also apply inside subdirectories.
        base_name = os.path.basename(f)
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand("svn propget --strict " + propname + " " + quoted_f)
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # NOTE(review): a value-less property such as svn:executable has
                # propval == "".  If "svn propget" prints nothing for a file that
                # lacks the property, the first comparison below is true and the
                # missing property is not reported - confirm against the svn
                # version in use.
                if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                    propset_cmd = "svn propset %s %s %s" % (propname, _shell_quote(propval), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    # Inspect the file contents directly, so that an empty file
                    # is not reported and a file with mixed line endings is.
                    if _has_dos_line_endings(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _remove_dos_line_endings(f)
                        else:
                            print(f + " contains DOS CR characters.")
 | |
| 
 | |
| 
 | |
# Run the check only when this file is executed as a script (not when it is
# imported), passing the command line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
 |