#!/usr/bin/env python
# LexGen.py - implemented 2002 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.

# Regenerate the Scintilla and SciTE source files that list
# all the lexers and all the properties files.
# Should be run whenever a new lexer is added or removed.
# Requires Python 2.4 or later
# Most files are regenerated in place with templates stored in comments.
# The VS .NET project file is generated into a different file as the
# VS .NET environment will not retain comments when modifying the file.
# The files are copied to a string apart from sections between a
# ++Autogenerated comment and a --Autogenerated comment which is
# generated by the CopyWithInsertion function. After the whole
# string is instantiated, it is compared with the target file and
# if different the file is rewritten.
# Does not regenerate the Visual C++ 6 project files but does the VS .NET
# project file.
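
# As an illustrative sketch (the marker wording varies between the real
# target files), an autogenerated section inside a processed file looks like:
#   //++Autogenerated -- section regenerated by this script
#   //**\(\tLINK_LEXER(\*);\n\)
#       LINK_LEXER(lmAda);
#       LINK_LEXER(lmCPP);
#   //--Autogenerated -- end of automatically generated section
# Only the lines between the definition line and the --Autogenerated marker
# are replaced; the markers and the definition line are kept when a file is
# regenerated in place.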

import string
import sys
import os
import glob

# EOL constants
CR = "\r"
LF = "\n"
CRLF = "\r\n"
if sys.platform == "win32":
    NATIVE = CRLF
else:
    # Yes, LF is the native EOL even on Mac OS X. CR is just for
    # Mac OS <=9 (a.k.a. "Mac Classic")
    NATIVE = LF

# Automatically generated sections contain start and end comments,
# a definition line and the results.
# The results are replaced by regenerating based on the definition line.
# The definition line is a comment prefix followed by "**".
# If there is a digit after the ** then this indicates which list to use
# and the digit and next character are not part of the definition.
# Backslash is used as an escape within the definition line.
# The part between \( and \) is repeated for each item in the list.
# \* is replaced by each list item. \t and \n are tab and newline.
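# As an illustrative example (hypothetical definition line), with
# lists[0] == ["LexAda", "LexCPP"] the definition
#   SOURCES=\(\*.cxx \)
# expands to
#   SOURCES=LexAda.cxx LexCPP.cxx
# and a "\" line continuation is inserted whenever a generated line would
# reach 80 characters.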
def CopyWithInsertion(input, commentPrefix, retainDefs, eolType, *lists):
    copying = 1
    listid = 0
    output = []
    for line in input.splitlines(0):
        isStartGenerated = line.startswith(commentPrefix + "++Autogenerated")
        if copying and not isStartGenerated:
            output.append(line)
        if isStartGenerated:
            if retainDefs:
                output.append(line)
            copying = 0
            definition = ""
        elif not copying and line.startswith(commentPrefix + "**"):
            if retainDefs:
                output.append(line)
            definition = line[len(commentPrefix + "**"):]
            if (commentPrefix == "<!--") and (" -->" in definition):
                definition = definition.replace(" -->", "")
            listid = 0
            if definition[0] in string.digits:
                listid = int(definition[:1])
                definition = definition[2:]
            # Hide double backslashes as a control character
            definition = definition.replace("\\\\", "\001")
            # Do some normal C style transforms
            definition = definition.replace("\\n", "\n")
            definition = definition.replace("\\t", "\t")
            # Get the doubled backslashes back as single backslashes
            definition = definition.replace("\001", "\\")
            startRepeat = definition.find("\\(")
            endRepeat = definition.find("\\)")
            intro = definition[:startRepeat]
            out = ""
            if intro.endswith("\n"):
                pos = 0
            else:
                pos = len(intro)
            out += intro
            middle = definition[startRepeat+2:endRepeat]
            for i in lists[listid]:
                item = middle.replace("\\*", i)
                if pos and (pos + len(item) >= 80):
                    out += "\\\n"
                    pos = 0
                out += item
                pos += len(item)
                if item.endswith("\n"):
                    pos = 0
            outro = definition[endRepeat+2:]
            out += outro
            out = out.replace("\n", eolType)    # correct EOLs in generated content
            output.append(out)
        elif line.startswith(commentPrefix + "--Autogenerated"):
            copying = 1
            if retainDefs:
                output.append(line)
    output = [line.rstrip(" \t") for line in output]    # trim trailing whitespace
    return eolType.join(output) + eolType

def UpdateFile(filename, updated):
    """ If the file is different to updated then copy updated
    into the file else leave alone so CVS and make don't treat
    it as modified. """
    try:
        infile = open(filename, "rb")
    except IOError:    # File is not there yet
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("New %s" % filename)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    if updated != original:
        os.unlink(filename)
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("Changed %s " % filename)
    #~ else:
    #~     print "Unchanged", filename

def Generate(inpath, outpath, commentPrefix, eolType, *lists):
    """Generate 'outpath' from 'inpath'.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of following constants: LF, CRLF,
    CR, or NATIVE.
    """
    #print "generate '%s' -> '%s' (comment prefix: %r, eols: %r)"\
    #    % (inpath, outpath, commentPrefix, eolType)
    try:
        infile = open(inpath, "rb")
    except IOError:
        print("Can not open %s" % inpath)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    updated = CopyWithInsertion(original, commentPrefix,
        inpath == outpath, eolType, *lists)
    UpdateFile(outpath, updated)

def Regenerate(filename, commentPrefix, eolType, *lists):
    """Regenerate the given file.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of following constants: LF, CRLF,
    CR, or NATIVE.
    """
    Generate(filename, filename, commentPrefix, eolType, *lists)
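
# FindModules scans a lexer source file for LexerModule declarations.
# As an illustrative (hypothetical) example, a line such as
#   LexerModule lmAda(SCLEX_ADA, ColouriseAdaDoc, "ada");
# contributes the module name "lmAda" to the returned list.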
def FindModules(lexFile):
    modules = []
    f = open(lexFile)
    for l in f.readlines():
        if l.startswith("LexerModule"):
            l = l.replace("(", " ")
            modules.append(l.split()[1])
    return modules

# Properties that start with lexer. or fold. are automatically found but there are some
# older properties that don't follow this pattern so must be explicitly listed.
knownIrregularProperties = [
    "fold",
    "styling.within.preprocessor",
    "tab.timmy.whinge.level",
    "asp.default.language",
    "html.tags.case.sensitive",
    "ps.level",
    "ps.tokenize",
    "sql.backslash.escapes",
    "nsis.uservars",
    "nsis.ignorecase"
]
def FindProperties(lexFile):
    properties = {}
    f = open(lexFile)
    for l in f.readlines():
        if ("GetProperty" in l or "DefineProperty" in l) and "\"" in l:
            l = l.strip()
            if not l.startswith("//"):    # Drop comments
                propertyName = l.split("\"")[1]
                if propertyName.lower() == propertyName:
                    # Only allow lower case property names
                    if propertyName in knownIrregularProperties or \
                            propertyName.startswith("fold.") or \
                            propertyName.startswith("lexer."):
                        properties[propertyName] = 1
    return properties
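
# FindPropertyDocumentation gathers the documentation text attached to each
# property. An illustrative (hypothetical) example of the comment form
# recognised below:
#   // property tab.timmy.whinge.level
#   //   For Python code, checks whether indenting is consistent.
# yields "For Python code, checks whether indenting is consistent." as the
# documentation for that property.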
def FindPropertyDocumentation(lexFile):
    documents = {}
    f = open(lexFile)
    name = ""
    for l in f.readlines():
        l = l.strip()
        if "// property " in l:
            propertyName = l.split()[2]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif "DefineProperty" in l and "\"" in l:
            propertyName = l.split("\"")[1]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif name:
            if l.startswith("//"):
                if documents[name]:
                    documents[name] += " "
                documents[name] += l[2:].strip()
            elif l.startswith("\""):
                l = l[1:].strip()
                if l.endswith(";"):
                    l = l[:-1].strip()
                if l.endswith(")"):
                    l = l[:-1].strip()
                if l.endswith("\""):
                    l = l[:-1]
                # Fix escaped double quotes
                l = l.replace("\\\"", "\"")
                documents[name] += l
            else:
                name = ""
    for name in list(documents.keys()):
        if documents[name] == "":
            del documents[name]
    return documents

def ciCompare(a,b):
    return cmp(a.lower(), b.lower())

def ciKey(a):
    return a.lower()

def sortListInsensitive(l):
    try:    # Try key function
        l.sort(key=ciKey)
    except TypeError:    # Earlier version of Python, so use comparison function
        l.sort(ciCompare)

def RegenerateAll():
    root="../../"

    # Find all the lexer source code files
    lexFilePaths = glob.glob(root + "scintilla/lexers/Lex*.cxx")
    sortListInsensitive(lexFilePaths)
    lexFiles = [os.path.basename(f)[:-4] for f in lexFilePaths]
    print(lexFiles)
    lexerModules = []
    lexerProperties = {}
    propertyDocuments = {}
    for lexFile in lexFilePaths:
        lexerModules.extend(FindModules(lexFile))
        for k in FindProperties(lexFile).keys():
            lexerProperties[k] = 1
        documents = FindPropertyDocumentation(lexFile)
        for k in documents.keys():
            propertyDocuments[k] = documents[k]
    sortListInsensitive(lexerModules)
    lexerProperties = list(lexerProperties.keys())
    sortListInsensitive(lexerProperties)

    # Generate HTML to document each property
    # This is done because tags can not be safely put inside comments in HTML
    documentProperties = list(propertyDocuments.keys())
    sortListInsensitive(documentProperties)
    propertiesHTML = []
    for k in documentProperties:
        propertiesHTML.append("\t<tr>\n\t<td>%s</td>\n\t<td>%s</td>\n\t</tr>" %
            (k, propertyDocuments[k]))

    # Find all the SciTE properties files
    otherProps = ["abbrev.properties", "Embedded.properties", "SciTEGlobal.properties", "SciTE.properties"]
    if os.path.exists(root + "scite"):
        propFilePaths = glob.glob(root + "scite/src/*.properties")
        sortListInsensitive(propFilePaths)
        propFiles = [os.path.basename(f) for f in propFilePaths if os.path.basename(f) not in otherProps]
        sortListInsensitive(propFiles)
        print(propFiles)

    Regenerate(root + "scintilla/src/Catalogue.cxx", "//", NATIVE, lexerModules)
    Regenerate(root + "scintilla/win32/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla_vc6.mak", "#", NATIVE, lexFiles)
    if os.path.exists(root + "scite"):
        Regenerate(root + "scite/win32/makefile", "#", NATIVE, propFiles)
        Regenerate(root + "scite/win32/scite.mak", "#", NATIVE, propFiles)
        Regenerate(root + "scite/src/SciTEProps.cxx", "//", NATIVE, lexerProperties)
        Regenerate(root + "scite/doc/SciTEDoc.html", "<!--", NATIVE, propertiesHTML)
        Generate(root + "scite/boundscheck/vcproj.gen",
            root + "scite/boundscheck/SciTE.vcproj", "#", NATIVE, lexFiles)

RegenerateAll()