#!/usr/bin/env python
# LexGen.py - implemented 2002 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.

# Regenerate the Scintilla and SciTE source files that list
# all the lexers and all the properties files.
# Should be run whenever a new lexer is added or removed.
# Requires Python 2.4 or later
# Most files are regenerated in place with templates stored in comments.
# The VS .NET project file is generated into a different file as the
# VS .NET environment will not retain comments when modifying the file.
# The files are copied to a string apart from sections between a
# ++Autogenerated comment and a --Autogenerated comment which is
# generated by the CopyWithInsertion function. After the whole
# string is instantiated, it is compared with the target file and
# if different the file is rewritten.
# Does not regenerate the Visual C++ 6 project files but does the VS .NET
# project file.
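
# As an illustrative sketch (the marker wording varies between the real
# target files), an autogenerated section inside a processed file looks like:
#   //++Autogenerated -- section regenerated by this script
#   //**\(\tLINK_LEXER(\*);\n\)
#       LINK_LEXER(lmAda);
#       LINK_LEXER(lmCPP);
#   //--Autogenerated -- end of automatically generated section
# Only the lines between the definition line and the --Autogenerated marker
# are replaced; the markers and the definition line are kept when a file is
# regenerated in place.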

import string
import sys
import os
import glob

# EOL constants
CR = "\r"
LF = "\n"
CRLF = "\r\n"
if sys.platform == "win32":
    NATIVE = CRLF
else:
    # Yes, LF is the native EOL even on Mac OS X. CR is just for
    # Mac OS <=9 (a.k.a. "Mac Classic")
    NATIVE = LF

# Automatically generated sections contain start and end comments,
# a definition line and the results.
# The results are replaced by regenerating based on the definition line.
# The definition line is a comment prefix followed by "**".
# If there is a digit after the ** then this indicates which list to use
# and the digit and next character are not part of the definition.
# Backslash is used as an escape within the definition line.
# The part between \( and \) is repeated for each item in the list.
# \* is replaced by each list item. \t and \n are tab and newline.
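# As an illustrative example (hypothetical definition line), with
# lists[0] == ["LexAda", "LexCPP"] the definition
#   SOURCES=\(\*.cxx \)
# expands to
#   SOURCES=LexAda.cxx LexCPP.cxx
# and a "\" line continuation is inserted whenever a generated line would
# reach 80 characters.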
def CopyWithInsertion(input, commentPrefix, retainDefs, eolType, *lists):
    copying = 1
    listid = 0
    output = []
    for line in input.splitlines(0):
        isStartGenerated = line.startswith(commentPrefix + "++Autogenerated")
        if copying and not isStartGenerated:
            output.append(line)
        if isStartGenerated:
            if retainDefs:
                output.append(line)
            copying = 0
            definition = ""
        elif not copying and line.startswith(commentPrefix + "**"):
            if retainDefs:
                output.append(line)
            definition = line[len(commentPrefix + "**"):]
            if (commentPrefix == "<!--") and (" -->" in definition):
                definition = definition.replace(" -->", "")
            listid = 0
            if definition[0] in string.digits:
                listid = int(definition[:1])
                definition = definition[2:]
            # Hide double backslashes as a control character
            definition = definition.replace("\\\\", "\001")
            # Do some normal C style transforms
            definition = definition.replace("\\n", "\n")
            definition = definition.replace("\\t", "\t")
            # Get the doubled backslashes back as single backslashes
            definition = definition.replace("\001", "\\")
            startRepeat = definition.find("\\(")
            endRepeat = definition.find("\\)")
            intro = definition[:startRepeat]
            out = ""
            if intro.endswith("\n"):
                pos = 0
            else:
                pos = len(intro)
            out += intro
            middle = definition[startRepeat+2:endRepeat]
            for i in lists[listid]:
                item = middle.replace("\\*", i)
                if pos and (pos + len(item) >= 80):
                    out += "\\\n"
                    pos = 0
                out += item
                pos += len(item)
                if item.endswith("\n"):
                    pos = 0
            outro = definition[endRepeat+2:]
            out += outro
            out = out.replace("\n", eolType)    # correct EOLs in generated content
            output.append(out)
        elif line.startswith(commentPrefix + "--Autogenerated"):
            copying = 1
            if retainDefs:
                output.append(line)
    output = [line.rstrip(" \t") for line in output]    # trim trailing whitespace
    return eolType.join(output) + eolType

def UpdateFile(filename, updated):
    """ If the file is different to updated then copy updated
    into the file else leave alone so CVS and make don't treat
    it as modified. """
    try:
        infile = open(filename, "rb")
    except IOError:    # File is not there yet
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("New %s" % filename)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    if updated != original:
        os.unlink(filename)
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("Changed %s " % filename)
    #~ else:
    #~     print "Unchanged", filename

def Generate(inpath, outpath, commentPrefix, eolType, *lists):
    """Generate 'outpath' from 'inpath'.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of following constants: LF, CRLF,
    CR, or NATIVE.
    """
    #print "generate '%s' -> '%s' (comment prefix: %r, eols: %r)"\
    #    % (inpath, outpath, commentPrefix, eolType)
    try:
        infile = open(inpath, "rb")
    except IOError:
        print("Can not open %s" % inpath)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    updated = CopyWithInsertion(original, commentPrefix,
        inpath == outpath, eolType, *lists)
    UpdateFile(outpath, updated)

def Regenerate(filename, commentPrefix, eolType, *lists):
    """Regenerate the given file.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of following constants: LF, CRLF,
    CR, or NATIVE.
    """
    Generate(filename, filename, commentPrefix, eolType, *lists)
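
# FindModules scans a lexer source file for LexerModule declarations.
# As an illustrative (hypothetical) example, a line such as
#   LexerModule lmAda(SCLEX_ADA, ColouriseAdaDoc, "ada");
# contributes the module name "lmAda" to the returned list.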
def FindModules(lexFile):
    modules = []
    f = open(lexFile)
    for l in f.readlines():
        if l.startswith("LexerModule"):
            l = l.replace("(", " ")
            modules.append(l.split()[1])
    return modules

# Properties that start with lexer. or fold. are automatically found but there are some
# older properties that don't follow this pattern so must be explicitly listed.
knownIrregularProperties = [
    "fold",
    "styling.within.preprocessor",
    "tab.timmy.whinge.level",
    "asp.default.language",
    "html.tags.case.sensitive",
    "ps.level",
    "ps.tokenize",
    "sql.backslash.escapes",
    "nsis.uservars",
    "nsis.ignorecase"
]
def FindProperties(lexFile):
    properties = {}
    f = open(lexFile)
    for l in f.readlines():
        if ("GetProperty" in l or "DefineProperty" in l) and "\"" in l:
            l = l.strip()
            if not l.startswith("//"):    # Drop comments
                propertyName = l.split("\"")[1]
                if propertyName.lower() == propertyName:
                    # Only allow lower case property names
                    if propertyName in knownIrregularProperties or \
                            propertyName.startswith("fold.") or \
                            propertyName.startswith("lexer."):
                        properties[propertyName] = 1
    return properties
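
# FindPropertyDocumentation gathers the documentation text attached to each
# property. An illustrative (hypothetical) example of the comment form
# recognised below:
#   // property tab.timmy.whinge.level
#   //   For Python code, checks whether indenting is consistent.
# yields "For Python code, checks whether indenting is consistent." as the
# documentation for that property.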
def FindPropertyDocumentation(lexFile):
    documents = {}
    f = open(lexFile)
    name = ""
    for l in f.readlines():
        l = l.strip()
        if "// property " in l:
            propertyName = l.split()[2]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif "DefineProperty" in l and "\"" in l:
            propertyName = l.split("\"")[1]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif name:
            if l.startswith("//"):
                if documents[name]:
                    documents[name] += " "
                documents[name] += l[2:].strip()
            elif l.startswith("\""):
                l = l[1:].strip()
                if l.endswith(";"):
                    l = l[:-1].strip()
                if l.endswith(")"):
                    l = l[:-1].strip()
                if l.endswith("\""):
                    l = l[:-1]
                # Fix escaped double quotes
                l = l.replace("\\\"", "\"")
                documents[name] += l
            else:
                name = ""
    for name in list(documents.keys()):
        if documents[name] == "":
            del documents[name]
    return documents

def ciCompare(a,b):
    return cmp(a.lower(), b.lower())

def ciKey(a):
    return a.lower()

def sortListInsensitive(l):
    try:    # Try key function
        l.sort(key=ciKey)
    except TypeError:    # Earlier version of Python, so use comparison function
        l.sort(ciCompare)

def RegenerateAll():
    root="../../"

    # Find all the lexer source code files
    lexFilePaths = glob.glob(root + "scintilla/lexers/Lex*.cxx")
    sortListInsensitive(lexFilePaths)
    lexFiles = [os.path.basename(f)[:-4] for f in lexFilePaths]
    print(lexFiles)
    lexerModules = []
    lexerProperties = {}
    propertyDocuments = {}
    for lexFile in lexFilePaths:
        lexerModules.extend(FindModules(lexFile))
        for k in FindProperties(lexFile).keys():
            lexerProperties[k] = 1
        documents = FindPropertyDocumentation(lexFile)
        for k in documents.keys():
            propertyDocuments[k] = documents[k]
    sortListInsensitive(lexerModules)
    lexerProperties = list(lexerProperties.keys())
    sortListInsensitive(lexerProperties)

    # Generate HTML to document each property
    # This is done because tags can not be safely put inside comments in HTML
    documentProperties = list(propertyDocuments.keys())
    sortListInsensitive(documentProperties)
    propertiesHTML = []
    for k in documentProperties:
        propertiesHTML.append("\t<tr>\n\t<td>%s</td>\n\t<td>%s</td>\n\t</tr>" %
            (k, propertyDocuments[k]))

    # Find all the SciTE properties files
    otherProps = ["abbrev.properties", "Embedded.properties", "SciTEGlobal.properties", "SciTE.properties"]
    if os.path.exists(root + "scite"):
        propFilePaths = glob.glob(root + "scite/src/*.properties")
        sortListInsensitive(propFilePaths)
        propFiles = [os.path.basename(f) for f in propFilePaths if os.path.basename(f) not in otherProps]
        sortListInsensitive(propFiles)
        print(propFiles)

    Regenerate(root + "scintilla/src/Catalogue.cxx", "//", NATIVE, lexerModules)
    Regenerate(root + "scintilla/win32/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla_vc6.mak", "#", NATIVE, lexFiles)
    if os.path.exists(root + "scite"):
        Regenerate(root + "scite/win32/makefile", "#", NATIVE, propFiles)
        Regenerate(root + "scite/win32/scite.mak", "#", NATIVE, propFiles)
        Regenerate(root + "scite/src/SciTEProps.cxx", "//", NATIVE, lexerProperties)
        Regenerate(root + "scite/doc/SciTEDoc.html", "<!--", NATIVE, propertiesHTML)
        Generate(root + "scite/boundscheck/vcproj.gen",
            root + "scite/boundscheck/SciTE.vcproj", "#", NATIVE, lexFiles)

RegenerateAll()