Update spell checker dictionaries
[TortoiseGit.git] / contrib / Utils / MakeUTF8.c
blob488c14671f561b46c5f910994dd3385c41c9a96a
1 /******************************************************************************
2 MakeUTF8.c
4 Copyright (C) 2002 - 2006, 2013 Simon Large
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 Description:
21 This program checks text files for the presence of a byte-order-mark (BOM)
22 and for a UTF-8 encoding indicator in the XML version tag. You can also
23 opt to add either or both of these features.
25 Use:
26 MakeUTF8 [ -b ] [ -x ] file [ file ... ]
27 Wildcard filenames are supported. Subdirectory recursion is not supported at present.
28 -b option adds/corrects BOM in file if not already present.
29 -x option adds/corrects XML tag if not already present.
30 With no options, the current state is reported but nothing is changed.
32 Example:
33 MakeUTF8 -b *.xml tsvn_dug\*.xml
34 Fixes BOMs (but not XML tags) in all .xml files in the current directory,
35 and in the tsvn_dug subdirectory.
37 This program has only been built using the Microsoft Visual C++ compiler.
38 Library calls for finding files (_findfirst64) will probably need to be
39 changed in other environments.
41 No special compiler options were used. CL MakeUTF8.c works OK.
42 ******************************************************************************/
44 #ifndef _CRT_SECURE_NO_WARNINGS
45 #define _CRT_SECURE_NO_WARNINGS 1
46 #endif
48 #include <stdio.h>
49 #include <string.h>
50 #include <stdlib.h>
51 #include <io.h>
// Bit flags describing the state of a processed file.
// ADD_BOM, DOUBLE_BOM and XML_TAG are also used as request bits in the
// Action mask built from the command line; the FIXED_* bits are only ever
// set in the value returned by ProcessFile().
#define ADD_BOM     0x01    // File has no BOM
#define DOUBLE_BOM  0x02    // File starts with more than one BOM
#define XML_TAG     0x04    // XML tag is missing, or lacks the UTF-8 encoding
#define FIXED_BOM   0x40    // The BOM was added or repaired
#define FIXED_TAG   0x80    // The XML tag was added or repaired
// Usage text printed when the program is started without arguments.
// The pointer targets a string literal, which must never be written to,
// so it is declared as pointer-to-const.
const char *help =
	"MakeUTF8 Version 1.1\n"
	"Add UTF-8 byte-order-mark and XML-tag to start of text file.\n\n"
	"Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"
	" -b option adds/corrects BOM in file if not already present\n"
	" -x option adds/corrects XML tag if not already present\n"
	" With no options, just report current state\n\n";
68 int ProcessFile(const char *FName, const char *TName, int Action);
70 int main(int argc, char *argv[])
72 int n, Action = 0, Result = 0;
73 char Path[_MAX_PATH], Temp[_MAX_PATH];
74 char *FName;
75 struct __finddata64_t FileInfo;
76 intptr_t hFile;
78 if (argc < 2)
80 fprintf(stderr, "%s", help);
81 exit(0);
84 for (n = 1; n < argc; n++)
86 if (_stricmp(argv[n], "-b") == 0)
88 Action |= ADD_BOM | DOUBLE_BOM;
89 continue;
91 if (_stricmp(argv[n], "-x") == 0)
93 Action |= XML_TAG;
94 continue;
96 // Unscramble wildcard filenames
97 if ((hFile = _findfirst64(argv[n], &FileInfo)) != -1)
99 printf("BOM\tXML-tag\tFile\n");
100 printf("--------------------\n");
101 // Extract path from original argument.
102 strcpy(Path, argv[n]);
103 // Set FName to point to filename portion of path
104 FName = strrchr(Path, '\\');
105 if (FName == NULL)
107 FName = strrchr(Path, '/');
109 if (FName == NULL)
111 FName = strrchr(Path, ':');
113 if (FName == NULL)
115 FName = Path;
117 else
119 ++FName;
122 // Process all matching files.
125 if (!(FileInfo.attrib & _A_SUBDIR))
127 // Append filename to path
128 char *p;
129 strcpy(FName, FileInfo.name);
130 // Create temp filename by replacing extension with $$$
131 strcpy(Temp, Path);
132 p = strrchr(Temp, '.');
133 if (p != NULL)
135 *p = '\0'; // Trim off extension
137 strcat(Temp, ".$$$");
138 Result = ProcessFile(Path, Temp, Action);
139 if (Result < 0)
141 break; // Failed.
143 // Show results of analysis / repair
144 if (Result & ADD_BOM)
146 if (Result & FIXED_BOM)
148 p = "Added";
150 else
152 p = "None";
155 else if (Result & DOUBLE_BOM)
157 if (Result & FIXED_BOM)
159 p = "Fixed";
161 else
163 p = "Multi";
166 else
168 p = "OK";
170 printf("%s\t", p);
171 if (Result & XML_TAG)
173 if (Result & FIXED_TAG)
175 p = "Fixed";
177 else
179 p = "None";
182 else
184 p = "OK";
186 printf("%s\t%s\n", p, FileInfo.name);
189 while (_findnext64(hFile, &FileInfo) == 0);
190 _findclose(hFile);
193 exit((Result < 0) ? 1 : 0);
// These 3 bytes are the UTF-8 BOM we want at the start of a file.
// They are stored as unsigned char because all three values are above
// 0x7f and do not fit into a plain char where char is signed.
const unsigned char BOMbuf[3] = { 0xEF, 0xBB, 0xBF };

// This is the XML tag we want. It points at a string literal and is
// therefore read-only.
const char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// Read this amount at start of file to check for BOM and tag
#define BUFSIZE 2048
205 int ProcessFile(const char *FName, const char *TName, int Action)
207 FILE *fp, *fpout;
208 char Buffer[BUFSIZE + 1024];
209 size_t Len;
210 size_t NumRead;
211 int Changed = 0, Checked = 0;
212 size_t UTFtaglen;
213 char *TagStart, *TagStop;
214 char *AfterBOM = Buffer;
216 if ((fp = fopen(FName, "r")) == NULL)
218 return -1;
221 // Check if output file exists already
222 if ((fpout = fopen(TName, "r")) != NULL) {
223 fprintf(stderr, "%s:\tTemp file already exists\n", TName);
224 fclose(fpout);
225 fclose(fp);
226 return -1;
229 while ((NumRead = fread(Buffer, 1, BUFSIZE, fp)) > 0)
231 if (!Checked)
233 Checked = 1;
234 // Check for no BOM or multiple BOM.
235 if (memcmp(BOMbuf, Buffer, 3) == 0)
237 // BOM already exists.
238 AfterBOM = Buffer + 3;
239 while (memcmp(BOMbuf, AfterBOM, 3) == 0)
241 // Multiple BOM found.
242 Changed |= DOUBLE_BOM;
243 if (Action & DOUBLE_BOM)
245 // Delete BOM from source
246 NumRead -= 3;
247 memmove(Buffer, AfterBOM, NumRead);
248 Buffer[NumRead] = '\0';
250 else
252 break;
256 else
258 // No BOM found.
259 Changed |= ADD_BOM;
260 if (Action & ADD_BOM)
262 // Add BOM to source
263 AfterBOM = Buffer + 3;
264 memmove(AfterBOM, Buffer, NumRead);
265 memcpy(Buffer, BOMbuf, 3);
266 NumRead += 3;
270 // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>
271 Buffer[NumRead] = '\0'; // Add null terminator for string search.
272 UTFtaglen = strlen(UTFtag);
273 if (strstr(Buffer, "encoding=\"UTF-8\"") == NULL)
275 // No XML tag found.
276 Changed |= XML_TAG;
277 if (Action & XML_TAG)
279 TagStart = strstr(Buffer, "<?xml version");
280 if (TagStart != NULL)
282 TagStop = strstr(TagStart, "?>");
283 if (TagStop != NULL)
285 // Version tag present without UTF-8
286 Len = UTFtaglen - (TagStop - TagStart + 2);
287 if (Len != 0)
289 // Expand/contract the space
290 memmove(TagStop + Len, TagStop, NumRead - (TagStop - Buffer));
291 NumRead += Len;
293 memcpy(TagStart, UTFtag, UTFtaglen);
295 else
297 // Version tag is not terminated. Cannot fix.
298 Action &= ~XML_TAG;
301 else
303 // No version tag found. Add one after BOM, with newline.
304 memmove(AfterBOM + UTFtaglen + 1, AfterBOM, NumRead);
305 memcpy(AfterBOM, UTFtag, UTFtaglen);
306 AfterBOM[UTFtaglen] = '\n';
307 NumRead += UTFtaglen + 1;
312 if (!(Action & Changed))
314 // If no problems marked for fixing, leave it here.
315 break;
317 // Changes made - open a temp file for the BOM'ed version
318 if ((fpout = fopen(TName, "w")) == NULL)
320 fprintf(stderr, "Cannot open temp file\n");
321 fclose(fp);
322 return -1;
325 if (fwrite(Buffer, 1, NumRead, fpout) != NumRead)
327 fprintf(stderr, "Error writing to temp file\n");
328 fclose(fpout);
329 fclose(fp);
330 return -1;
334 fclose(fp);
336 // If changes have been made, replace original file with temp file.
337 if (Changed & Action)
339 // Replace original with temp file
340 if (fpout)
342 fclose(fpout);
344 if (remove(FName) != 0)
346 fprintf(stderr, "Cannot delete original file\n");
347 return -1;
349 if (rename(TName, FName) != 0)
351 fprintf(stderr, "Cannot replace original file with fixed version\n");
352 return -1;
354 // Add flags to indicate what we have actually fixed
355 if (Changed & Action & (DOUBLE_BOM | ADD_BOM))
357 Changed |= FIXED_BOM;
359 if (Changed & Action & XML_TAG)
361 Changed |= FIXED_TAG;
365 return Changed;