ChangedDlg: move buttons to the right
[TortoiseGit.git] / contrib / Utils / MakeUTF8.c
blob46879ca501538b6e928e925c55a629740bc6e566
1 /******************************************************************************
2 MakeUTF8.c
4 Copyright (C) 2002 - 2006 Simon Large
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 Description:
21 This program checks text files for the presence of a byte-order-mark (BOM)
22 and for a UTF-8 encoding indicator in the XML version tag. You can also
23 opt to add either or both of these features.
25 Use:
26 MakeUTF8 [ -b ] [ -x ] file [ file ... ]
27 Wildcard filenames are supported. Subdirectory recursion is not supported at present.
28 -b option adds/corrects BOM in file if not already present.
29 -x option adds/corrects XML tag if not already present.
30 With no options, the current state is reported but nothing is changed.
32 Example:
33 MakeUTF8 -b *.xml tsvn_dug\*.xml
34 Fixes BOMs (but not XML tags) in all .xml files in the current directory,
35 and in the tsvn_dug subdirectory.
37 This program has only been built using the Microsoft Visual C++ compiler.
38 Library calls for finding files (_findfirst64) will probably need to be
39 changed in other environments.
41 No special compiler options were used. CL MakeUTF8.c works OK.
42 ******************************************************************************/
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <io.h>
// Bit flags making up the value returned by the file processor.
// The low bits describe problems found; the high bits record repairs made.
#define ADD_BOM     0x01    // File has no BOM
#define DOUBLE_BOM  0x02    // File starts with more than one BOM
#define XML_TAG     0x04    // XML tag is absent or does not specify UTF-8
#define FIXED_BOM   0x40    // A BOM was added or corrected
#define FIXED_TAG   0x80    // The XML tag was added or corrected
// Usage text; main() prints it to stderr when no arguments are supplied.
char *help =
    /* Banner */
    "MakeUTF8 Version 1.1\n"
    "Add UTF-8 byte-order-mark and XML-tag to start of text file.\n"
    "\n"
    /* Synopsis and options */
    "Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"
    " -b option adds/corrects BOM in file if not already present\n"
    " -x option adds/corrects XML tag if not already present\n"
    " With no options, just report current state\n"
    "\n";

// Forward declaration of the per-file worker defined later in this file.
// FName is the file to examine, TName the temporary output file name and
// Action the set of status flags the caller wants repaired.
int ProcessFile(const char *FName, const char *TName, int Action);
66 main(int argc, char *argv[])
68 int n, Action = 0, Result = 0;
69 char Path[_MAX_PATH], Temp[_MAX_PATH];
70 char *FName;
71 struct __finddata64_t FileInfo;
72 intptr_t hFile;
74 if (argc < 2)
76 fprintf(stderr, "%s", help);
77 exit(0);
80 for (n = 1; n < argc; n++)
82 if (stricmp(argv[n], "-b") == 0)
84 Action |= ADD_BOM | DOUBLE_BOM;
85 continue;
87 if (stricmp(argv[n], "-x") == 0)
89 Action |= XML_TAG;
90 continue;
92 // Unscramble wildcard filenames
93 if ((hFile = _findfirst64(argv[n], &FileInfo)) != -1)
95 printf("BOM\tXML-tag\tFile\n");
96 printf("--------------------\n");
97 // Extract path from original argument.
98 strcpy(Path, argv[n]);
99 // Set FName to point to filename portion of path
100 FName = strrchr(Path, '\\');
101 if (FName == NULL) FName = strrchr(Path, '/');
102 if (FName == NULL) FName = strrchr(Path, ':');
103 if (FName == NULL) FName = Path;
104 else ++FName;
106 // Process all matching files.
109 if (!(FileInfo.attrib & _A_SUBDIR))
111 // Append filename to path
112 char *p;
113 strcpy(FName, FileInfo.name);
114 // Create temp filename by replacing extension with $$$
115 strcpy(Temp, Path);
116 p = strrchr(Temp, '.');
117 if (p != NULL) *p = '\0'; // Trim off extension
118 strcat(Temp, ".$$$");
119 Result = ProcessFile(Path, Temp, Action);
120 if (Result < 0) break; // Failed.
121 // Show results of analysis / repair
122 if (Result & ADD_BOM)
124 if (Result & FIXED_BOM)
125 p = "Added";
126 else
127 p = "None";
129 else if (Result & DOUBLE_BOM)
131 if (Result & FIXED_BOM)
132 p = "Fixed";
133 else
134 p = "Multi";
136 else
137 p = "OK";
138 printf("%s\t", p);
139 if (Result & XML_TAG)
141 if (Result & FIXED_TAG)
142 p = "Fixed";
143 else
144 p = "None";
146 else
147 p = "OK";
148 printf("%s\t%s\n", p, FileInfo.name);
151 while (_findnext64(hFile, &FileInfo) == 0);
152 _findclose(hFile);
155 exit((Result < 0) ? 1 : 0);
// The UTF-8 byte-order-mark: the three bytes EF BB BF.
char BOMbuf[3] = { '\xef', '\xbb', '\xbf' };

// The XML declaration that a correctly tagged file should carry.
char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// Number of bytes requested per read. Only the first chunk read from a
// file is inspected for the BOM and the XML tag.
#define BUFSIZE 2048
167 int ProcessFile(const char *FName, const char *TName, int Action)
169 FILE *fp, *fpout;
170 char Buffer[BUFSIZE + 1024];
171 int Len;
172 size_t NumRead;
173 int Changed = 0, Checked = 0;
174 int UTFtaglen;
175 char *TagStart, *TagStop;
176 char *AfterBOM = Buffer;
178 if ((fp = fopen(FName, "r")) == NULL)
179 return -1;
181 // Check if output file exists already
182 if ((fpout = fopen(TName, "r")) != NULL) {
183 fprintf(stderr, "%s:\tTemp file already exists\n", TName);
184 fclose(fpout);
185 fclose(fp);
186 return -1;
189 while ((NumRead = fread(Buffer, 1, BUFSIZE, fp)) > 0)
191 if (!Checked)
193 Checked = 1;
194 // Check for no BOM or multiple BOM.
195 if (memcmp(BOMbuf, Buffer, 3) == 0)
197 // BOM already exists.
198 AfterBOM = Buffer + 3;
199 while (memcmp(BOMbuf, AfterBOM, 3) == 0)
201 // Multiple BOM found.
202 Changed |= DOUBLE_BOM;
203 if (Action & DOUBLE_BOM)
205 // Delete BOM from source
206 NumRead -= 3;
207 memmove(Buffer, AfterBOM, NumRead);
208 Buffer[NumRead] = '\0';
210 else
211 break;
214 else
216 // No BOM found.
217 Changed |= ADD_BOM;
218 if (Action & ADD_BOM)
220 // Add BOM to source
221 AfterBOM = Buffer + 3;
222 memmove(AfterBOM, Buffer, NumRead);
223 memcpy(Buffer, BOMbuf, 3);
224 NumRead += 3;
228 // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>
229 Buffer[NumRead] = '\0'; // Add null terminator for string search.
230 UTFtaglen = strlen(UTFtag);
231 if (strstr(Buffer, "encoding=\"UTF-8\"") == NULL)
233 // No XML tag found.
234 Changed |= XML_TAG;
235 if (Action & XML_TAG)
237 TagStart = strstr(Buffer, "<?xml version");
238 if (TagStart != NULL)
240 TagStop = strstr(TagStart, "?>");
241 if (TagStop != NULL)
243 // Version tag present without UTF-8
244 Len = UTFtaglen - (TagStop - TagStart + 2);
245 if (Len != 0)
247 // Expand/contract the space
248 memmove(TagStop + Len, TagStop, NumRead - (TagStop - Buffer));
249 NumRead += Len;
251 memcpy(TagStart, UTFtag, UTFtaglen);
253 else
255 // Version tag is not terminated. Cannot fix.
256 Action &= ~XML_TAG;
259 else
261 // No version tag found. Add one after BOM, with newline.
262 memmove(AfterBOM + UTFtaglen + 1, AfterBOM, NumRead);
263 memcpy(AfterBOM, UTFtag, UTFtaglen);
264 AfterBOM[UTFtaglen] = '\n';
265 NumRead += UTFtaglen + 1;
270 if (!(Action & Changed))
272 // If no problems marked for fixing, leave it here.
273 break;
275 // Changes made - open a temp file for the BOM'ed version
276 if ((fpout = fopen(TName, "w")) == NULL)
278 fprintf(stderr, "Cannot open temp file\n");
279 fclose(fp);
280 return -1;
283 if (fwrite(Buffer, 1, NumRead, fpout) != NumRead)
285 fprintf(stderr, "Error writing to temp file\n");
286 fclose(fpout);
287 fclose(fp);
288 return -1;
292 fclose(fp);
294 // If changes have been made, replace original file with temp file.
295 if (Changed & Action)
297 // Replace original with temp file
298 fclose(fpout);
299 if (remove(FName) != 0)
301 fprintf(stderr, "Cannot delete original file\n");
302 return -1;
304 if (rename(TName, FName) != 0)
306 fprintf(stderr, "Cannot replace original file with fixed version\n");
307 return -1;
309 // Add flags to indicate what we have actually fixed
310 if (Changed & Action & (DOUBLE_BOM | ADD_BOM))
311 Changed |= FIXED_BOM;
312 if (Changed & Action & XML_TAG)
313 Changed |= FIXED_TAG;
316 return Changed;