1
/******************************************************************************
4 Copyright (C) 2002 - 2006, 2013 Simon Large
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 This program checks text files for the presence of a byte-order-mark (BOM)
22 and for a UTF-8 encoding indicator in the XML version tag. You can also
23 opt to add either or both of these features.
26 MakeUTF8 [ -b ] [ -x ] file [ file ... ]
27 Wildcard filenames are supported. Subdirectory recursion is not supported at present.
28 -b option adds/corrects BOM in file if not already present.
29 -x option adds/corrects XML tag if not already present.
30 With no options, the current state is reported but nothing is changed.
33 MakeUTF8 -b *.xml tsvn_dug\*.xml
34 Fixes BOMs (but not XML tags) in all .xml files in the current directory,
35 and in the tsvn_dug subdirectory.
37 This program has only been built using the Microsoft Visual C++ compiler.
38 Library calls for finding files (_findfirst64) will probably need to be
39 changed in other environments.
41 No special compiler options were used. CL MakeUTF8.c works OK.
42 ******************************************************************************/
44 #ifndef _CRT_SECURE_NO_WARNINGS
45 #define _CRT_SECURE_NO_WARNINGS 1
53 // Status flags returned from the file processor.
54 #define ADD_BOM 1 // BOM is missing
55 #define DOUBLE_BOM 2 // Double BOM found
56 #define XML_TAG 4 // XML tag missing, or UTF-8 not included
57 #define FIXED_BOM 64 // BOM has been added or fixed
58 #define FIXED_TAG 128 // XML tag has been added or fixed
61 "MakeUTF8 Version 1.1\n"
62 "Add UTF-8 byte-order-mark and XML-tag to start of text file.\n\n"
63 "Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"
64 " -b option adds/corrects BOM in file if not already present\n"
65 " -x option adds/corrects XML tag if not already present\n"
66 " With no options, just report current state\n\n";
68 int ProcessFile(const char *FName
, const char *TName
, int Action
);
/*
 * main - command-line driver.
 *
 * Visible behaviour:
 *   - prints the help text to stderr (the condition guarding this is not
 *     visible in this listing);
 *   - walks argv[1] .. argv[argc-1]; "-b" (compared case-insensitively with
 *     _stricmp) adds ADD_BOM | DOUBLE_BOM to Action; "-x" is recognised the
 *     same way, but its effect is on a line not visible here;
 *   - other arguments are expanded with _findfirst64 / _findnext64, and each
 *     match that is not a subdirectory is handed to ProcessFile together
 *     with a temp file name ending in ".$$$";
 *   - exits with status 1 when Result is negative, otherwise 0.
 *
 * NOTE(review): the embedded line numbers have gaps, so braces, some
 * declarations (hFile, FName, p) and some conditions are missing from this
 * listing. The comments below describe only the statements that are visible.
 */
70 int main(int argc
, char *argv
[])
72 int n
, Action
= 0, Result
= 0;
// Path receives a copy of the command-line argument; Temp is used for the
// temp file name. Both are fixed-size _MAX_PATH buffers.
73 char Path
[_MAX_PATH
], Temp
[_MAX_PATH
];
75 struct __finddata64_t FileInfo
;
80 fprintf(stderr
, "%s", help
);
// Arguments are scanned left to right, starting after the program name.
84 for (n
= 1; n
< argc
; n
++)
86 if (_stricmp(argv
[n
], "-b") == 0)
// -b selects both BOM problems for handling: ADD_BOM and DOUBLE_BOM.
88 Action
|= ADD_BOM
| DOUBLE_BOM
;
91 if (_stricmp(argv
[n
], "-x") == 0)
96 // Unscramble wildcard filenames
// _findfirst64 returning -1 means the search produced no handle.
97 if ((hFile
= _findfirst64(argv
[n
], &FileInfo
)) != -1)
// Column headings for the per-file report lines printed below.
99 printf("BOM\tXML-tag\tFile\n");
100 printf("--------------------\n");
101 // Extract path from original argument.
// NOTE(review): strcpy is unbounded; an argument longer than _MAX_PATH - 1
// characters would overflow Path.
102 strcpy(Path
, argv
[n
]);
103 // Set FName to point to filename portion of path
// Three separators are looked up in turn: backslash, forward slash and
// drive colon. The tests deciding between them are not visible here.
104 FName
= strrchr(Path
, '\\');
107 FName
= strrchr(Path
, '/');
111 FName
= strrchr(Path
, ':');
122 // Process all matching files.
// Entries with the _A_SUBDIR attribute are skipped.
125 if (!(FileInfo
.attrib
& _A_SUBDIR
))
127 // Append filename to path
// FName points into Path, so this writes the matched name into Path.
// NOTE(review): unbounded copy - confirm Path cannot overflow here.
129 strcpy(FName
, FileInfo
.name
);
130 // Create temp filename by replacing extension with $$$
// NOTE(review): how Temp is filled, and any NULL check on p, are on lines
// not visible in this listing.
132 p
= strrchr(Temp
, '.');
135 *p
= '\0'; // Trim off extension
137 strcat(Temp
, ".$$$");
138 Result
= ProcessFile(Path
, Temp
, Action
);
143 // Show results of analysis / repair
// Result is tested against the status flags. The text chosen for each
// combination is on lines not visible here.
144 if (Result
& ADD_BOM
)
146 if (Result
& FIXED_BOM
)
155 else if (Result
& DOUBLE_BOM
)
157 if (Result
& FIXED_BOM
)
171 if (Result
& XML_TAG
)
173 if (Result
& FIXED_TAG
)
// One report line per file: the string p points to, a tab, then the name.
186 printf("%s\t%s\n", p
, FileInfo
.name
);
// Keep going while _findnext64 reports another match (returns 0).
189 while (_findnext64(hFile
, &FileInfo
) == 0);
// Process exit status: 1 if Result is negative, otherwise 0.
193 exit((Result
< 0) ? 1 : 0);
// The 3-byte UTF-8 byte-order mark (EF BB BF) that a fixed file starts with.
// Stored as unsigned char: these values do not fit in a signed char, so a
// plain `char` initialiser depends on implementation-defined conversion and
// a comparison such as BOMbuf[0] == 0xef would be false where char is signed.
// In the visible code the array is only passed to memcmp/memcpy, which work
// on raw bytes, and it is never written to, hence const.
const unsigned char BOMbuf[3] = { 0xef, 0xbb, 0xbf };
// The XML declaration that is inserted into, or substituted in, files that
// lack a UTF-8 encoding indicator. It points at a string literal, which must
// never be written through, so the pointee is const-qualified.
const char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
202 // Read this amount at start of file to check for BOM and tag
/*
 * ProcessFile - check one text file for a UTF-8 byte-order mark and a UTF-8
 * XML declaration, optionally repairing it by way of a temporary file.
 *
 *   FName  - file to examine; opened with fopen(FName, "r").
 *   TName  - temp file name. An error is reported if it can already be
 *            opened for reading; it is opened with "w" when the modified
 *            buffer is written out.
 *   Action - bit mask tested against ADD_BOM, DOUBLE_BOM and XML_TAG to
 *            decide which problems are actually fixed.
 *
 * Changed collects status flags: DOUBLE_BOM is set when repeated BOMs are
 * seen, and FIXED_BOM / FIXED_TAG are added for problems that are present in
 * both Changed and Action.
 *
 * NOTE(review): the embedded line numbers have gaps, so braces, some
 * declarations (fp, fpout, NumRead, Len, UTFtaglen), the places where
 * ADD_BOM / XML_TAG are recorded, and the return statements are missing from
 * this listing. The comments below describe only the statements visible.
 */
205 int ProcessFile(const char *FName
, const char *TName
, int Action
)
// 1024 bytes of slack beyond BUFSIZE leave room for the insertions below
// (a 3-byte BOM, the XML tag plus newline) and for the '\0' terminator.
208 char Buffer
[BUFSIZE
+ 1024];
211 int Changed
= 0, Checked
= 0;
213 char *TagStart
, *TagStop
;
// AfterBOM marks where content starts; it stays at Buffer unless a BOM is
// found or added.
214 char *AfterBOM
= Buffer
;
// NOTE(review): the file is opened in text mode ("r"), not "rb" - confirm
// that the runtime's text-mode translation is intended for this tool.
216 if ((fp
= fopen(FName
, "r")) == NULL
)
221 // Check if output file exists already
// A successful open for reading means TName already exists.
222 if ((fpout
= fopen(TName
, "r")) != NULL
) {
223 fprintf(stderr
, "%s:\tTemp file already exists\n", TName
);
// Read up to BUFSIZE bytes per iteration until fread returns 0.
229 while ((NumRead
= fread(Buffer
, 1, BUFSIZE
, fp
)) > 0)
234 // Check for no BOM or multiple BOM.
// NOTE(review): the comparison always reads 3 bytes of Buffer; no visible
// line checks that NumRead is at least 3.
235 if (memcmp(BOMbuf
, Buffer
, 3) == 0)
237 // BOM already exists.
238 AfterBOM
= Buffer
+ 3;
// Any further BOM directly after the first one counts as a duplicate.
239 while (memcmp(BOMbuf
, AfterBOM
, 3) == 0)
241 // Multiple BOM found.
242 Changed
|= DOUBLE_BOM
;
243 if (Action
& DOUBLE_BOM
)
245 // Delete BOM from source
// Slide the data down so that it starts at AfterBOM's old position.
// NOTE(review): any adjustment of NumRead for the removed bytes is on a
// line not visible here.
247 memmove(Buffer
, AfterBOM
, NumRead
);
248 Buffer
[NumRead
] = '\0';
260 if (Action
& ADD_BOM
)
// Make room: shift the data up by 3 bytes, then write the BOM in front.
263 AfterBOM
= Buffer
+ 3;
264 memmove(AfterBOM
, Buffer
, NumRead
);
265 memcpy(Buffer
, BOMbuf
, 3);
270 // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>
271 Buffer
[NumRead
] = '\0'; // Add null terminator for string search.
272 UTFtaglen
= strlen(UTFtag
);
// The encoding attribute is searched for anywhere in the buffer.
273 if (strstr(Buffer
, "encoding=\"UTF-8\"") == NULL
)
277 if (Action
& XML_TAG
)
279 TagStart
= strstr(Buffer
, "<?xml version");
280 if (TagStart
!= NULL
)
282 TagStop
= strstr(TagStart
, "?>");
285 // Version tag present without UTF-8
// Len = wanted tag length minus existing tag length. The existing tag runs
// from TagStart through the two characters of "?>", hence the + 2.
// Positive means the tail moves up, negative means it moves down.
286 Len
= UTFtaglen
- (TagStop
- TagStart
+ 2);
289 // Expand/contract the space
// Moves everything from the old "?>" to the end of the data by Len bytes.
290 memmove(TagStop
+ Len
, TagStop
, NumRead
- (TagStop
- Buffer
));
// Overwrite the old declaration (including the moved "?>") with UTFtag.
293 memcpy(TagStart
, UTFtag
, UTFtaglen
);
297 // Version tag is not terminated. Cannot fix.
303 // No version tag found. Add one after BOM, with newline.
// Shift the content up by the tag length plus one for the newline.
// NOTE(review): the copy length is NumRead even when AfterBOM is past
// Buffer, so it can cover a few bytes beyond the data read.
304 memmove(AfterBOM
+ UTFtaglen
+ 1, AfterBOM
, NumRead
);
305 memcpy(AfterBOM
, UTFtag
, UTFtaglen
);
306 AfterBOM
[UTFtaglen
] = '\n';
// The data now also holds the inserted tag and its newline.
307 NumRead
+= UTFtaglen
+ 1;
// True when none of the problems recorded in Changed were selected in Action.
312 if (!(Action
& Changed
))
314 // If no problems marked for fixing, leave it here.
317 // Changes made - open a temp file for the BOM'ed version
318 if ((fpout
= fopen(TName
, "w")) == NULL
)
320 fprintf(stderr
, "Cannot open temp file\n");
// A short write (fewer than NumRead bytes) is reported as an error.
325 if (fwrite(Buffer
, 1, NumRead
, fpout
) != NumRead
)
327 fprintf(stderr
, "Error writing to temp file\n");
336 // If changes have been made, replace original file with temp file.
337 if (Changed
& Action
)
339 // Replace original with temp file
// The replacement is two steps: remove the original, then rename the temp
// file onto its name.
// NOTE(review): if rename fails after remove has succeeded, the data
// presumably survives only in TName - confirm the error handling on the
// lines not visible here.
344 if (remove(FName
) != 0)
346 fprintf(stderr
, "Cannot delete original file\n");
349 if (rename(TName
, FName
) != 0)
351 fprintf(stderr
, "Cannot replace original file with fixed version\n");
354 // Add flags to indicate what we have actually fixed
// FIXED_BOM: a BOM problem (missing or doubled) was both found and selected.
355 if (Changed
& Action
& (DOUBLE_BOM
| ADD_BOM
))
357 Changed
|= FIXED_BOM
;
// FIXED_TAG: the XML tag problem was both found and selected.
359 if (Changed
& Action
& XML_TAG
)
361 Changed
|= FIXED_TAG
;