/******************************************************************************
Copyright (C) 2002 - 2006 Simon Large

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

This program checks text files for the presence of a byte-order-mark (BOM)
and for a UTF-8 encoding indicator in the XML version tag. You can also
opt to add either or both of these features.

Usage:
    MakeUTF8 [ -b ] [ -x ] file [ file ... ]
    Wildcard filenames are supported. Subdirectory recursion is not
    supported at present.
    -b option adds/corrects BOM in file if not already present.
    -x option adds/corrects XML tag if not already present.
    With no options, the current state is reported but nothing is changed.

Example:
    MakeUTF8 -b *.xml tsvn_dug\*.xml
    Fixes BOMs (but not XML tags) in all .xml files in the current directory,
    and in the tsvn_dug subdirectory.

This program has only been built using the Microsoft Visual C++ compiler.
Library calls for finding files (_findfirst64) will probably need to be
changed in other environments.

No special compiler options were used. CL MakeUTF8.c works OK.
******************************************************************************/
// Status flags returned from the file processor.
// Each flag is a distinct bit so that several can be OR-ed into one int.
// The first three double as "what to fix" request bits in the Action mask.
#define ADD_BOM     1       // BOM is missing
#define DOUBLE_BOM  2       // Double BOM found
#define XML_TAG     4       // XML tag missing, or UTF-8 not included
#define FIXED_BOM   64      // BOM has been added or fixed
#define FIXED_TAG   128     // XML tag has been added or fixed
// Usage text; main() prints it to stderr with fprintf(stderr, "%s", help).
// The adjacent string literals are concatenated into one string by the compiler.
const char *help =
    "MakeUTF8 Version 1.1\n"
    "Add UTF-8 byte-order-mark and XML-tag to start of text file.\n\n"
    "Use: MakeUTF8 [ -b ] [ -x ] file [ file ... ]\n"
    " -b option adds/corrects BOM in file if not already present\n"
    " -x option adds/corrects XML tag if not already present\n"
    " With no options, just report current state\n\n";
// Examine (and optionally repair) one file.
//   FName   path of the file to check
//   TName   path of the temporary file used when a fixed copy is written
//   Action  OR of ADD_BOM / DOUBLE_BOM / XML_TAG selecting what to fix
//           (0 = report only)
// Returns a mask of the status flags defined above; main() treats a
// negative result as failure.
int ProcessFile(const char *FName, const char *TName, int Action);
/*
 * main - command-line driver.
 *
 * NOTE(review): this listing is damaged. The leading numbers are the line
 * numbers of the original file fused into the text, and the gaps between
 * them show that braces and several statements are not visible here.
 * The description below covers only what the visible lines establish.
 *
 *  - The help text is written to stderr (the guarding condition is not
 *    visible).
 *  - The loop walks argv[1] .. argv[argc-1]. "-b" (compared with stricmp,
 *    so case-insensitive) ORs ADD_BOM | DOUBLE_BOM into Action. "-x" is
 *    recognised too, but the statement it triggers is not visible;
 *    presumably it sets XML_TAG - TODO confirm.
 *  - An argument is handed to _findfirst64(); on success a table header is
 *    printed, the argument is copied into Path, and FName is pointed at the
 *    last '\\', '/' or ':' in Path, or at Path itself when none is found.
 *  - For each match that is not a directory, the matched name is copied to
 *    FName, a temp name ending in ".$$$" is built in Temp, and
 *    ProcessFile(Path, Temp, Action) is called. A negative Result breaks
 *    out; otherwise the flag tests select a status text and a row is printed.
 *  - The process exits with status 1 when Result is negative, otherwise 0.
 *
 * NOTE(review): strcpy(Path, argv[n]), strcpy(FName, FileInfo.name) and
 * strcat(Temp, ".$$$") have no visible length checks against _MAX_PATH.
 */
66 main(int argc
, char *argv
[])
68 int n
, Action
= 0, Result
= 0;
69 char Path
[_MAX_PATH
], Temp
[_MAX_PATH
];
71 struct __finddata64_t FileInfo
;
76 fprintf(stderr
, "%s", help
);
80 for (n
= 1; n
< argc
; n
++)
82 if (stricmp(argv
[n
], "-b") == 0)
84 Action
|= ADD_BOM
| DOUBLE_BOM
;
87 if (stricmp(argv
[n
], "-x") == 0)
92 // Unscramble wildcard filenames
93 if ((hFile
= _findfirst64(argv
[n
], &FileInfo
)) != -1)
95 printf("BOM\tXML-tag\tFile\n");
96 printf("--------------------\n");
97 // Extract path from original argument.
98 strcpy(Path
, argv
[n
]);
99 // Set FName to point to filename portion of path
100 FName
= strrchr(Path
, '\\');
101 if (FName
== NULL
) FName
= strrchr(Path
, '/');
102 if (FName
== NULL
) FName
= strrchr(Path
, ':');
103 if (FName
== NULL
) FName
= Path
;
106 // Process all matching files.
// Directory entries returned by the search are skipped.
109 if (!(FileInfo
.attrib
& _A_SUBDIR
))
111 // Append filename to path
113 strcpy(FName
, FileInfo
.name
);
114 // Create temp filename by replacing extension with $$$
116 p
= strrchr(Temp
, '.');
117 if (p
!= NULL
) *p
= '\0'; // Trim off extension
118 strcat(Temp
, ".$$$");
119 Result
= ProcessFile(Path
, Temp
, Action
);
120 if (Result
< 0) break; // Failed.
121 // Show results of analysis / repair
122 if (Result
& ADD_BOM
)
124 if (Result
& FIXED_BOM
)
129 else if (Result
& DOUBLE_BOM
)
131 if (Result
& FIXED_BOM
)
139 if (Result
& XML_TAG
)
141 if (Result
& FIXED_TAG
)
148 printf("%s\t%s\n", p
, FileInfo
.name
);
151 while (_findnext64(hFile
, &FileInfo
) == 0);
// Exit status: 1 when the last ProcessFile result was negative, else 0.
155 exit((Result
< 0) ? 1 : 0);
// These 3 bytes are the BOM we want (UTF-8 encoding of U+FEFF).
// Stored as unsigned char: 0xEF/0xBB/0xBF do not fit in a signed char, so a
// plain char array relies on an implementation-defined conversion.
// The array is only read (memcmp / memcpy source), hence const.
const unsigned char BOMbuf[3] = { 0xEF, 0xBB, 0xBF };
// This is the XML tag we want.
// Points at a string literal, which must never be written through, so the
// pointee is const. It is only read via strlen() and as a memcpy() source.
const char *UTFtag = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
164 // Read this amount at start of file to check for BOM and tag
/*
 * ProcessFile - check one file for a UTF-8 BOM and a UTF-8 XML declaration
 * and, when asked, write a corrected copy and swap it in.
 *
 *   FName   file to examine; opened with fopen(FName, "r")
 *   TName   temp file name; if it can already be opened for reading,
 *           "Temp file already exists" is reported on stderr
 *   Action  mask of ADD_BOM / DOUBLE_BOM / XML_TAG naming the problems
 *           that should be fixed rather than just detected
 *
 * NOTE(review): this listing is damaged. The leading numbers are original
 * line numbers fused into the text, and declarations, braces, return
 * statements and some branch bodies are not visible. Only the visible lines
 * are described.
 *
 *  - Data is read with fread() in chunks of BUFSIZE bytes into Buffer,
 *    which has 1024 spare bytes for the insertions below.
 *  - If Buffer starts with BOMbuf, every further BOM directly after it sets
 *    DOUBLE_BOM in Changed; with Action & DOUBLE_BOM the data is shifted
 *    down over the extra BOM.
 *  - With Action & ADD_BOM the data is shifted up by 3 bytes and BOMbuf is
 *    copied to the front (this presumably sits in the no-BOM branch -
 *    TODO confirm).
 *  - If the text encoding="UTF-8" is absent and Action & XML_TAG is set, an
 *    existing "<?xml version ... ?>" tag is replaced by UTFtag; when no such
 *    tag exists, UTFtag plus '\n' is inserted at AfterBOM.
 *  - When (Action & Changed) is non-zero, Buffer is written to TName
 *    (opened "w"), FName is removed and TName is renamed to FName; then
 *    FIXED_BOM / FIXED_TAG are added to Changed for the problems that were
 *    both found and requested.
 *
 * Return value: the return statements are not visible. main() treats a
 * negative result as failure and otherwise tests the status flags, so
 * Changed is presumably what is returned - TODO confirm.
 *
 * NOTE(review): the visible memcmp() calls compare 3 bytes with no visible
 * check that NumRead >= 3.
 * NOTE(review): both files are opened in text mode ("r"/"w"), not binary.
 * NOTE(review): remove(FName) runs before rename(TName, FName); if the
 * rename fails the original file is already gone.
 */
167 int ProcessFile(const char *FName
, const char *TName
, int Action
)
170 char Buffer
[BUFSIZE
+ 1024];
173 int Changed
= 0, Checked
= 0;
175 char *TagStart
, *TagStop
;
176 char *AfterBOM
= Buffer
;
178 if ((fp
= fopen(FName
, "r")) == NULL
)
181 // Check if output file exists already
182 if ((fpout
= fopen(TName
, "r")) != NULL
) {
183 fprintf(stderr
, "%s:\tTemp file already exists\n", TName
);
189 while ((NumRead
= fread(Buffer
, 1, BUFSIZE
, fp
)) > 0)
194 // Check for no BOM or multiple BOM.
195 if (memcmp(BOMbuf
, Buffer
, 3) == 0)
197 // BOM already exists.
198 AfterBOM
= Buffer
+ 3;
199 while (memcmp(BOMbuf
, AfterBOM
, 3) == 0)
201 // Multiple BOM found.
202 Changed
|= DOUBLE_BOM
;
203 if (Action
& DOUBLE_BOM
)
205 // Delete BOM from source
207 memmove(Buffer
, AfterBOM
, NumRead
);
208 Buffer
[NumRead
] = '\0';
218 if (Action
& ADD_BOM
)
// Make room for the BOM by shifting the data up 3 bytes, then copy it in.
221 AfterBOM
= Buffer
+ 3;
222 memmove(AfterBOM
, Buffer
, NumRead
);
223 memcpy(Buffer
, BOMbuf
, 3);
228 // Check for XML tag <?xml version="1.0" encoding="UTF-8"?>
229 Buffer
[NumRead
] = '\0'; // Add null terminator for string search.
230 UTFtaglen
= strlen(UTFtag
);
231 if (strstr(Buffer
, "encoding=\"UTF-8\"") == NULL
)
235 if (Action
& XML_TAG
)
237 TagStart
= strstr(Buffer
, "<?xml version");
238 if (TagStart
!= NULL
)
240 TagStop
= strstr(TagStart
, "?>");
243 // Version tag present without UTF-8
// Len = wanted tag length minus existing tag length (existing tag spans
// TagStart up to and including the 2-byte "?>"); it can be negative.
244 Len
= UTFtaglen
- (TagStop
- TagStart
+ 2);
247 // Expand/contract the space
248 memmove(TagStop
+ Len
, TagStop
, NumRead
- (TagStop
- Buffer
));
251 memcpy(TagStart
, UTFtag
, UTFtaglen
);
255 // Version tag is not terminated. Cannot fix.
261 // No version tag found. Add one after BOM, with newline.
262 memmove(AfterBOM
+ UTFtaglen
+ 1, AfterBOM
, NumRead
);
263 memcpy(AfterBOM
, UTFtag
, UTFtaglen
);
264 AfterBOM
[UTFtaglen
] = '\n';
265 NumRead
+= UTFtaglen
+ 1;
270 if (!(Action
& Changed
))
272 // If no problems marked for fixing, leave it here.
275 // Changes made - open a temp file for the BOM'ed version
276 if ((fpout
= fopen(TName
, "w")) == NULL
)
278 fprintf(stderr
, "Cannot open temp file\n");
283 if (fwrite(Buffer
, 1, NumRead
, fpout
) != NumRead
)
285 fprintf(stderr
, "Error writing to temp file\n");
294 // If changes have been made, replace original file with temp file.
295 if (Changed
& Action
)
297 // Replace original with temp file
299 if (remove(FName
) != 0)
301 fprintf(stderr
, "Cannot delete original file\n");
304 if (rename(TName
, FName
) != 0)
306 fprintf(stderr
, "Cannot replace original file with fixed version\n");
309 // Add flags to indicate what we have actually fixed
310 if (Changed
& Action
& (DOUBLE_BOM
| ADD_BOM
))
311 Changed
|= FIXED_BOM
;
312 if (Changed
& Action
& XML_TAG
)
313 Changed
|= FIXED_TAG
;