/*
 * Replacement text written to stdout by the fputs() calls in this file,
 * e.g. when a byte fails the 10xxxxxx continuation-byte check.
 */
static const char *utf8_replace_character = "�";
6 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
9 int cmd__xml_encode(int argc
, const char **argv
)
11 unsigned char buf
[1024], tmp
[4], *tmp2
= NULL
;
12 ssize_t cur
= 0, len
= 1, remaining
= 0;
17 len
= xread(0, buf
, sizeof(buf
));
21 die_errno("Could not read <stdin>");
27 if ((ch
& 0xc0) != 0x80) {
28 fputs(utf8_replace_character
, stdout
);
35 if (--remaining
== 0) {
36 fwrite(tmp
, tmp2
- tmp
, 1, stdout
);
45 fputs("&", stdout
);
47 fputs("'", stdout
);
49 fputs(""", stdout
);
51 fputs("<", stdout
);
53 fputs(">", stdout
);
56 else if (ch
== 0x09 || ch
== 0x0a || ch
== 0x0d)
57 fprintf(stdout
, "&#x%02x;", ch
);
59 fputs(utf8_replace_character
, stdout
);
60 } else if ((ch
& 0xe0) == 0xc0) {
61 /* 110XXXXx 10xxxxxx */
65 } else if ((ch
& 0xf0) == 0xe0) {
66 /* 1110XXXX 10Xxxxxx 10xxxxxx */
70 } else if ((ch
& 0xf8) == 0xf0) {
71 /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
76 fputs(utf8_replace_character
, stdout
);