// Scintilla source code edit control
/** @file UniConversion.h
 ** Functions to handle UTF-8 and UTF-16 strings.
 **/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #ifndef UNICONVERSION_H
9 #define UNICONVERSION_H
// Longest UTF-8 sequence in bytes: RFC 3629 limits an encoded code point to 4.
const int UTF8MaxBytes = 4;
// Code point U+FFFD REPLACEMENT CHARACTER.
// NOTE(review): presumably substituted for undecodable input -- users of this
// constant are outside this header, confirm there.
const int unicodeReplacementChar = 0xFFFD;
// Declaration only; the definition is not in this header.
// NOTE(review): from the name and signature this presumably returns the number
// of UTF-8 bytes needed to encode the tlen wchar_t units starting at uptr --
// confirm against the definition.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably encodes the tlen wchar_t units at uptr as UTF-8 into
// putf, with len giving the size of putf in bytes -- confirm against the
// definition, including what happens when the output does not fit.
void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably the byte length of the UTF-8 sequence introduced by
// lead byte ch -- confirm against the definition.
unsigned int UTF8CharLength(unsigned char ch);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably the number of UTF-16 code units needed to hold the
// len bytes of UTF-8 at s -- confirm against the definition.
size_t UTF16Length(const char *s, size_t len);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably decodes the len bytes of UTF-8 at s into tbuf, whose
// capacity is tlen wchar_t units, and returns the number of units produced --
// confirm against the definition.
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably decodes the len bytes of UTF-8 at s into tbuf as
// 32-bit code points (capacity tlen entries) and returns the count produced --
// confirm against the definition.
unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf, unsigned int tlen);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably writes code point val to tbuf as one or two UTF-16
// units and returns how many were written -- confirm against the definition.
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
// Table with one int per possible byte value (256 entries), defined elsewhere.
// NOTE(review): by its name it maps a UTF-8 lead byte to a byte count, and is
// presumably filled in by UTF8BytesOfLeadInitialise() -- confirm against the
// definitions before relying on its contents.
extern int UTF8BytesOfLead[256];
void UTF8BytesOfLeadInitialise();
// True when ch is in 0x80..0xBF, the range of UTF-8 continuation (trail) bytes.
// ch is compared as an int, so a negative value (for example a byte read
// through a signed char) is never reported as a trail byte.
inline bool UTF8IsTrailByte(int ch) {
	return (ch >= 0x80) && (ch < 0xc0);
}
// True when ch is below 0x80, i.e. a byte that stands alone as a single-byte
// (ASCII) UTF-8 character.
// NOTE(review): the function body was reconstructed -- verify against the
// upstream header.
inline bool UTF8IsAscii(int ch) {
	return ch < 0x80;
}
// Bit masks: UTF8MaskWidth covers the low three bits, UTF8MaskInvalid is the
// next bit up, so the two never overlap.
// NOTE(review): presumably applied to the value returned by UTF8Classify
// (width in the low bits, invalid flag above) -- confirm against its definition.
enum { UTF8MaskWidth = 0x7, UTF8MaskInvalid = 0x8 };
// Declaration only; the definition is not in this header.
// NOTE(review): presumably inspects the UTF-8 sequence at us (len bytes
// available) and returns its byte width combined with an invalid flag, to be
// separated with UTF8MaskWidth / UTF8MaskInvalid -- confirm against the
// definition.
int UTF8Classify(const unsigned char *us, int len);
// Similar to UTF8Classify but returns a length of 1 for invalid bytes
// instead of setting the invalid flag.
// NOTE(review): us/len are presumably the bytes to examine and how many are
// available -- the definition is not in this header, confirm there.
int UTF8DrawBytes(const unsigned char *us, int len);
// Line separator is U+2028 \xe2\x80\xa8
// Paragraph separator is U+2029 \xe2\x80\xa9
// Each of them occupies three bytes when encoded as UTF-8.
const int UTF8SeparatorLength = 3;
// True when the bytes at us are E2 80 A8 (U+2028) or E2 80 A9 (U+2029).
// Reads up to us[2]; the && chain short-circuits, so a later byte is only
// read once every earlier byte has matched.
inline bool UTF8IsSeparator(const unsigned char *us) {
	return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) || (us[2] == 0xa9));
}
// NEL is U+0085 \xc2\x85
// It occupies two bytes when encoded as UTF-8.
const int UTF8NELLength = 2;
// True when the bytes at us are C2 85, the UTF-8 encoding of U+0085 (NEL).
// us[1] is only read when us[0] already matched, because && short-circuits.
inline bool UTF8IsNEL(const unsigned char *us) {
	return (us[0] == 0xc2) && (us[1] == 0x85);
}
// Inclusive bounds of the UTF-16 lead (high) surrogate range.
enum { SURROGATE_LEAD_FIRST = 0xD800 };
enum { SURROGATE_LEAD_LAST = 0xDBFF };

// Number of UTF-16 code units in the character that starts with uch:
// 2 when uch is a lead surrogate (SURROGATE_LEAD_FIRST..SURROGATE_LEAD_LAST),
// otherwise 1. Only uch itself is examined; the following unit is not checked.
inline unsigned int UTF16CharLength(wchar_t uch) {
	return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;
}