1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
9 #include "UnicodeCodingSystem.h"
// Local prototype for memmove, compiled only when DECLARE_MEMMOVE is defined.
// UnicodeDecoder::decode() in this file calls memmove.
// NOTE(review): presumably for platforms whose headers lack the declaration -- confirm.
15 #ifdef DECLARE_MEMMOVE
17 void *memmove(void *, const void *, size_t);
22 namespace SP_NAMESPACE
{
// 16-bit value that decode() compares against the first two input bytes
// (loaded as one word) to recognise a byte order mark; also the value that
// UnicodeEncoder::startFile() writes at the start of output.
25 const unsigned short byteOrderMark
= 0xfeff;
// The byte-reversed form of the mark; decode() also treats a leading word
// with this value as a byte order mark.
26 const unsigned short swappedByteOrderMark
= 0xfffe;
// Decoder for input made of 16-bit units.  When it is constructed with a
// sub coding system and the input does not start with a byte order mark,
// decode() creates a decoder from that system and hands the input to it.
// NOTE(review): this listing omits some lines of the class (access
// specifiers, closing brace, part of the decode() parameter list).
28 class UnicodeDecoder
: public Decoder
{
// sub may be null; the constructor tests it to choose the value passed to Decoder.
30 UnicodeDecoder(const InputCodingSystem
*sub
);
// Decodes fromLen bytes at from into to; see the definition for details.
31 size_t decode(Char
*to
, const char *from
, size_t fromLen
,
// Forwards to the sub decoder in the visible code; also consults hadByteOrderMark_.
33 Boolean
convertOffset(unsigned long &offset
) const;
// Initialised to 0 by the constructor.
// NOTE(review): presumably records that the first character pair was examined -- uses not visible here.
35 PackedBoolean hadFirstChar_
;
// Set to 1 by decode() when the leading word equals either byte order mark constant.
36 PackedBoolean hadByteOrderMark_
;
// Initialised to 0 by the constructor.
// NOTE(review): presumably selects the byte-swapping loop in decode() -- the assignment is not visible here.
37 PackedBoolean swapBytes_
;
// Decoder obtained from subCodingSystem_->makeDecoder() inside decode().
38 Owner
<Decoder
> subDecoder_
;
// Coding system given to the constructor; tested for null before use.
39 const InputCodingSystem
*subCodingSystem_
;
// Encoder that writes Char data to an OutputByteStream as 16-bit units,
// two bytes per character.
// NOTE(review): this listing omits some lines of the class (constructor,
// destructor, data members, access specifiers, closing brace).
42 class UnicodeEncoder
: public Encoder
{
// Non-const overload: when Char is wider than two bytes it narrows the
// caller's buffer in place before writing.
46 void output(Char
*, size_t, OutputByteStream
*);
// Const overload: when Char is wider than two bytes it narrows into buf_
// and writes from there, leaving the caller's data untouched.
47 void output(const Char
*, size_t, OutputByteStream
*);
// Writes byteOrderMark to the stream.
48 void startFile(OutputByteStream
*);
// Allocates buf_ as an array of unsigned short and records its size in bufSize_.
50 void allocBuf(size_t);
// Constructs the coding system with an optional sub coding system.
// NOTE(review): the initializer list and body are not visible in this
// listing; presumably sub is stored in sub_, which makeDecoder() passes to
// UnicodeDecoder -- confirm.
55 UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem
*sub
)
// Returns a UnicodeDecoder allocated with new and constructed from sub_.
// NOTE(review): a raw pointer is returned; ownership presumably passes to
// the caller -- confirm.
60 Decoder
*UnicodeCodingSystem::makeDecoder() const
62 return new UnicodeDecoder(sub_
);
// Returns a default-constructed UnicodeEncoder allocated with new.
// NOTE(review): a raw pointer is returned; ownership presumably passes to
// the caller -- confirm.
65 Encoder
*UnicodeCodingSystem::makeEncoder() const
67 return new UnicodeEncoder
;
// NOTE(review): only the signature is visible in this listing; the returned
// value cannot be documented from here.  The encoder in this file writes two
// bytes per character -- confirm the body agrees.
70 unsigned UnicodeCodingSystem::fixedBytesPerChar() const
// Builds a decoder that has seen no input yet: all three flags start at 0
// and the given (possibly null) sub coding system is remembered.
// The base Decoder receives 1 when a sub coding system is supplied and 2
// otherwise.
// NOTE(review): presumably this is the minimum bytes per character, since
// decode() later overwrites minBytesPerChar_ from the sub decoder -- confirm.
75 UnicodeDecoder::UnicodeDecoder(const InputCodingSystem
*subCodingSystem
)
76 : Decoder(subCodingSystem
? 1 : 2), subCodingSystem_(subCodingSystem
),
// NOTE(review): the initializers are listed in a different order from the
// member declarations in the class (hadFirstChar_, hadByteOrderMark_,
// swapBytes_, subDecoder_, subCodingSystem_).  Members are initialised in
// declaration order regardless; this is harmless here because no
// initializer depends on another member, but some compilers warn about it.
77 hadByteOrderMark_(0), hadFirstChar_(0), swapBytes_(0)
// Decodes fromLen bytes of 16-bit input at from into to and stores in *rest
// the position from which a later call should resume.
// NOTE(review): this listing omits several lines of the function (the last
// parameter, guards, braces, the declaration of u, the stores into to and
// the return of the native path), so the comments below describe only the
// statements that are visible.
82 size_t UnicodeDecoder::decode(Char
*to
, const char *from
, size_t fromLen
,
// Hand the whole call to the sub decoder.
// NOTE(review): the condition guarding this early return is not visible;
// presumably it is taken once subDecoder_ has been created -- confirm.
91 return subDecoder_
->decode(to
, from
, fromLen
, rest
);
// Load the first two input bytes into u so that they can be compared as a
// single 16-bit word (u is accessed both as bytes[] and as word).
100 u
.bytes
[0] = from
[0];
101 u
.bytes
[1] = from
[1];
// Leading word equals the byte order mark: remember that a mark was present.
102 if (u
.word
== byteOrderMark
) {
103 hadByteOrderMark_
= 1;
// Leading word equals the byte-reversed mark: also remember it.
// NOTE(review): presumably swapBytes_ is set in this branch -- not visible here.
107 else if (u
.word
== swappedByteOrderMark
) {
108 hadByteOrderMark_
= 1;
// No mark, but a sub coding system was supplied: create its decoder, adopt
// its minimum bytes-per-character, and let it decode this input instead.
113 else if (subCodingSystem_
) {
114 subDecoder_
= subCodingSystem_
->makeDecoder();
115 minBytesPerChar_
= subDecoder_
->minBytesPerChar();
116 return subDecoder_
->decode(to
, from
, fromLen
, rest
);
// Report the end of the input range as the resume position.
// NOTE(review): fromLen may have been adjusted by lines not visible here.
120 *rest
= from
+ fromLen
;
// When Char is exactly two bytes wide the input can be moved as raw bytes;
// the move is skipped when source and destination are the same address.
// memmove (not memcpy) is used, which permits overlapping ranges.
// NOTE(review): any further condition on this path is not visible here.
121 if (sizeof(Char
) == 2) {
123 if (from
!= (char *)to
)
124 memmove(to
, from
, fromLen
);
// Consume the input two bytes at a time, placing the first byte in bytes[1]
// and the second in bytes[0], i.e. exchanging the pair.
// NOTE(review): n is a size_t decremented by 2, so this terminates only if
// fromLen is even at this point; an odd value would wrap past zero.
// Confirm that earlier (not visible) code rounds fromLen down.
129 for (size_t n
= fromLen
; n
> 0; n
-= 2) {
131 u
.bytes
[1] = *from
++;
132 u
.bytes
[0] = *from
++;
// Same pairwise walk, but keeping the bytes in the order they arrive.
// The same even-length assumption applies.
137 for (size_t n
= fromLen
; n
> 0; n
-= 2) {
139 u
.bytes
[0] = *from
++;
140 u
.bytes
[1] = *from
++;
// Converts the offset n in place and reports success as a Boolean.
// NOTE(review): only two statements of the body are visible in this listing.
147 Boolean
UnicodeDecoder::convertOffset(unsigned long &n
) const
// Forward to the sub decoder.
// NOTE(review): the guard is not visible; presumably taken only when
// subDecoder_ is set -- confirm.
150 return subDecoder_
->convertOffset(n
);
// NOTE(review): the adjustment made when a byte order mark was seen is not
// visible here; presumably it accounts for the mark's bytes -- confirm.
151 if (hadByteOrderMark_
)
// Starts with no conversion buffer: buf_ is null and bufSize_ is zero.
// allocBuf() is the function in this file that assigns them.
157 UnicodeEncoder::UnicodeEncoder()
158 : buf_(0), bufSize_(0)
// NOTE(review): the destructor body is not visible in this listing.  Since
// allocBuf() obtains buf_ with new[], it presumably releases it with
// delete [] -- confirm.
162 UnicodeEncoder::~UnicodeEncoder()
// Provides a conversion buffer of n 16-bit units.
// NOTE(review): the lines between the signature and the allocation are not
// visible; presumably the buffer is replaced only when the current one is
// too small, and the old one is freed first -- confirm.
167 void UnicodeEncoder::allocBuf(size_t n
)
// Allocate n elements and record n as the new capacity in the same expression.
171 buf_
= new unsigned short[bufSize_
= n
];
// Writes the byte order mark as the first two bytes sent to sb.
175 void UnicodeEncoder::startFile(OutputByteStream
*sb
)
177 const unsigned short n
= byteOrderMark
;
// The two bytes are taken from n's in-memory representation, so they are
// emitted in the host's byte order.
// NOTE(review): assumes unsigned short is two bytes wide -- confirm for
// the supported platforms.
178 sb
->sputn((char *)&n
, 2);
// Writes n characters from s to sb as 16-bit units (2*n bytes).  Because s
// is non-const, the caller's buffer may be overwritten during conversion.
// NOTE(review): some lines of the body (braces, early return) are not
// visible in this listing.
181 void UnicodeEncoder::output(Char
*s
, size_t n
, OutputByteStream
*sb
)
// Char already has the output width: send the buffer's bytes unchanged.
183 if (sizeof(Char
) == 2) {
184 sb
->sputn((char *)s
, n
*2);
// Wider Char: narrow each element to its low 16 bits in place, viewing the
// same storage as an array of unsigned short.  Walking forward is safe
// because, with sizeof(Char) >= 2, the 16-bit slot i never lies beyond
// element i of s, so no unread element is overwritten.
187 ASSERT(sizeof(Char
) >= 2);
188 unsigned short *p
= (unsigned short *)s
;
189 for (size_t i
= 0; i
< n
; i
++)
190 p
[i
] = s
[i
] & 0xffff;
// The first 2*n bytes of the buffer now hold the packed 16-bit units.
191 sb
->sputn((char *)s
, n
*2);
// Writes n characters from s to sb as 16-bit units (2*n bytes) without
// modifying the caller's data.
// NOTE(review): some lines of the body (braces, early return) are not
// visible in this listing.
194 void UnicodeEncoder::output(const Char
*s
, size_t n
, OutputByteStream
*sb
)
// Char already has the output width: send the buffer's bytes unchanged.
196 if (sizeof(Char
) == 2) {
197 sb
->sputn((char *)s
, n
*2);
// Wider Char: copy the low 16 bits of each element into buf_ and send that.
// NOTE(review): buf_ must hold at least n elements here; the call that
// sizes it (presumably allocBuf(n)) is not visible in this listing --
// confirm it precedes this loop.
201 for (size_t i
= 0; i
< n
; i
++)
202 buf_
[i
] = s
[i
] & 0xffff;
203 sb
->sputn((char *)buf_
, n
*2);
// Fallback for builds without SP_MULTI_BYTE: a single unused static object
// is defined so that this file does not compile to an empty translation unit.
210 #else /* not SP_MULTI_BYTE */
213 static char non_empty_translation_unit
; // sigh
216 #endif /* not SP_MULTI_BYTE */