3065 some functions in the tcp module can be static
[unleashed.git] / usr / src / cmd / man / src / util / nsgmls.src / lib / UnicodeCodingSystem.cxx
blob39c78cf599fabf3fb2e55efb4b666ac9e6766ea1
1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
5 #include "splib.h"
7 #ifdef SP_MULTI_BYTE
9 #include "UnicodeCodingSystem.h"
10 #include "macros.h"
11 #include "Owner.h"
13 #include <stddef.h>
14 #include <string.h>
15 #ifdef DECLARE_MEMMOVE
16 extern "C" {
17 void *memmove(void *, const void *, size_t);
19 #endif
21 #ifdef SP_NAMESPACE
22 namespace SP_NAMESPACE {
23 #endif
// The Unicode byte order mark (U+FEFF) as a 16-bit value, and the value
// obtained when its two bytes are read in the opposite order.
const unsigned short byteOrderMark = 0xFEFF;
const unsigned short swappedByteOrderMark = 0xFFFE;
28 class UnicodeDecoder : public Decoder {
29 public:
30 UnicodeDecoder(const InputCodingSystem *sub);
31 size_t decode(Char *to, const char *from, size_t fromLen,
32 const char **rest);
33 Boolean convertOffset(unsigned long &offset) const;
34 private:
35 PackedBoolean hadFirstChar_;
36 PackedBoolean hadByteOrderMark_;
37 PackedBoolean swapBytes_;
38 Owner<Decoder> subDecoder_;
39 const InputCodingSystem *subCodingSystem_;
42 class UnicodeEncoder : public Encoder {
43 public:
44 UnicodeEncoder();
45 ~UnicodeEncoder();
46 void output(Char *, size_t, OutputByteStream *);
47 void output(const Char *, size_t, OutputByteStream *);
48 void startFile(OutputByteStream *);
49 private:
50 void allocBuf(size_t);
51 unsigned short *buf_;
52 size_t bufSize_;
55 UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem *sub)
56 : sub_(sub)
60 Decoder *UnicodeCodingSystem::makeDecoder() const
62 return new UnicodeDecoder(sub_);
65 Encoder *UnicodeCodingSystem::makeEncoder() const
67 return new UnicodeEncoder;
70 unsigned UnicodeCodingSystem::fixedBytesPerChar() const
72 return 2;
75 UnicodeDecoder::UnicodeDecoder(const InputCodingSystem *subCodingSystem)
76 : Decoder(subCodingSystem ? 1 : 2), subCodingSystem_(subCodingSystem),
77 hadByteOrderMark_(0), hadFirstChar_(0), swapBytes_(0)
82 size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
83 const char **rest)
85 union U {
86 unsigned short word;
87 char bytes[2];
90 if (subDecoder_)
91 return subDecoder_->decode(to, from, fromLen, rest);
92 if (!hadFirstChar_) {
93 if (fromLen < 2) {
94 *rest = from;
95 return 0;
97 hadFirstChar_ = 1;
98 minBytesPerChar_ = 2;
99 U u;
100 u.bytes[0] = from[0];
101 u.bytes[1] = from[1];
102 if (u.word == byteOrderMark) {
103 hadByteOrderMark_ = 1;
104 from += 2;
105 fromLen -= 2;
107 else if (u.word == swappedByteOrderMark) {
108 hadByteOrderMark_ = 1;
109 from += 2;
110 fromLen -= 2;
111 swapBytes_ = 1;
113 else if (subCodingSystem_) {
114 subDecoder_ = subCodingSystem_->makeDecoder();
115 minBytesPerChar_ = subDecoder_->minBytesPerChar();
116 return subDecoder_->decode(to, from, fromLen, rest);
119 fromLen &= ~1;
120 *rest = from + fromLen;
121 if (sizeof(Char) == 2) {
122 if (!swapBytes_) {
123 if (from != (char *)to)
124 memmove(to, from, fromLen);
125 return fromLen/2;
128 if (swapBytes_) {
129 for (size_t n = fromLen; n > 0; n -= 2) {
130 U u;
131 u.bytes[1] = *from++;
132 u.bytes[0] = *from++;
133 *to++ = u.word;
136 else {
137 for (size_t n = fromLen; n > 0; n -= 2) {
138 U u;
139 u.bytes[0] = *from++;
140 u.bytes[1] = *from++;
141 *to++ = u.word;
144 return fromLen/2;
147 Boolean UnicodeDecoder::convertOffset(unsigned long &n) const
149 if (subDecoder_)
150 return subDecoder_->convertOffset(n);
151 if (hadByteOrderMark_)
152 n += 1;
153 n *= 2;
154 return true;
157 UnicodeEncoder::UnicodeEncoder()
158 : buf_(0), bufSize_(0)
162 UnicodeEncoder::~UnicodeEncoder()
164 delete [] buf_;
167 void UnicodeEncoder::allocBuf(size_t n)
169 if (bufSize_ < n) {
170 delete [] buf_;
171 buf_ = new unsigned short[bufSize_ = n];
175 void UnicodeEncoder::startFile(OutputByteStream *sb)
177 const unsigned short n = byteOrderMark;
178 sb->sputn((char *)&n, 2);
181 void UnicodeEncoder::output(Char *s, size_t n, OutputByteStream *sb)
183 if (sizeof(Char) == 2) {
184 sb->sputn((char *)s, n*2);
185 return;
187 ASSERT(sizeof(Char) >= 2);
188 unsigned short *p = (unsigned short *)s;
189 for (size_t i = 0; i < n; i++)
190 p[i] = s[i] & 0xffff;
191 sb->sputn((char *)s, n*2);
194 void UnicodeEncoder::output(const Char *s, size_t n, OutputByteStream *sb)
196 if (sizeof(Char) == 2) {
197 sb->sputn((char *)s, n*2);
198 return;
200 allocBuf(n);
201 for (size_t i = 0; i < n; i++)
202 buf_[i] = s[i] & 0xffff;
203 sb->sputn((char *)buf_, n*2);
206 #ifdef SP_NAMESPACE
208 #endif
210 #else /* not SP_MULTI_BYTE */
212 #ifndef __GNUG__
213 static char non_empty_translation_unit; // sigh
214 #endif
216 #endif /* not SP_MULTI_BYTE */