Import 2.1.112pre1
[davej-history.git] / fs / ntfs / util.c
blob0cd5561779aef1280bcbd906e907771579f57d29
/*
 *  util.c
 *  Miscellaneous support
 *
 *  Copyright (C) 1997 Martin von Löwis
 *  Copyright (C) 1997 Régis Duchesne
 *
 *  The utf8 routines are copied from the Python wstrop module.
 */
11 #include "types.h"
12 #include "struct.h"
13 #include "util.h"
15 #include <errno.h>
16 /* FreeBSD doesn't seem to have EILSEQ in errno.h */
17 #ifndef EILSEQ
18 # define EILSEQ EINVAL
19 #endif
20 #include "support.h"
22 /* Converts a single wide character to a sequence of utf8 bytes.
23 * Returns the number of bytes, or 0 on error.
25 static int
26 to_utf8(ntfs_u16 c,unsigned char* buf)
28 if(c==0)
29 return 0; /* No support for embedded 0 runes */
30 if(c<0x80){
31 if(buf)buf[0]=c;
32 return 1;
34 if(c<0x800){
35 if(buf){
36 buf[0] = 0xc0 | (c>>6);
37 buf[1] = 0x80 | (c & 0x3f);
39 return 2;
41 if(c<0x10000){
42 if(buf){
43 buf[0] = 0xe0 | (c>>12);
44 buf[1] = 0x80 | ((c>>6) & 0x3f);
45 buf[2] = 0x80 | (c & 0x3f);
47 return 3;
49 /* We don't support characters above 0xFFFF in NTFS */
50 return 0;
53 /* Decodes a sequence of utf8 bytes into a single wide character.
54 * Returns the number of bytes consumed, or 0 on error
56 static int
57 from_utf8(const unsigned char* str,ntfs_u16 *c)
59 int l=0,i;
61 if(*str<0x80){
62 *c = *str;
63 return 1;
65 if(*str<0xc0) /* lead byte must not be 10xxxxxx */
66 return 0; /* is c0 a possible lead byte? */
67 if(*str<0xe0){ /* 110xxxxx */
68 *c = *str & 0x1f;
69 l=2;
70 }else if(*str<0xf0){ /* 1110xxxx */
71 *c = *str & 0xf;
72 l=3;
73 }else if(*str<0xf8){ /* 11110xxx */
74 *c = *str & 7;
75 l=4;
76 }else /* We don't support characters above 0xFFFF in NTFS */
77 return 0;
80 for(i=1;i<l;i++){
81 /* all other bytes must be 10xxxxxx */
82 if((str[i] & 0xc0) != 0x80)
83 return 0;
84 *c <<= 6;
85 *c |= str[i] & 0x3f;
87 return l;
90 /* Converts wide string to UTF-8. Expects two in- and two out-parameters.
91 * Returns 0 on success, or error code.
92 * The caller has to free the result string.
93 * There is no support for UTF-16, yet
95 static int ntfs_dupuni2utf8(ntfs_u16* in, int in_len,char **out,int *out_len)
97 int i,tmp;
98 int len8;
99 unsigned char *result;
101 ntfs_debug(DEBUG_OTHER,"converting l=%d\n",in_len);
102 /* count the length of the resulting UTF-8 */
103 for(i=len8=0;i<in_len;i++){
104 tmp=to_utf8(in[i],0);
105 if(!tmp)
106 /* invalid character */
107 return EILSEQ;
108 len8+=tmp;
110 *out=result=ntfs_malloc(len8+1); /* allow for zero-termination */
112 if(!result)
113 return ENOMEM;
114 result[len8]='\0';
115 *out_len=len8;
116 for(i=len8=0;i<in_len;i++)
117 len8+=to_utf8(in[i],result+len8);
118 return 0;
121 /* Converts an UTF-8 sequence to a wide string. Same conventions as the
122 * previous function
124 static int ntfs_duputf82uni(unsigned char* in, int in_len,ntfs_u16** out,int *out_len)
126 int i,tmp;
127 int len16;
129 ntfs_u16* result;
130 ntfs_u16 wtmp;
131 for(i=len16=0;i<in_len;i+=tmp,len16++){
132 tmp=from_utf8(in+i,&wtmp);
133 if(!tmp)
134 return EILSEQ;
136 *out=result=ntfs_malloc(2*(len16+1));
137 if(!result)
138 return ENOMEM;
139 result[len16]=0;
140 *out_len=len16;
141 for(i=len16=0;i<in_len;i+=tmp,len16++)
142 tmp=from_utf8(in+i,result+len16);
143 return 0;
146 /* See above. Produces ISO-8859-1 from wide strings */
147 static int ntfs_dupuni288591(ntfs_u16* in,int in_len,char** out,int *out_len)
149 int i;
150 char *result;
152 /* check for characters out of range */
153 for(i=0;i<in_len;i++)
154 if(in[i]>=256)
155 return EILSEQ;
156 *out=result=ntfs_malloc(in_len+1);
157 if(!result)
158 return ENOMEM;
159 result[in_len]='\0';
160 *out_len=in_len;
161 for(i=0;i<in_len;i++)
162 result[i]=in[i];
163 return 0;
166 /* See above */
167 static int ntfs_dup885912uni(unsigned char* in,int in_len,ntfs_u16 **out,int *out_len)
169 int i;
171 ntfs_u16* result;
172 *out=result=ntfs_malloc(2*in_len);
173 if(!result)
174 return ENOMEM;
175 *out_len=in_len;
176 for(i=0;i<in_len;i++)
177 result[i]=in[i];
178 return 0;
181 /* Encodings dispatcher */
182 int ntfs_encodeuni(ntfs_volume *vol,ntfs_u16 *in, int in_len,
183 char **out, int *out_len)
185 if(vol->nct & nct_utf8)
186 return ntfs_dupuni2utf8(in,in_len,out,out_len);
187 else if(vol->nct & nct_iso8859_1)
188 return ntfs_dupuni288591(in,in_len,out,out_len);
189 else if(vol->nct & (nct_map|nct_uni_xlate))
190 /* uni_xlate is handled inside map */
191 return ntfs_dupuni2map(vol,in,in_len,out,out_len);
192 else
193 return EINVAL; /* unknown encoding */
196 int ntfs_decodeuni(ntfs_volume *vol,char *in, int in_len,
197 ntfs_u16 **out, int *out_len)
199 if(vol->nct & nct_utf8)
200 return ntfs_duputf82uni(in,in_len,out,out_len);
201 else if(vol->nct & nct_iso8859_1)
202 return ntfs_dup885912uni(in,in_len,out,out_len);
203 else if(vol->nct & (nct_map | nct_uni_xlate))
204 return ntfs_dupmap2uni(vol,in,in_len,out,out_len);
205 else
206 return EINVAL;
209 /* Same address space copies */
210 void ntfs_put(ntfs_io *dest,void *src,ntfs_size_t n)
212 ntfs_memcpy(dest->param,src,n);
213 dest->param+=n;
216 void ntfs_get(void* dest,ntfs_io *src,ntfs_size_t n)
218 ntfs_memcpy(dest,src->param,n);
219 src->param+=n;
/*
 * Allocate size bytes with ntfs_malloc and clear them with ntfs_bzero.
 * Returns the allocation, or whatever null result ntfs_malloc returned.
 */
void *ntfs_calloc(int size)
{
	void *mem = ntfs_malloc(size);

	if (!mem)
		return mem;

	ntfs_bzero(mem, size);
	return mem;
}
#if 0
/*
 * Copy len unicode characters from 'from' to 'to': byte 2*k of 'from'
 * becomes byte k of 'to', and a terminating '\0' is appended.
 * (Disabled code.)
 */
void ntfs_uni2ascii(char *to, char *from, int len)
{
	int k = 0;

	while (k < len) {
		to[k] = from[2 * k];
		k++;
	}
	to[k] = '\0';
}
#endif
/*
 * Copy len ASCII characters from 'from' to 'to', widening each byte to
 * 16 bits, and append a terminating 0.  'to' must have room for len+1
 * elements.
 *
 * Each byte is widened as an unsigned value, so bytes of 0x80 and above
 * map to 0x0080..0x00FF rather than being sign-extended where plain
 * char is signed.
 */
void ntfs_ascii2uni(short int *to, char *from, int len)
{
	int i;

	for (i = 0; i < len; i++)
		to[i] = (unsigned char)from[i];
	to[i] = 0;
}
/*
 * Compare the first n 16-bit units of two Unicode strings.
 *
 * Returns -1 if a sorts before b, 1 if it sorts after, and 0 if the
 * first n units are equal.  Units are compared as unsigned 16-bit
 * values, so characters of 0x8000 and above sort after all lower ones
 * instead of being treated as negative numbers.
 *
 * Unlike the C library strncmp, the comparison does not stop at a 0
 * unit: exactly n units of each string are examined unless a difference
 * is found earlier.
 */
int ntfs_uni_strncmp(short int *a, short int *b, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		unsigned short ua = (unsigned short)a[i];
		unsigned short ub = (unsigned short)b[i];

		if (ua < ub)
			return -1;
		if (ub < ua)
			return 1;
	}
	return 0;
}
/*
 * Compare the first n characters of a Unicode string and an ASCII string.
 *
 * Returns -1 if a sorts before b, 1 if it sorts after, and 0 if the
 * first n characters are equal.  Both sides are compared as unsigned
 * values (16-bit for a, 8-bit for b), so neither Unicode characters of
 * 0x8000 and above nor bytes of 0x80 and above are treated as negative.
 *
 * Unlike the C library strncmp, the comparison does not stop at a 0
 * character: exactly n characters of each string are examined unless a
 * difference is found earlier.
 */
int ntfs_ua_strncmp(short int *a, char *b, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		unsigned short ua = (unsigned short)a[i];
		unsigned char ub = (unsigned char)b[i];

		if (ua < ub)
			return -1;
		if (ub < ua)
			return 1;
	}
	return 0;
}
283 /* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units)
284 * into Unix UTC (based 1.1.1970, in seconds)
286 ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc)
289 * This is very gross because
290 * 1: We must do 64-bit division on a 32-bit machine
291 * 2: We can't use libgcc for long long operations in the kernel
292 * 3: Floating point math in the kernel would corrupt user data
294 const unsigned int D = 10000000;
295 unsigned int H = (ntutc >> 32);
296 unsigned int L = (unsigned int)ntutc;
297 unsigned int numerator2;
298 unsigned int lowseconds;
299 unsigned int result;
301 /* It is best to subtract 0x019db1ded53e8000 first. */
302 /* Then the 1601-based date becomes a 1970-based date. */
303 if(L < (unsigned)0xd53e8000) H--;
304 L -= (unsigned)0xd53e8000;
305 H -= (unsigned)0x019db1de;
308 * Now divide 64-bit numbers on a 32-bit machine :-)
309 * With the subtraction already done, the result fits in 32 bits.
310 * The numerator fits in 56 bits and the denominator fits
311 * in 24 bits, so we can shift by 8 bits to make this work.
314 numerator2 = (H<<8) | (L>>24);
315 result = (numerator2 / D); /* shifted 24 right!! */
316 lowseconds = result << 24;
318 numerator2 = ((numerator2-result*D)<<8) | ((L>>16)&0xff);
319 result = (numerator2 / D); /* shifted 16 right!! */
320 lowseconds |= result << 16;
322 numerator2 = ((numerator2-result*D)<<8) | ((L>>8)&0xff);
323 result = (numerator2 / D); /* shifted 8 right!! */
324 lowseconds |= result << 8;
326 numerator2 = ((numerator2-result*D)<<8) | (L&0xff);
327 result = (numerator2 / D); /* not shifted */
328 lowseconds |= result;
330 return lowseconds;
333 /* Convert the Unix UTC into NT UTC */
334 ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t)
336 return ((t + (ntfs_time64_t)(369*365+89)*24*3600) * 10000000);
/*
 * Local variables:
 * c-file-style: "linux"
 * End:
 */