fs/ntfs/util.c

   1 /*
   2  *  util.c
   3  *  Miscellaneous support
   4  *
   5  *  Copyright (C) 1997 Martin von Löwis
   6  *  Copyright (C) 1997 Régis Duchesne
   7  *
   8  *  The utf8 routines are copied from Python wstrop module,
   9  */
  10
  11 #include "types.h"
  12 #include "struct.h"
  13 #include "util.h"
  14
  15 #include <errno.h>
  16 /* FreeBSD doesn't seem to have EILSEQ in errno.h */
  17 #ifndef EILSEQ
  18 # define EILSEQ EINVAL
  19 #endif
  20 #include "support.h"
  21
  22 /* Converts a single wide character to a sequence of utf8 bytes.
  23  * Returns the number of bytes, or 0 on error.
  24  */
  25 static int
  26 to_utf8(ntfs_u16 c,unsigned char* buf)
  27 {
  28         if(c==0)
  29                 return 0; /* No support for embedded 0 runes */
  30         if(c<0x80){
  31                 if(buf)buf[0]=c;
  32                 return 1;
  33         }
  34         if(c<0x800){
  35                 if(buf){
  36                         buf[0] = 0xc0 | (c>>6);
  37                         buf[1] = 0x80 | (c & 0x3f);
  38                 }
  39                 return 2;
  40         }
  41         if(c<0x10000){
  42                 if(buf){
  43                         buf[0] = 0xe0 | (c>>12);
  44                         buf[1] = 0x80 | ((c>>6) & 0x3f);
  45                         buf[2] = 0x80 | (c & 0x3f);
  46                 }
  47                 return 3;
  48         }
  49         /* We don't support characters above 0xFFFF in NTFS */
  50         return 0;
  51 }
  52
  53 /* Decodes a sequence of utf8 bytes into a single wide character.
  54  * Returns the number of bytes consumed, or 0 on error
  55  */
  56 static int
  57 from_utf8(const unsigned char* str,ntfs_u16 *c)
  58 {
  59         int l=0,i;
  60
  61         if(*str<0x80){
  62                 *c = *str;
  63                 return 1;
  64         }
  65         if(*str<0xc0) /* lead byte must not be 10xxxxxx */
  66                 return 0;   /* is c0 a possible lead byte? */
  67         if(*str<0xe0){         /* 110xxxxx */
  68                 *c = *str & 0x1f;
  69                 l=2;
  70         }else if(*str<0xf0){   /* 1110xxxx */
  71                 *c = *str & 0xf;
  72                 l=3;
  73         }else if(*str<0xf8){   /* 11110xxx */
  74                 *c = *str & 7;
  75                 l=4;
  76         }else /* We don't support characters above 0xFFFF in NTFS */
  77                 return 0;
  78
  79
  80         for(i=1;i<l;i++){
  81                 /* all other bytes must be 10xxxxxx */
  82                 if((str[i] & 0xc0) != 0x80)
  83                         return 0;
  84                 *c <<= 6;
  85                 *c |= str[i] & 0x3f;
  86         }
  87         return l;
  88 }
  89
  90 /* Converts wide string to UTF-8. Expects two in- and two out-parameters.
  91  * Returns 0 on success, or error code.
  92  * The caller has to free the result string.
  93  * There is no support for UTF-16, yet
  94  */
  95 static int ntfs_dupuni2utf8(ntfs_u16* in, int in_len,char **out,int *out_len)
  96 {
  97         int i,tmp;
  98         int len8;
  99         unsigned char *result;
 100
 101         ntfs_debug(DEBUG_OTHER,"converting l=%d\n",in_len);
 102         /* count the length of the resulting UTF-8 */
 103         for(i=len8=0;i<in_len;i++){
 104                 tmp=to_utf8(in[i],0);
 105                 if(!tmp)
 106                         /* invalid character */
 107                         return EILSEQ;
 108                 len8+=tmp;
 109         }
 110         *out=result=ntfs_malloc(len8+1); /* allow for zero-termination */
 111
 112         if(!result)
 113                 return ENOMEM;
 114         result[len8]='\0';
 115         *out_len=len8;
 116         for(i=len8=0;i<in_len;i++)
 117                 len8+=to_utf8(in[i],result+len8);
 118         return 0;
 119 }
 120
 121 /* Converts an UTF-8 sequence to a wide string. Same conventions as the
 122  * previous function
 123  */
 124 static int ntfs_duputf82uni(unsigned char* in, int in_len,ntfs_u16** out,int *out_len)
 125 {
 126         int i,tmp;
 127         int len16;
 128
 129         ntfs_u16* result;
 130         ntfs_u16 wtmp;
 131         for(i=len16=0;i<in_len;i+=tmp,len16++){
 132                 tmp=from_utf8(in+i,&wtmp);
 133                 if(!tmp)
 134                         return EILSEQ;
 135         }
 136         *out=result=ntfs_malloc(2*(len16+1));
 137         if(!result)
 138                 return ENOMEM;
 139         result[len16]=0;
 140         *out_len=len16;
 141         for(i=len16=0;i<in_len;i+=tmp,len16++)
 142                 tmp=from_utf8(in+i,result+len16);
 143         return 0;
 144 }
 145
 146 /* See above. Produces ISO-8859-1 from wide strings */
 147 static int ntfs_dupuni288591(ntfs_u16* in,int in_len,char** out,int *out_len)
 148 {
 149         int i;
 150         char *result;
 151
 152         /* check for characters out of range */
 153         for(i=0;i<in_len;i++)
 154                 if(in[i]>=256)
 155                         return EILSEQ;
 156         *out=result=ntfs_malloc(in_len+1);
 157         if(!result)
 158                 return ENOMEM;
 159         result[in_len]='\0';
 160         *out_len=in_len;
 161         for(i=0;i<in_len;i++)
 162                 result[i]=in[i];
 163         return 0;
 164 }
 165
 166 /* See above */
 167 static int ntfs_dup885912uni(unsigned char* in,int in_len,ntfs_u16 **out,int *out_len)
 168 {
 169         int i;
 170
 171         ntfs_u16* result;
 172         *out=result=ntfs_malloc(2*in_len);
 173         if(!result)
 174                 return ENOMEM;
 175         *out_len=in_len;
 176         for(i=0;i<in_len;i++)
 177                 result[i]=in[i];
 178         return 0;
 179 }
 180
 181 /* Encodings dispatcher */
 182 int ntfs_encodeuni(ntfs_volume *vol,ntfs_u16 *in, int in_len,
 183                    char **out, int *out_len)
 184 {
 185         if(vol->nct & nct_utf8)
 186                 return ntfs_dupuni2utf8(in,in_len,out,out_len);
 187         else if(vol->nct & nct_iso8859_1)
 188                 return ntfs_dupuni288591(in,in_len,out,out_len);
 189         else if(vol->nct & (nct_map|nct_uni_xlate))
 190                 /* uni_xlate is handled inside map */
 191                 return ntfs_dupuni2map(vol,in,in_len,out,out_len);
 192         else
 193                 return EINVAL; /* unknown encoding */
 194 }
 195
 196 int ntfs_decodeuni(ntfs_volume *vol,char *in, int in_len,
 197                    ntfs_u16 **out, int *out_len)
 198 {
 199         if(vol->nct & nct_utf8)
 200                 return ntfs_duputf82uni(in,in_len,out,out_len);
 201         else if(vol->nct & nct_iso8859_1)
 202                 return ntfs_dup885912uni(in,in_len,out,out_len);
 203         else if(vol->nct & (nct_map | nct_uni_xlate))
 204                 return ntfs_dupmap2uni(vol,in,in_len,out,out_len);
 205         else
 206                 return EINVAL;
 207 }
 208
 209 /* Same address space copies */
 210 void ntfs_put(ntfs_io *dest,void *src,ntfs_size_t n)
 211 {
 212         ntfs_memcpy(dest->param,src,n);
 213         dest->param+=n;
 214 }
 215
 216 void ntfs_get(void* dest,ntfs_io *src,ntfs_size_t n)
 217 {
 218         ntfs_memcpy(dest,src->param,n);
 219         src->param+=n;
 220 }
 221
 222 void *ntfs_calloc(int size)
 223 {
 224         void *result=ntfs_malloc(size);
 225
 226         if(result)
 227                 ntfs_bzero(result,size);
 228         return result;
 229 }
 230
 231 #if 0
 232 /* copy len unicode characters from from to to :) */
 233 void ntfs_uni2ascii(char *to,char *from,int len)
 234 {
 235         int i;
 236
 237         for(i=0;i<len;i++)
 238                 to[i]=from[2*i];
 239         to[i]='\0';
 240 }
 241 #endif
 242
 243 /* copy len asci characters from from to to :) */
 244 void ntfs_ascii2uni(short int *to,char *from,int len)
 245 {
 246         int i;
 247
 248         for(i=0;i<len;i++)
 249                 to[i]=from[i];
 250         to[i]=0;
 251 }
 252
 253 /* strncmp for Unicode strings */
 254 int ntfs_uni_strncmp(short int* a,short int *b,int n)
 255 {
 256         int i;
 257
 258         for(i=0;i<n;i++)
 259         {
 260                 if(a[i]<b[i])
 261                         return -1;
 262                 if(b[i]<a[i])
 263                         return 1;
 264         }
 265         return 0;
 266 }
 267
 268 /* strncmp between Unicode and ASCII strings */
 269 int ntfs_ua_strncmp(short int* a,char* b,int n)
 270 {
 271         int i;
 272
 273         for(i=0;i<n;i++)
 274         {
 275                 if(a[i]<b[i])
 276                         return -1;
 277                 if(b[i]<a[i])
 278                         return 1;
 279         }
 280         return 0;
 281 }
 282
 283 /* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units)
 284  * into Unix UTC (based 1.1.1970, in seconds)
 285  */
 286 ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc)
 287 {
 288 /*
 289  * This is very gross because
 290  * 1: We must do 64-bit division on a 32-bit machine
 291  * 2: We can't use libgcc for long long operations in the kernel
 292  * 3: Floating point math in the kernel would corrupt user data
 293  */
 294         const unsigned int D = 10000000;
 295         unsigned int H = (ntutc >> 32);
 296         unsigned int L = (unsigned int)ntutc;
 297         unsigned int numerator2;
 298         unsigned int lowseconds;
 299         unsigned int result;
 300
 301         /* It is best to subtract 0x019db1ded53e8000 first. */
 302         /* Then the 1601-based date becomes a 1970-based date. */
 303         if(L < (unsigned)0xd53e8000) H--;
 304         L -= (unsigned)0xd53e8000;
 305         H -= (unsigned)0x019db1de;
 306
 307         /*
 308          * Now divide 64-bit numbers on a 32-bit machine :-)
 309          * With the subtraction already done, the result fits in 32 bits.
 310          * The numerator fits in 56 bits and the denominator fits
 311          * in 24 bits, so we can shift by 8 bits to make this work.
 312          */
 313
 314         numerator2  = (H<<8) | (L>>24);
 315         result      = (numerator2 / D);   /* shifted 24 right!! */
 316         lowseconds  = result << 24;
 317
 318         numerator2  = ((numerator2-result*D)<<8) | ((L>>16)&0xff);
 319         result      = (numerator2 / D);   /* shifted 16 right!! */
 320         lowseconds |= result << 16;
 321
 322         numerator2  = ((numerator2-result*D)<<8) | ((L>>8)&0xff);
 323         result      = (numerator2 / D);   /* shifted 8 right!! */
 324         lowseconds |= result << 8;
 325
 326         numerator2  = ((numerator2-result*D)<<8) | (L&0xff);
 327         result      = (numerator2 / D);   /* not shifted */
 328         lowseconds |= result;
 329
 330         return lowseconds;
 331 }
 332
 333 /* Convert the Unix UTC into NT UTC */
 334 ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t)
 335 {
 336         return ((t + (ntfs_time64_t)(369*365+89)*24*3600) * 10000000);
 337 }
 338
 339 /*
 340  * Local variables:
 341  * c-file-style: "linux"
 342  * End:
 343  */