Import 2.3.26pre2
[davej-history.git] / fs / hfs / trans.c
blobfe8d02ad68ae052961970d06554f69affd93e70c
1 /*
2 * linux/fs/hfs/trans.c
4 * Copyright (C) 1995-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU Public License.
7 * This file contains routines for converting between the Macintosh
8 * character set and various other encodings. This includes dealing
9 * with ':' vs. '/' as the path-element separator.
11 * Latin-1 translation based on code contributed by Holger Schemel
12 * (aeglos@valinor.owl.de).
14 * The '8-bit', '7-bit ASCII' and '7-bit alphanumeric' encodings are
15 * implementations of the three encodings recommended by Apple in the
16 * document "AppleSingle/AppleDouble Formats: Developer's Note
17 * (9/94)". This document is available from Apple's Technical
18 * Information Library from the World Wide Web server
19 * www.info.apple.com.
21 * The 'CAP' encoding is an implementation of the naming scheme used
22 * by the Columbia AppleTalk Package, available for anonymous FTP from
23 * ????.
25 * "XXX" in a comment is a note to myself to consider changing something.
27 * In function preconditions the term "valid" applied to a pointer to
28 * a structure means that the pointer is non-NULL and the structure it
29 * points to has all fields initialized to consistent values.
32 #include "hfs.h"
33 #include <linux/hfs_fs_sb.h>
34 #include <linux/hfs_fs_i.h>
35 #include <linux/hfs_fs.h>
37 /*================ File-local variables ================*/
/*
 * int->ASCII map for a single hex digit (lowercase 'a' through 'f').
 * The table is only ever read, so it is declared const.
 */
static const char hex[16] = {'0','1','2','3','4','5','6','7',
			     '8','9','a','b','c','d','e','f'};
/*
 * Latin-1 to Mac character set map (upper 128 characters only).
 *
 * To keep a single table to maintain, this map is not written out by
 * hand: it is derived from mac2latin_map[] the first time it is needed.
 * Until then every entry is zero, and a zero entry means "no Macintosh
 * equivalent".
 */
static unsigned char latin2mac_map[128] = { 0 };
/*
 * Mac to Latin-1 map for the upper 128 characters (both have ASCII in
 * the lower 128 positions).
 *
 * Element x-128 holds the Latin-1 encoding of the character whose
 * Macintosh encoding is x.  A value of zero means Latin-1 has no
 * corresponding character.  The table is only ever read, so it is
 * declared const.
 */
static const unsigned char mac2latin_map[128] = {
	/* Mac 0x80-0x87 */ 0xC4, 0xC5, 0xC7, 0xC9, 0xD1, 0xD6, 0xDC, 0xE1,
	/* Mac 0x88-0x8f */ 0xE0, 0xE2, 0xE4, 0xE3, 0xE5, 0xE7, 0xE9, 0xE8,
	/* Mac 0x90-0x97 */ 0xEA, 0xEB, 0xED, 0xEC, 0xEE, 0xEF, 0xF1, 0xF3,
	/* Mac 0x98-0x9f */ 0xF2, 0xF4, 0xF6, 0xF5, 0xFA, 0xF9, 0xFB, 0xFC,
	/* Mac 0xa0-0xa7 */ 0x00, 0xB0, 0xA2, 0xA3, 0xA7, 0xB7, 0xB6, 0xDF,
	/* Mac 0xa8-0xaf */ 0xAE, 0xA9, 0x00, 0xB4, 0xA8, 0x00, 0xC6, 0xD8,
	/* Mac 0xb0-0xb7 */ 0x00, 0xB1, 0x00, 0x00, 0xA5, 0xB5, 0xF0, 0x00,
	/* Mac 0xb8-0xbf */ 0x00, 0x00, 0x00, 0xAA, 0xBA, 0x00, 0xE6, 0xF8,
	/* Mac 0xc0-0xc7 */ 0xBF, 0xA1, 0xAC, 0x00, 0x00, 0x00, 0x00, 0xAB,
	/* Mac 0xc8-0xcf */ 0xBB, 0x00, 0xA0, 0xC0, 0xC3, 0xD5, 0x00, 0x00,
	/* Mac 0xd0-0xd7 */ 0xAD, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x00,
	/* Mac 0xd8-0xdf */ 0xFF, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00,
	/* Mac 0xe0-0xe7 */ 0x00, 0x00, 0xB8, 0x00, 0x00, 0xC2, 0xCA, 0xC1,
	/* Mac 0xe8-0xef */ 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xD3, 0xD4,
	/* Mac 0xf0-0xf7 */ 0x00, 0xD2, 0xDA, 0xDB, 0xD9, 0x00, 0x00, 0x00,
	/* Mac 0xf8-0xff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
74 /*================ File-local functions ================*/
/*
 * dehex()
 *
 * Given a hexadecimal digit in ASCII (upper or lower case), return its
 * integer value in the range 0-15.  Any other character yields 0xff,
 * so callers can detect an invalid digit by testing the high nibble
 * of the result.
 */
static inline unsigned char dehex(char c) {
	if ((c >= '0') && (c <= '9')) {
		return c - '0';
	}
	if ((c >= 'a') && (c <= 'f')) {
		return c - 'a' + 10;
	}
	if ((c >= 'A') && (c <= 'F')) {
		return c - 'A' + 10;
	}
	return 0xff;
}
94 /*================ Global functions ================*/
97 * hfs_mac2nat()
99 * Given a 'Pascal String' (a string preceded by a length byte) in
100 * the Macintosh character set produce the corresponding filename using
101 * the Netatalk name-mangling scheme, returning the length of the
102 * mangled filename. Note that the output string is not NULL terminated.
104 * The name-mangling works as follows:
105 * Characters 32-126 (' '-'~') except '/' and any initial '.' are passed
106 * unchanged from input to output. The remaining characters are replaced
107 * by three characters: ':xx' where xx is the hexadecimal representation
108 * of the character, using lowercase 'a' through 'f'.
110 int hfs_mac2nat(char *out, const struct hfs_name *in) {
111 unsigned char c;
112 const unsigned char *p = in->Name;
113 int len = in->Len;
114 int count = 0;
116 /* Special case for .AppleDesktop which in the
117 distant future may be a pseudodirectory. */
118 if (strncmp(".AppleDesktop", p, len) == 0) {
119 strncpy(out, p, 13);
120 return 13;
123 while (len--) {
124 c = *p++;
125 if ((c<32) || (c=='/') || (c>126) || (!count && (c=='.'))) {
126 *out++ = ':';
127 *out++ = hex[(c>>4) & 0xf];
128 *out++ = hex[c & 0xf];
129 count += 3;
130 } else {
131 *out++ = c;
132 count++;
135 return count;
139 * hfs_mac2cap()
141 * Given a 'Pascal String' (a string preceded by a length byte) in
142 * the Macintosh character set produce the corresponding filename using
143 * the CAP name-mangling scheme, returning the length of the mangled
144 * filename. Note that the output string is not NULL terminated.
146 * The name-mangling works as follows:
147 * Characters 32-126 (' '-'~') except '/' are passed unchanged from
148 * input to output. The remaining characters are replaced by three
149 * characters: ':xx' where xx is the hexadecimal representation of the
150 * character, using lowercase 'a' through 'f'.
152 int hfs_mac2cap(char *out, const struct hfs_name *in) {
153 unsigned char c;
154 const unsigned char *p = in->Name;
155 int len = in->Len;
156 int count = 0;
158 while (len--) {
159 c = *p++;
160 if ((c<32) || (c=='/') || (c>126)) {
161 *out++ = ':';
162 *out++ = hex[(c>>4) & 0xf];
163 *out++ = hex[c & 0xf];
164 count += 3;
165 } else {
166 *out++ = c;
167 count++;
170 return count;
174 * hfs_mac2eight()
176 * Given a 'Pascal String' (a string preceded by a length byte) in
177 * the Macintosh character set produce the corresponding filename using
178 * the '8-bit' name-mangling scheme, returning the length of the
179 * mangled filename. Note that the output string is not NULL
180 * terminated.
182 * This is one of the three recommended naming conventions described
183 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
184 * Note (9/94)"
186 * The name-mangling works as follows:
187 * Characters 0, '%' and '/' are replaced by three characters: '%xx'
188 * where xx is the hexadecimal representation of the character, using
189 * lowercase 'a' through 'f'. All other characters are passed
190 * unchanged from input to output. Note that this format is mainly
191 * implemented for completeness and is rather hard to read.
193 int hfs_mac2eight(char *out, const struct hfs_name *in) {
194 unsigned char c;
195 const unsigned char *p = in->Name;
196 int len = in->Len;
197 int count = 0;
199 while (len--) {
200 c = *p++;
201 if (!c || (c=='/') || (c=='%')) {
202 *out++ = '%';
203 *out++ = hex[(c>>4) & 0xf];
204 *out++ = hex[c & 0xf];
205 count += 3;
206 } else {
207 *out++ = c;
208 count++;
211 return count;
215 * hfs_mac2seven()
217 * Given a 'Pascal String' (a string preceded by a length byte) in
218 * the Macintosh character set produce the corresponding filename using
219 * the '7-bit ASCII' name-mangling scheme, returning the length of the
220 * mangled filename. Note that the output string is not NULL
221 * terminated.
223 * This is one of the three recommended naming conventions described
224 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
225 * Note (9/94)"
227 * The name-mangling works as follows:
228 * Characters 0, '%', '/' and 128-255 are replaced by three
229 * characters: '%xx' where xx is the hexadecimal representation of the
230 * character, using lowercase 'a' through 'f'. All other characters
231 * are passed unchanged from input to output. Note that control
232 * characters (including newline) and space are unchanged make reading
233 * these filenames difficult.
235 int hfs_mac2seven(char *out, const struct hfs_name *in) {
236 unsigned char c;
237 const unsigned char *p = in->Name;
238 int len = in->Len;
239 int count = 0;
241 while (len--) {
242 c = *p++;
243 if (!c || (c=='/') || (c=='%') || (c&0x80)) {
244 *out++ = '%';
245 *out++ = hex[(c>>4) & 0xf];
246 *out++ = hex[c & 0xf];
247 count += 3;
248 } else {
249 *out++ = c;
250 count++;
253 return count;
257 * hfs_mac2alpha()
259 * Given a 'Pascal String' (a string preceded by a length byte) in
260 * the Macintosh character set produce the corresponding filename using
261 * the '7-bit alphanumeric' name-mangling scheme, returning the length
262 * of the mangled filename. Note that the output string is not NULL
263 * terminated.
265 * This is one of the three recommended naming conventions described
266 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
267 * Note (9/94)"
269 * The name-mangling works as follows:
270 * The characters 'a'-'z', 'A'-'Z', '0'-'9', '_' and the last '.' in
271 * the filename are passed unchanged from input to output. All
272 * remaining characters (including any '.'s other than the last) are
273 * replaced by three characters: '%xx' where xx is the hexadecimal
274 * representation of the character, using lowercase 'a' through 'f'.
276 int hfs_mac2alpha(char *out, const struct hfs_name *in) {
277 unsigned char c;
278 const unsigned char *p = in->Name;
279 int len = in->Len;
280 int count = 0;
281 const unsigned char *lp; /* last period */
283 /* strrchr() would be good here, but 'in' is not null-terminated */
284 for (lp=p+len-1; (lp>=p)&&(*lp!='.'); --lp) {}
285 ++lp;
287 while (len--) {
288 c = *p++;
289 if ((p==lp) || ((c>='0')&&(c<='9')) || ((c>='A')&&(c<='Z')) ||
290 ((c>='a')&&(c<='z')) || (c=='_')) {
291 *out++ = c;
292 count++;
293 } else {
294 *out++ = '%';
295 *out++ = hex[(c>>4) & 0xf];
296 *out++ = hex[c & 0xf];
297 count += 3;
300 return count;
304 * hfs_mac2triv()
306 * Given a 'Pascal String' (a string preceded by a length byte) in
307 * the Macintosh character set produce the corresponding filename using
308 * the 'trivial' name-mangling scheme, returning the length of the
309 * mangled filename. Note that the output string is not NULL
310 * terminated.
312 * The name-mangling works as follows:
313 * The character '/', which is illegal in Linux filenames is replaced
314 * by ':' which never appears in HFS filenames. All other characters
315 * are passed unchanged from input to output.
317 int hfs_mac2triv(char *out, const struct hfs_name *in) {
318 unsigned char c;
319 const unsigned char *p = in->Name;
320 int len = in->Len;
321 int count = 0;
323 while (len--) {
324 c = *p++;
325 if (c=='/') {
326 *out++ = ':';
327 } else {
328 *out++ = c;
330 count++;
332 return count;
336 * hfs_mac2latin()
338 * Given a 'Pascal String' (a string preceded by a length byte) in
339 * the Macintosh character set produce the corresponding filename using
340 * the 'Latin-1' name-mangling scheme, returning the length of the
341 * mangled filename. Note that the output string is not NULL
342 * terminated.
344 * The Macintosh character set and Latin-1 are both extensions of the
345 * ASCII character set. Some, but certainly not all, of the characters
346 * in the Macintosh character set are also in Latin-1 but not with the
347 * same encoding. This name-mangling scheme replaces the characters in
348 * the Macintosh character set that have Latin-1 equivalents by those
349 * equivalents; the characters 32-126, excluding '/' and '%', are
350 * passed unchanged from input to output. The remaining characters
351 * are replaced by three characters: '%xx' where xx is the hexadecimal
352 * representation of the character, using lowercase 'a' through 'f'.
354 * The array mac2latin_map[] indicates the correspondence between the
355 * two character sets. The byte in element x-128 gives the Latin-1
356 * encoding of the character with encoding x in the Macintosh
357 * character set. A value of zero indicates Latin-1 has no
358 * corresponding character.
360 int hfs_mac2latin(char *out, const struct hfs_name *in) {
361 unsigned char c;
362 const unsigned char *p = in->Name;
363 int len = in->Len;
364 int count = 0;
366 while (len--) {
367 c = *p++;
369 if ((c & 0x80) && mac2latin_map[c & 0x7f]) {
370 *out++ = mac2latin_map[c & 0x7f];
371 count++;
372 } else if ((c>=32) && (c<=126) && (c!='/') && (c!='%')) {
373 *out++ = c;
374 count++;
375 } else {
376 *out++ = '%';
377 *out++ = hex[(c>>4) & 0xf];
378 *out++ = hex[c & 0xf];
379 count += 3;
382 return count;
386 * hfs_colon2mac()
388 * Given an ASCII string (not null-terminated) and its length,
389 * generate the corresponding filename in the Macintosh character set
390 * using the 'CAP' name-mangling scheme, returning the length of the
391 * mangled filename. Note that the output string is not NULL
392 * terminated.
394 * This routine is a inverse to hfs_mac2cap() and hfs_mac2nat().
395 * A ':' not followed by a 2-digit hexadecimal number (or followed
396 * by the codes for NULL or ':') is replaced by a '|'.
398 void hfs_colon2mac(struct hfs_name *out, const char *in, int len) {
399 int hi, lo;
400 unsigned char code, c, *count;
401 unsigned char *p = out->Name;
403 out->Len = 0;
404 count = &out->Len;
405 while (len-- && (*count < HFS_NAMELEN)) {
406 c = *in++;
407 (*count)++;
408 if (c!=':') {
409 *p++ = c;
410 } else if ((len<2) ||
411 ((hi=dehex(in[0])) & 0xf0) ||
412 ((lo=dehex(in[1])) & 0xf0) ||
413 !(code = (hi << 4) | lo) ||
414 (code == ':')) {
415 *p++ = '|';
416 } else {
417 *p++ = code;
418 len -= 2;
419 in += 2;
425 * hfs_prcnt2mac()
427 * Given an ASCII string (not null-terminated) and its length,
428 * generate the corresponding filename in the Macintosh character set
429 * using Apple's three recommended name-mangling schemes, returning
430 * the length of the mangled filename. Note that the output string is
431 * not NULL terminated.
433 * This routine is a inverse to hfs_mac2alpha(), hfs_mac2seven() and
434 * hfs_mac2eight().
435 * A '%' not followed by a 2-digit hexadecimal number (or followed
436 * by the code for NULL or ':') is unchanged.
437 * A ':' is replaced by a '|'.
439 void hfs_prcnt2mac(struct hfs_name *out, const char *in, int len) {
440 int hi, lo;
441 unsigned char code, c, *count;
442 unsigned char *p = out->Name;
444 out->Len = 0;
445 count = &out->Len;
446 while (len-- && (*count < HFS_NAMELEN)) {
447 c = *in++;
448 (*count)++;
449 if (c==':') {
450 *p++ = '|';
451 } else if (c!='%') {
452 *p++ = c;
453 } else if ((len<2) ||
454 ((hi=dehex(in[0])) & 0xf0) ||
455 ((lo=dehex(in[1])) & 0xf0) ||
456 !(code = (hi << 4) | lo) ||
457 (code == ':')) {
458 *p++ = '%';
459 } else {
460 *p++ = code;
461 len -= 2;
462 in += 2;
468 * hfs_triv2mac()
470 * Given an ASCII string (not null-terminated) and its length,
471 * generate the corresponding filename in the Macintosh character set
472 * using the 'trivial' name-mangling scheme, returning the length of
473 * the mangled filename. Note that the output string is not NULL
474 * terminated.
476 * This routine is a inverse to hfs_mac2triv().
477 * A ':' is replaced by a '/'.
479 void hfs_triv2mac(struct hfs_name *out, const char *in, int len) {
480 unsigned char c, *count;
481 unsigned char *p = out->Name;
483 out->Len = 0;
484 count = &out->Len;
485 while (len-- && (*count < HFS_NAMELEN)) {
486 c = *in++;
487 (*count)++;
488 if (c==':') {
489 *p++ = '/';
490 } else {
491 *p++ = c;
497 * hfs_latin2mac()
499 * Given an Latin-1 string (not null-terminated) and its length,
500 * generate the corresponding filename in the Macintosh character set
501 * using the 'Latin-1' name-mangling scheme, returning the length of
502 * the mangled filename. Note that the output string is not NULL
503 * terminated.
505 * This routine is a inverse to hfs_latin2cap().
506 * A '%' not followed by a 2-digit hexadecimal number (or followed
507 * by the code for NULL or ':') is unchanged.
508 * A ':' is replaced by a '|'.
510 * Note that the character map is built the first time it is needed.
512 void hfs_latin2mac(struct hfs_name *out, const char *in, int len)
514 int hi, lo;
515 unsigned char code, c, *count;
516 unsigned char *p = out->Name;
517 static int map_initialized = 0;
519 if (!map_initialized) {
520 int i;
522 /* build the inverse mapping at run time */
523 for (i = 0; i < 128; i++) {
524 if ((c = mac2latin_map[i])) {
525 latin2mac_map[(int)c - 128] = i + 128;
528 map_initialized = 1;
531 out->Len = 0;
532 count = &out->Len;
533 while (len-- && (*count < HFS_NAMELEN)) {
534 c = *in++;
535 (*count)++;
537 if (c==':') {
538 *p++ = '|';
539 } else if (c!='%') {
540 if (c<128 || !(*p = latin2mac_map[c-128])) {
541 *p = c;
543 p++;
544 } else if ((len<2) ||
545 ((hi=dehex(in[0])) & 0xf0) ||
546 ((lo=dehex(in[1])) & 0xf0) ||
547 !(code = (hi << 4) | lo) ||
548 (code == ':')) {
549 *p++ = '%';
550 } else {
551 *p++ = code;
552 len -= 2;
553 in += 2;