Show line contents with compile errors or warnings
[survex.git] / src / message.c
blob028482d63bb0f75f1f6212d26e0a6c783fa03ac2
1 /* message.c
2 * Fairly general purpose message and error routines
3 * Copyright (C) 1993-2003,2004,2005,2006,2007,2010,2011,2012,2014,2015,2016 Olly Betts
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*#define DEBUG 1*/
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <limits.h>
31 #include <errno.h>
32 #include <locale.h>
34 #include "cmdline.h"
35 #include "whichos.h"
36 #include "filename.h"
37 #include "message.h"
38 #include "osdepend.h"
39 #include "filelist.h"
40 #include "debug.h"
41 #include "str.h"
43 #ifdef AVEN
44 # include "aven.h"
45 #endif
47 #if OS_WIN32
48 # define WIN32_LEAN_AND_MEAN
49 # include <windows.h>
50 #elif OS_UNIX
51 # include <sys/types.h>
52 # include <sys/stat.h>
53 #endif
55 /* For funcs which want to be immune from messing around with different
56 * calling conventions */
57 #ifndef CDECL
58 # define CDECL
59 #endif
/* Running totals of the diagnostics issued so far. */
int msg_warnings = 0; /* warnings */
int msg_errors = 0; /* non-fatal errors */

/* Program name used when reporting errors.  Starts out as a placeholder in
 * case osmalloc() fails before appname_copy is set up. */
static const char *appname_copy = "anonymous program";

/* Path to use to look for executables (used by aven to find cavern). */
static const char *exe_pth = "";
70 /* error code for failed osmalloc and osrealloc calls */
71 static void
72 outofmem(OSSIZE_T size)
74 /* TRANSLATORS: "%lu" is a placeholder for the number of bytes which Survex
75 * was trying to allocate space for. */
76 fatalerror(/*Out of memory (couldn’t find %lu bytes).*/1,
77 (unsigned long)size);
#ifdef TOMBSTONES
/* Number of guard bytes placed at each end of every allocated block. */
# define TOMBSTONE_SIZE 16
/* Guard pattern.  The initialiser is exactly TOMBSTONE_SIZE characters long,
 * so the array holds no terminating NUL. */
static const char tombstone[TOMBSTONE_SIZE] = "012345\xfftombstone";
#endif
85 /* malloc with error catching if it fails. Also allows us to write special
86 * versions easily eg for MS Windows.
88 void *
89 osmalloc(OSSIZE_T size)
91 void *p;
92 #ifdef TOMBSTONES
93 size += TOMBSTONE_SIZE * 2;
94 p = malloc(size);
95 #else
96 p = xosmalloc(size);
97 #endif
98 if (p == NULL) outofmem(size);
99 #ifdef TOMBSTONES
100 printf("osmalloc truep=%p truesize=%d\n", p, size);
101 memcpy(p, tombstone, TOMBSTONE_SIZE);
102 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
103 *(size_t *)p = size;
104 p += TOMBSTONE_SIZE;
105 #endif
106 return p;
109 /* realloc with error catching if it fails. */
110 void *
111 osrealloc(void *p, OSSIZE_T size)
113 /* some pre-ANSI realloc implementations don't cope with a NULL pointer */
114 if (p == NULL) {
115 p = xosmalloc(size);
116 } else {
117 #ifdef TOMBSTONES
118 int true_size;
119 size += TOMBSTONE_SIZE * 2;
120 p -= TOMBSTONE_SIZE;
121 true_size = *(size_t *)p;
122 printf("osrealloc (in truep=%p truesize=%d)\n", p, true_size);
123 if (memcmp(p + sizeof(size_t), tombstone + sizeof(size_t),
124 TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
125 printf("start tombstone for block %p, size %d corrupted!",
126 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
128 if (memcmp(p + true_size - TOMBSTONE_SIZE, tombstone,
129 TOMBSTONE_SIZE) != 0) {
130 printf("end tombstone for block %p, size %d corrupted!",
131 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
133 p = realloc(p, size);
134 if (p == NULL) outofmem(size);
135 printf("osrealloc truep=%p truesize=%d\n", p, size);
136 memcpy(p, tombstone, TOMBSTONE_SIZE);
137 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
138 *(size_t *)p = size;
139 p += TOMBSTONE_SIZE;
140 #else
141 p = xosrealloc(p, size);
142 #endif
144 if (p == NULL) outofmem(size);
145 return p;
148 char *
149 osstrdup(const char *str)
151 char *p;
152 OSSIZE_T len;
153 len = strlen(str) + 1;
154 p = osmalloc(len);
155 memcpy(p, str, len);
156 return p;
/* osfree is usually just a macro in osalloc.h */
#ifdef TOMBSTONES
/* Debugging version: check both guards of the block (reporting corruption on
 * stdout) and then release it.  A NULL pointer is ignored.
 */
void
osfree(void *p)
{
    /* Byte pointer and size_t keep the arithmetic standard and avoid
     * truncating the stored block size. */
    unsigned char *raw;
    size_t true_size;

    if (!p) return;
    raw = (unsigned char *)p - TOMBSTONE_SIZE;
    true_size = *(size_t *)raw;
    printf("osfree truep=%p truesize=%lu\n", (void *)raw,
	   (unsigned long)true_size);
    /* The first sizeof(size_t) bytes of the leading guard hold the size. */
    if (memcmp(raw + sizeof(size_t), tombstone + sizeof(size_t),
	       TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
	printf("start tombstone for block %p, size %lu corrupted!",
	       (void *)(raw + TOMBSTONE_SIZE),
	       (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
    }
    if (memcmp(raw + true_size - TOMBSTONE_SIZE, tombstone,
	       TOMBSTONE_SIZE) != 0) {
	printf("end tombstone for block %p, size %lu corrupted!",
	       (void *)(raw + TOMBSTONE_SIZE),
	       (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
    }
    free(raw);
}
#endif
183 static int
184 default_charset(void)
186 if (getenv("SURVEX_UTF8")) return CHARSET_UTF8;
187 #if OS_WIN32
188 # ifdef AVEN
189 # define CODEPAGE GetACP()
190 # else
191 # define CODEPAGE GetConsoleOutputCP()
192 # endif
193 switch (CODEPAGE) {
194 case 0: return CHARSET_UTF8;
195 case 1252: return CHARSET_WINCP1252;
196 case 1250: return CHARSET_WINCP1250;
197 case 850: return CHARSET_DOSCP850;
199 return CHARSET_USASCII;
200 #elif OS_UNIX
201 #ifdef AVEN
202 return CHARSET_UTF8;
203 #else
204 const char *p = getenv("LC_ALL");
205 if (p == NULL || p[0] == '\0') {
206 p = getenv("LC_CTYPE");
207 if (p == NULL || p[0] == '\0') {
208 p = getenv("LANG");
209 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
210 if (p == NULL || !isalpha((unsigned char)p[0]))
211 p = msg_lang;
215 if (p) {
216 const char *q = strchr(p, '.');
217 if (q) p = q + 1;
220 if (p) {
221 const char *chset = p;
222 size_t name_len;
224 while (*p != '\0' && *p != '@') p++;
226 name_len = p - chset;
228 if (name_len) {
229 int only_digit = 1;
230 size_t cnt;
232 for (cnt = 0; cnt < name_len; ++cnt)
233 if (isalpha((unsigned char)chset[cnt])) {
234 only_digit = 0;
235 break;
238 if (only_digit) goto iso;
240 switch (tolower(chset[0])) {
241 case 'i':
242 if (tolower(chset[1]) == 's' && tolower(chset[2]) == 'o') {
243 chset += 3;
244 iso:
245 if (strncmp(chset, "8859", 4) == 0) {
246 chset += 4;
247 while (chset < p && *chset && !isdigit((unsigned char)*chset))
248 chset++;
249 switch (atoi(chset)) {
250 case 1: return CHARSET_ISO_8859_1;
251 case 2: return CHARSET_ISO_8859_2;
252 case 15: return CHARSET_ISO_8859_15;
253 default: return CHARSET_USASCII;
257 break;
258 case 'u':
259 if (tolower(chset[1]) == 't' && tolower(chset[2]) == 'f') {
260 chset += 3;
261 while (chset < p && *chset && !isdigit((unsigned char)*chset))
262 chset++;
263 switch (atoi(chset)) {
264 case 8: return CHARSET_UTF8;
265 default: return CHARSET_USASCII;
271 return CHARSET_USASCII;
272 #endif
273 #else
274 # error Do not know operating system!
275 #endif
/* It seems that Swedish and maybe some other Scandinavian languages don't
 * transliterate &auml; to ae - but it seems there may be conflicting views
 * on this...
 */
282 #define umlaut_to_e() 1
284 /* values <= 127 already dealt with */
285 static int
286 add_unicode(int charset, unsigned char *p, int value)
288 #ifdef DEBUG
289 fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
290 #endif
291 if (value == 0) return 0;
292 switch (charset) {
293 case CHARSET_USASCII:
294 if (value < 0x80) {
295 *p = value;
296 return 1;
298 break;
299 case CHARSET_ISO_8859_1:
300 if (value < 0x100) {
301 *p = value;
302 return 1;
304 break;
305 case CHARSET_ISO_8859_2:
306 if (value >= 0xa0) {
307 int v = 0;
308 switch (value) {
309 case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
310 case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
311 case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
312 case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
313 case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
314 case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
315 case 0xfa: case 0xfc: case 0xfd:
316 v = value; break;
317 case 0x104: v = '\xa1'; break;
318 case 0x2d8: v = '\xa2'; break;
319 case 0x141: v = '\xa3'; break;
320 case 0x13d: v = '\xa5'; break;
321 case 0x15a: v = '\xa6'; break;
322 case 0x160: v = '\xa9'; break;
323 case 0x15e: v = '\xaa'; break; /* Scedil */
324 case 0x164: v = '\xab'; break;
325 case 0x179: v = '\xac'; break;
326 case 0x17d: v = '\xae'; break;
327 case 0x17b: v = '\xaf'; break;
328 case 0x105: v = '\xb1'; break;
329 case 0x2db: v = '\xb2'; break;
330 case 0x142: v = '\xb3'; break;
331 case 0x13e: v = '\xb5'; break;
332 case 0x15b: v = '\xb6'; break;
333 case 0x2c7: v = '\xb7'; break;
334 case 0x161: v = '\xb9'; break;
335 case 0x15f: v = '\xba'; break; /* scedil */
336 case 0x165: v = '\xbb'; break;
337 case 0x17a: v = '\xbc'; break;
338 case 0x2dd: v = '\xbd'; break;
339 case 0x17e: v = '\xbe'; break;
340 case 0x17c: v = '\xbf'; break;
341 case 0x154: v = '\xc0'; break;
342 case 0x102: v = '\xc3'; break;
343 case 0x139: v = '\xc5'; break;
344 case 0x106: v = '\xc6'; break;
345 case 0x10c: v = '\xc8'; break;
346 case 0x118: v = '\xca'; break;
347 case 0x11a: v = '\xcc'; break;
348 case 0x10e: v = '\xcf'; break;
349 case 0x110: v = '\xd0'; break;
350 case 0x143: v = '\xd1'; break;
351 case 0x147: v = '\xd2'; break;
352 case 0x150: v = '\xd5'; break;
353 case 0x158: v = '\xd8'; break;
354 case 0x16e: v = '\xd9'; break;
355 case 0x170: v = '\xdb'; break;
356 case 0x162: v = '\xde'; break; /* &Tcedil; */
357 case 0x155: v = '\xe0'; break;
358 case 0x103: v = '\xe3'; break;
359 case 0x13a: v = '\xe5'; break;
360 case 0x107: v = '\xe6'; break;
361 case 0x10d: v = '\xe8'; break;
362 case 0x119: v = '\xea'; break;
363 case 0x11b: v = '\xec'; break;
364 case 0x10f: v = '\xef'; break;
365 case 0x111: v = '\xf0'; break;
366 case 0x144: v = '\xf1'; break;
367 case 0x148: v = '\xf2'; break;
368 case 0x151: v = '\xf5'; break;
369 case 0x159: v = '\xf8'; break;
370 case 0x16f: v = '\xf9'; break;
371 case 0x171: v = '\xfb'; break;
372 case 0x163: v = '\xfe'; break; /* tcedil */
373 case 0x2d9: v = '\xff'; break;
375 if (v == 0) break;
376 value = v;
378 *p = value;
379 return 1;
380 case CHARSET_ISO_8859_15:
381 switch (value) {
382 case 0xa4: case 0xa6: case 0xb0: case 0xc4:
383 case 0xd0: case 0xd4: case 0xd5: case 0xd6:
384 goto donthave;
385 case 0x152: value = 0xd4; break; /* &OElig; */
386 case 0x153: value = 0xd5; break; /* &oelig; */
387 #if 0
388 case 0x0: value = 0xa4; break; /* euro */
389 #endif
390 case 0x160: value = 0xa6; break; /* Scaron */
391 case 0x161: value = 0xb0; break; /* scaron */
392 case 0x17d: value = 0xc4; break; /* Zcaron */
393 case 0x17e: value = 0xd0; break; /* zcaron */
394 #if 0
395 case 0x0: value = 0xd6; break; /* Ydiersis */
396 #endif
398 if (value < 0x100) {
399 *p = value;
400 return 1;
402 donthave:
403 break;
404 #if OS_WIN32
405 case CHARSET_WINCP1250:
406 /* MS Windows rough equivalent to ISO-8859-2 */
407 if (value >= 0x80) {
408 int v = 0;
409 /* This mapping is complete - there are 5 unused positions:
410 * 0x81 0x83 0x88 0x90 0x98 */
411 switch (value) {
412 case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
413 case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
414 case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
415 case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
416 case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
417 case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
418 case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
419 case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
420 case 0xfd:
421 v = value; break;
422 case 0x20ac: v = '\x80'; break;
423 case 0x201a: v = '\x82'; break;
424 case 0x201e: v = '\x84'; break;
425 case 0x2026: v = '\x85'; break;
426 case 0x2020: v = '\x86'; break;
427 case 0x2021: v = '\x87'; break;
428 case 0x2030: v = '\x89'; break;
429 case 0x0160: v = '\x8a'; break;
430 case 0x2039: v = '\x8b'; break;
431 case 0x015a: v = '\x8c'; break;
432 case 0x0164: v = '\x8d'; break;
433 case 0x017d: v = '\x8e'; break;
434 case 0x0179: v = '\x8f'; break;
435 case 0x2018: v = '\x91'; break;
436 case 0x2019: v = '\x92'; break;
437 case 0x201c: v = '\x93'; break;
438 case 0x201d: v = '\x94'; break;
439 case 0x2022: v = '\x95'; break;
440 case 0x2013: v = '\x96'; break;
441 case 0x2014: v = '\x97'; break;
442 case 0x2122: v = '\x99'; break;
443 case 0x0161: v = '\x9a'; break;
444 case 0x203a: v = '\x9b'; break;
445 case 0x015b: v = '\x9c'; break;
446 case 0x0165: v = '\x9d'; break;
447 case 0x017e: v = '\x9e'; break;
448 case 0x017a: v = '\x9f'; break;
449 case 0x02c7: v = '\xa1'; break;
450 case 0x02d8: v = '\xa2'; break;
451 case 0x0141: v = '\xa3'; break;
452 case 0x0104: v = '\xa5'; break;
453 case 0x015e: v = '\xaa'; break; /* Scedil */
454 case 0x017b: v = '\xaf'; break;
455 case 0x02db: v = '\xb2'; break;
456 case 0x0142: v = '\xb3'; break;
457 case 0x0105: v = '\xb9'; break;
458 case 0x015f: v = '\xba'; break; /* scedil */
459 case 0x013d: v = '\xbc'; break;
460 case 0x02dd: v = '\xbd'; break;
461 case 0x013e: v = '\xbe'; break;
462 case 0x017c: v = '\xbf'; break;
463 case 0x0154: v = '\xc0'; break;
464 case 0x0102: v = '\xc3'; break;
465 case 0x0139: v = '\xc5'; break;
466 case 0x0106: v = '\xc6'; break;
467 case 0x010c: v = '\xc8'; break;
468 case 0x0118: v = '\xca'; break;
469 case 0x011a: v = '\xcc'; break;
470 case 0x010e: v = '\xcf'; break;
471 case 0x0110: v = '\xd0'; break;
472 case 0x0143: v = '\xd1'; break;
473 case 0x0147: v = '\xd2'; break;
474 case 0x0150: v = '\xd5'; break;
475 case 0x0158: v = '\xd8'; break;
476 case 0x016e: v = '\xd9'; break;
477 case 0x0170: v = '\xdb'; break;
478 case 0x0162: v = '\xde'; break; /* &Tcedil; */
479 case 0x0155: v = '\xe0'; break;
480 case 0x0103: v = '\xe3'; break;
481 case 0x013a: v = '\xe5'; break;
482 case 0x0107: v = '\xe6'; break;
483 case 0x010d: v = '\xe8'; break;
484 case 0x0119: v = '\xea'; break;
485 case 0x011b: v = '\xec'; break;
486 case 0x010f: v = '\xef'; break;
487 case 0x0111: v = '\xf0'; break;
488 case 0x0144: v = '\xf1'; break;
489 case 0x0148: v = '\xf2'; break;
490 case 0x0151: v = '\xf5'; break;
491 case 0x0159: v = '\xf8'; break;
492 case 0x016f: v = '\xf9'; break;
493 case 0x0171: v = '\xfb'; break;
494 case 0x0163: v = '\xfe'; break; /* tcedil */
495 case 0x02d9: v = '\xff'; break;
497 if (v == 0) break;
498 value = v;
500 *p = value;
501 return 1;
502 case CHARSET_WINCP1252:
503 /* MS Windows extensions to ISO-8859-1 */
504 /* This mapping is complete - there are 5 unused positions:
505 * 0x81 0x8d 0x8f 0x90 0x9d */
506 switch (value) {
507 case 0x2026: value = 0x85; break; /* hellip */
508 case 0x0160: value = 0x8a; break; /* Scaron */
509 case 0x0152: value = 0x8c; break; /* OElig */
510 case 0x017d: value = 0x8e; break; /* Zcaron */
511 case 0x2019: value = 0x92; break; /* lsquo */
512 case 0x201c: value = 0x93; break; /* ldquo */
513 case 0x201d: value = 0x94; break; /* rdquo */
514 case 0x0161: value = 0x9a; break; /* scaron */
515 case 0x0153: value = 0x9c; break; /* oelig */
516 case 0x017e: value = 0x9e; break; /* zcaron */
517 #if 0
518 /* there are a few other obscure ones we don't currently need */
519 case 0x20ac: value = 0x80; break;
520 case 0x201a: value = 0x82; break;
521 case 0x0192: value = 0x83; break;
522 case 0x201e: value = 0x84; break;
523 case 0x2020: value = 0x86; break;
524 case 0x2021: value = 0x87; break;
525 case 0x02c6: value = 0x88; break;
526 case 0x2030: value = 0x89; break;
527 case 0x2039: value = 0x8b; break;
528 case 0x2018: value = 0x91; break;
529 case 0x2022: value = 0x95; break;
530 case 0x2013: value = 0x96; break;
531 case 0x2014: value = 0x97; break;
532 case 0x02dc: value = 0x98; break;
533 case 0x2122: value = 0x99; break;
534 case 0x203a: value = 0x9b; break;
535 case 0x0178: value = 0x9f; break;
536 #endif
538 if (value < 0x100) {
539 *p = value;
540 return 1;
542 break;
543 #endif
544 #if OS_WIN32
545 case CHARSET_DOSCP850: {
546 static const unsigned char uni2dostab[] = {
547 255, 173, 189, 156, 207, 190, 221, 245,
548 249, 184, 166, 174, 170, 240, 169, 238,
549 248, 241, 253, 252, 239, 230, 244, 250,
550 247, 251, 167, 175, 172, 171, 243, 168,
551 183, 181, 182, 199, 142, 143, 146, 128,
552 212, 144, 210, 211, 222, 214, 215, 216,
553 209, 165, 227, 224, 226, 229, 153, 158,
554 157, 235, 233, 234, 154, 237, 232, 225,
555 133, 160, 131, 198, 132, 134, 145, 135,
556 138, 130, 136, 137, 141, 161, 140, 139,
557 208, 164, 149, 162, 147, 228, 148, 246,
558 155, 151, 163, 150, 129, 236, 231, 152
560 if (value >= 160 && value < 256) {
561 *p = (int)uni2dostab[value - 160];
562 return 1;
564 #if 0
565 if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
566 *p = 213; /* "Modified CP850" has the Euro sign here. */
567 return 1;
569 if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
570 *p = 159;
571 return 1;
573 #endif
574 break;
576 #endif
578 /* Transliterate characters we can't represent */
579 #ifdef DEBUG
580 fprintf(stderr, "transliterate “%c” 0x%x\n", value, value);
581 #endif
582 switch (value) {
583 case 160:
584 *p = ' '; return 1;
585 case 161 /* ¡ */:
586 *p = '!'; return 1;
587 case 176 /* ° */:
588 *p = 'd'; p[1] = 'g'; return 2;
589 case 191 /* ¿ */:
590 *p = '?'; return 1;
591 case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
592 *p = 'A'; return 1;
593 case 197 /* Å */:
594 p[1] = *p = 'A'; return 2;
595 case 196 /* Ä */: /* &Auml; */
596 *p = 'A';
597 if (!umlaut_to_e()) return 1;
598 p[1] = 'E'; return 2;
599 case 198 /* Æ */:
600 *p = 'A'; p[1] = 'E'; return 2;
601 case 199 /* Ç */: case 268: /* &Ccaron; */
602 *p = 'C'; return 1;
603 case 270: /* &Dcaron; */
604 *p = 'D'; return 1;
605 case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
606 *p = 'E'; return 1;
607 case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
608 *p = 'I'; return 1;
609 case 208 /* Ð */: case 222 /* Þ */:
610 *p = 'T'; p[1] = 'H'; return 2;
611 case 315: /* &Lacute; */
612 case 317: /* &Lcaron; */
613 *p = 'L'; return 1;
614 case 209 /* Ñ */:
615 *p = 'N'; return 1;
616 case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
617 *p = 'O'; return 1;
618 case 214 /* Ö */: /* &Ouml; */ case 0x152: /* &OElig; */
619 *p = 'O'; p[1] = 'E'; return 2;
620 case 352: /* &Scaron; */
621 case 0x15e: /* &Scedil; */
622 *p = 'S'; return 1;
623 case 0x162: /* &Tcedil; */
624 case 0x164: /* &Tcaron; */
625 *p = 'T'; return 1;
626 case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
627 *p = 'U'; return 1;
628 case 220 /* Ü */: /* &Uuml; */
629 *p = 'U'; p[1] = 'E'; return 2;
630 case 221 /* Ý */:
631 *p = 'Y'; return 1;
632 case 381: /* &Zcaron; */
633 *p = 'Z'; return 1;
634 case 223 /* ß */:
635 p[1] = *p = 's'; return 2;
636 case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
637 case 259: /* &abreve; */
638 *p = 'a'; return 1;
639 case 228 /* ä */: /* &auml; */ case 230 /* æ */:
640 *p = 'a'; p[1] = 'e'; return 2;
641 case 229 /* å */:
642 p[1] = *p = 'a'; return 2;
643 case 231 /* ç */: case 269 /* &ccaron; */:
644 *p = 'c'; return 1;
645 case 271: /* &dcaron; */
646 *p = 'd'; return 1;
647 case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
648 case 283 /* &ecaron; */:
649 *p = 'e'; return 1;
650 case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
651 *p = 'i'; return 1;
652 case 316 /* &lacute; */:
653 case 318 /* &lcaron; */:
654 *p = 'l'; return 1;
655 case 241 /* ñ */: case 328 /* &ncaron; */:
656 *p = 'n'; return 1;
657 case 345: /* &rcaron; */
658 *p = 'r'; return 1;
659 case 353: /* &scaron; */
660 case 0x15f: /* &scedil; */
661 *p = 's'; return 1;
662 case 357: /* &tcaron; */
663 case 0x163: /* &tcedil; */
664 *p = 't'; return 1;
665 case 240 /* ð */: case 254 /* þ */:
666 *p = 't'; p[1] = 'h'; return 2;
667 case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
668 *p = 'o'; return 1;
669 case 246 /* ö */: /* &ouml; */ case 0x153: /* &oelig; */
670 *p = 'o'; p[1] = 'e'; return 2;
671 case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
672 case 367 /* &uring; */:
673 *p = 'u'; return 1;
674 case 252 /* ü */: /* &uuml; */
675 *p = 'u'; p[1] = 'e'; return 2;
676 case 253 /* ý */: case 255 /* ÿ */:
677 *p = 'y'; return 1;
678 case 382: /* &zcaron; */
679 *p = 'z'; return 1;
680 case 0x2019: /* &lsquo; */
681 *p = '\''; return 1;
682 case 171: /* « */ case 187: /* » */
683 case 0x201c: /* &ldquo; */ case 0x201d: /* &rdquo; */
684 *p = '"'; return 1;
685 case 0x2026: /* &hellip; */
686 *p = '.'; p[1] = '.'; p[2] = '.'; return 3;
687 case 0x2192: /* &rarr; */
688 *p = '-'; p[1] = '>'; return 2;
689 case 0x1d4d: /* gradient symbol */
690 *p = 'g'; p[1] = 'r'; p[2] = 'd'; return 3;
691 case 0x221e: /* infinity symbol */
692 *p = 'i'; p[1] = 'n'; p[2] = 'f'; return 3;
694 #ifdef DEBUG
695 /* 169 is reported (copyright symbol), but there isn't a good <= 2 ASCII
696 * character transliteration for that, so we handle that elsewhere. */
697 fprintf(stderr, "failed to transliterate codepoint %d\n", value);
698 #endif
699 return 0;
702 #if OS_UNIX && defined DATADIR && defined PACKAGE
703 /* Under Unix, we compile in the configured path */
704 static const char *pth_cfg_files = DATADIR "/" PACKAGE;
705 #else
706 /* On other platforms, we fall back on looking in the current directory */
707 static const char *pth_cfg_files = "";
708 #endif
710 static int num_msgs = 0;
711 static char **msg_array = NULL;
713 static bool msg_lang_explicit = fFalse;
714 const char *msg_lang = NULL;
715 const char *msg_lang2 = NULL;
717 static char **
718 parse_msgs(int n, unsigned char *p, int charset_code) {
719 int i;
721 char **msgs = osmalloc(n * sizeof(char *));
723 for (i = 0; i < n; i++) {
724 unsigned char *to = p;
725 int ch;
726 msgs[i] = (char *)p;
728 /* If we want UTF8 anyway, we just need to find the start of each
729 * message */
730 if (charset_code == CHARSET_UTF8) {
731 p += strlen((char *)p) + 1;
732 continue;
735 while ((ch = *p++) != 0) {
736 /* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
737 * this state, (0xf0-0xfd mean values > 0xffff) so treat as
738 * literal and try to resync so we cope better when fed
739 * non-utf-8 data. Similarly we abandon a multibyte sequence
740 * if we hit an invalid character. */
741 if (ch >= 0xc0 && ch < 0xf0) {
742 int ch1 = *p;
743 if ((ch1 & 0xc0) != 0x80) goto resync;
745 if (ch < 0xe0) {
746 /* 2 byte sequence */
747 ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
748 p++;
749 } else {
750 /* 3 byte sequence */
751 int ch2 = p[1];
752 if ((ch2 & 0xc0) != 0x80) goto resync;
753 ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
754 p += 2;
758 resync:
760 if (ch < 127) {
761 *to++ = (char)ch;
762 } else {
763 /* We assume an N byte UTF-8 code never transliterates to more
764 * than N characters (so we can't transliterate © to (C) or
765 * ® to (R) for example) */
766 to += add_unicode(charset_code, to, ch);
769 *to++ = '\0';
771 return msgs;
774 /* This is the name of the default language, which can be set like so:
775 * ./configure --enable-defaultlang=fr
777 #ifdef DEFAULTLANG
778 /* No point extracting these errors as they won't get used if file opens */
779 # include "../lib/defaultlang.h"
780 #else
781 #define N_DONTEXTRACTMSGS 5
782 static unsigned char dontextractmsgs[] =
783 "Can't open message file \"%s\" using path \"%s\"\0"/*1000*/
784 "Problem with message file \"%s\"\0"/*1001*/
785 "I don't understand this message file version\0"/*1002*/
786 "Message file truncated?\0"/*1003*/
787 "Out of memory (couldn't find %lu bytes).\0"/*1004*/;
788 #endif
790 static char **dontextract = NULL;
792 static void
793 parse_msg_file(int charset_code)
795 FILE *fh;
796 unsigned char header[20];
797 int i;
798 unsigned len;
799 unsigned char *p;
800 char *fnm, *s;
801 int n;
803 #ifdef DEBUG
804 fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
805 #endif
807 /* sort out messages we need to print if we can't open the message file */
808 dontextract = parse_msgs(N_DONTEXTRACTMSGS, dontextractmsgs, charset_code);
810 fnm = osstrdup(msg_lang);
811 /* trim off charset from stuff like "de_DE.iso8859_1" */
812 s = strchr(fnm, '.');
813 if (s) *s = '\0';
814 /* trim off any "@<something>" modifier. */
815 s = strchr(fnm, '@');
816 if (s) *s = '\0';
818 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
820 if (!fh && strlen(fnm) > 3 && fnm[2] == '_') {
821 /* e.g. if 'en_GB' is unknown, see if we know 'en' */
822 fnm[2] = '\0';
823 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
824 if (!fh) fnm[2] = '_'; /* for error reporting */
827 if (!fh && !msg_lang_explicit) {
828 /* If msg_lang wasn't specified using environment variable SURVEXLANG,
829 * then default to 'en' if we don't find messages for language msg_lang.
831 if (fnm[0] && fnm[1]) {
832 strcpy(fnm, "en");
833 } else {
834 osfree(fnm);
835 fnm = osstrdup("en");
837 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
840 if (!fh) {
841 fatalerror(/*Can't open message file “%s” using path “%s”*/1000,
842 fnm, pth_cfg_files);
845 if (fread(header, 1, 20, fh) < 20 ||
846 memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
847 fatalerror(/*Problem with message file “%s”*/1001, fnm);
850 if (header[12] != 0)
851 fatalerror(/*I don't understand this message file version*/1002);
853 n = (header[14] << 8) | header[15];
855 len = 0;
856 for (i = 16; i < 20; i++) len = (len << 8) | header[i];
858 p = osmalloc(len);
859 if (fread(p, 1, len, fh) < len)
860 fatalerror(/*Message file truncated?*/1003);
862 fclose(fh);
864 #ifdef DEBUG
865 fprintf(stderr, "fnm = “%s”, n = %d, len = %d\n", fnm, n, len);
866 #endif
867 osfree(fnm);
869 msg_array = parse_msgs(n, p, charset_code);
870 num_msgs = n;
873 const char *
874 msg_cfgpth(void)
876 return pth_cfg_files;
879 const char *
880 msg_exepth(void)
882 return exe_pth;
885 const char *
886 msg_appname(void)
888 return appname_copy;
891 void
892 msg_init(char * const *argv)
894 char *p;
895 SVX_ASSERT(argv);
897 /* Point to argv[0] itself so we report a more helpful error if the
898 * code to work out the clean appname generates a signal */
899 appname_copy = argv[0];
900 #if OS_UNIX
901 /* use name as-is on Unix - programs run from path get name as supplied */
902 appname_copy = osstrdup(argv[0]);
903 #else
904 /* use the lower-cased leafname on other platforms */
905 p = leaf_from_fnm(argv[0]);
906 appname_copy = p;
907 while (*p) {
908 *p = tolower(*p);
909 ++p;
911 #endif
913 /* shortcut --version so you can check the version number even when the
914 * correct message file can't be found... */
915 if (argv[1] && strcmp(argv[1], "--version") == 0) {
916 cmdline_version();
917 exit(0);
919 if (argv[0]) {
920 exe_pth = path_from_fnm(argv[0]);
921 #ifdef MACOSX_BUNDLE
922 /* If we're being built into a bundle, always look relative to
923 * the path to the binary. */
924 #ifdef AVEN
925 /* Aven is packaged as an application, so we must look inside there. */
926 pth_cfg_files = use_path(exe_pth, "../Resources");
927 #else
928 pth_cfg_files = use_path(exe_pth, "share/survex");
929 #endif
930 #elif OS_UNIX && defined DATADIR && defined PACKAGE
931 bool free_pth = fFalse;
932 char *pth = getenv("srcdir");
933 if (!pth || !pth[0]) {
934 pth = path_from_fnm(argv[0]);
935 free_pth = fTrue;
937 if (pth[0]) {
938 struct stat buf;
939 #if OS_UNIX_MACOSX
940 /* On MacOS X the programs may be installed anywhere, with the
941 * share directory and the binaries in the same directory. */
942 p = use_path(pth, "share/survex/en.msg");
943 if (lstat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
944 pth_cfg_files = use_path(pth, "share/survex");
945 goto macosx_got_msg;
947 osfree(p);
948 /* The cavern which aven runs is a hardlinked copy alongside
949 * the aven binary.
951 p = use_path(pth, "../Resources/en.msg");
952 if (lstat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
953 pth_cfg_files = use_path(pth, "../Resources");
954 goto macosx_got_msg;
956 osfree(p);
957 #endif
958 /* If we're run with an explicit path, check if "../lib/en.msg"
959 * from the program's path exists, and if so look there for
960 * support files - this allows us to test binaries in the build
961 * tree easily. */
962 p = use_path(pth, "../lib/en.msg");
963 if (lstat(p, &buf) == 0) {
964 #ifdef S_ISREG
965 /* POSIX way */
966 if (S_ISREG(buf.st_mode)) {
967 pth_cfg_files = use_path(pth, "../lib");
969 #else
970 /* BSD way */
971 if ((buf.st_mode & S_IFMT) == S_IFREG) {
972 pth_cfg_files = use_path(pth, "../lib");
974 #endif
976 #if defined(__GNUC__) && defined(__APPLE_CC__)
977 macosx_got_msg:
978 #endif
979 osfree(p);
982 if (free_pth) osfree(pth);
983 #elif OS_WIN32
984 DWORD len = 256;
985 char *buf = NULL, *modname;
986 while (1) {
987 DWORD got;
988 buf = osrealloc(buf, len);
989 got = GetModuleFileName(NULL, buf, len);
990 if (got < len) break;
991 len += len;
993 modname = buf;
994 /* Strange Win32 nastiness - strip prefix "\\?\" if present */
995 if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
996 pth_cfg_files = path_from_fnm(modname);
997 osfree(buf);
998 #else
999 /* Get the path to the support files from argv[0] */
1000 pth_cfg_files = path_from_fnm(argv[0]);
1001 #endif
1004 msg_lang = getenv("SURVEXLANG");
1005 #ifdef DEBUG
1006 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1007 #endif
1009 msg_lang_explicit = fTrue;
1010 if (!msg_lang || !*msg_lang) {
1011 msg_lang_explicit = fFalse;
1012 msg_lang = getenv("LC_ALL");
1014 if (!msg_lang || !*msg_lang) {
1015 msg_lang = getenv("LC_MESSAGES");
1016 if (!msg_lang || !*msg_lang) {
1017 msg_lang = getenv("LANG");
1018 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
1019 if (msg_lang && !isalpha(msg_lang[0])) msg_lang = NULL;
1021 if (!msg_lang || !*msg_lang) {
1022 #if OS_WIN32
1023 LCID locid;
1024 #endif
1025 #ifdef DEFAULTLANG
1026 msg_lang = STRING(DEFAULTLANG);
1027 #else
1028 msg_lang = "en";
1029 #endif
1030 #if OS_WIN32
1031 /* GetUserDefaultUILanguage() requires Microsoft Windows 2000 or
1032 * newer. For older versions, we use GetUserDefaultLCID().
1035 HMODULE win32 = GetModuleHandle(TEXT("kernel32.dll"));
1036 FARPROC f = GetProcAddress(win32, "GetUserDefaultUILanguage");
1037 if (f) {
1038 typedef LANGID (WINAPI *func_GetUserDefaultUILanguage)(void);
1039 func_GetUserDefaultUILanguage g;
1040 g = (func_GetUserDefaultUILanguage)f;
1041 locid = g();
1042 } else {
1043 locid = GetUserDefaultLCID();
1046 if (locid) {
1047 WORD langid = LANGIDFROMLCID(locid);
1048 switch (PRIMARYLANGID(langid)) {
1049 case LANG_BULGARIAN:
1050 msg_lang = "bg";
1051 break;
1052 /* older mingw compilers don't seem to supply this value */
1053 #ifndef LANG_CATALAN
1054 # define LANG_CATALAN 0x03
1055 #endif
1056 case LANG_CATALAN:
1057 msg_lang = "ca";
1058 break;
1059 case LANG_CHINESE:
1060 msg_lang = "zh_CN";
1061 break;
1062 case LANG_ENGLISH:
1063 if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
1064 msg_lang = "en_US";
1065 else
1066 msg_lang = "en";
1067 break;
1068 case LANG_FRENCH:
1069 msg_lang = "fr";
1070 break;
1071 case LANG_GERMAN:
1072 switch (SUBLANGID(langid)) {
1073 case SUBLANG_GERMAN_SWISS:
1074 msg_lang = "de_CH";
1075 break;
1076 default:
1077 msg_lang = "de";
1079 break;
1080 case LANG_GREEK:
1081 msg_lang = "el";
1082 break;
1083 case LANG_HUNGARIAN:
1084 msg_lang = "hu";
1085 break;
1086 case LANG_INDONESIAN:
1087 msg_lang = "id";
1088 break;
1089 case LANG_ITALIAN:
1090 msg_lang = "it";
1091 break;
1092 case LANG_POLISH:
1093 msg_lang = "pl";
1094 break;
1095 case LANG_PORTUGUESE:
1096 if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
1097 msg_lang = "pt_BR";
1098 else
1099 msg_lang = "pt";
1100 break;
1101 case LANG_ROMANIAN:
1102 msg_lang = "ro";
1103 break;
1104 case LANG_RUSSIAN:
1105 msg_lang = "ru";
1106 break;
1107 case LANG_SLOVAK:
1108 msg_lang = "sk";
1109 break;
1110 case LANG_SPANISH:
1111 msg_lang = "es";
1112 break;
1115 #endif
1118 #ifdef DEBUG
1119 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1120 #endif
1122 /* On Mandrake LANG defaults to C */
1123 if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
1125 { /* If msg_lang has a country code, snip it out to give msg_lang2. */
1126 size_t b = 0;
1127 while (isalpha((unsigned char)msg_lang[b])) {
1128 ++b;
1130 if (msg_lang[b] == '_') {
1131 char * tmp;
1132 size_t e = b + 1;
1133 while (isalpha((unsigned char)msg_lang[e])) {
1134 ++e;
1136 tmp = osstrdup(msg_lang);
1137 memmove(tmp + b, tmp + e, strlen(tmp + e) + 1);
1138 msg_lang2 = tmp;
1142 #ifdef LC_MESSAGES
1143 /* try to setlocale() appropriately too */
1144 if (!setlocale(LC_MESSAGES, msg_lang)) {
1145 if (msg_lang2) {
1146 (void)setlocale(LC_MESSAGES, msg_lang2);
1149 #endif
1151 select_charset(default_charset());
#if OS_WIN32 || OS_UNIX_MACOSX
/* Build the path of a PROJ data file: pth_cfg_files, then "/proj/", then
 * the requested file name.
 *
 * The result is held in a function-local static buffer which is reused on
 * every call, so the returned pointer is only good until the next call.
 */
const char *
msg_proj_finder(const char * file)
{
   static char * path = NULL;
   static int path_len = 0;

   /* Reset the buffer left over from any previous call before appending. */
   s_zero(&path);

   s_cat(&path, &path_len, pth_cfg_files);
   s_cat(&path, &path_len, "/proj/");
   s_cat(&path, &path_len, file);

   return path;
}
#endif
1168 const char *
1169 msg(int en)
1171 /* NB can't use SVX_ASSERT here! */
1172 if (dontextract && en >= 1000 && en < 1000 + N_DONTEXTRACTMSGS)
1173 return dontextract[en - 1000];
1174 if (!msg_array) {
1175 if (en != 1) {
1176 fprintf(STDERR, "Message %d requested before fully initialised\n", en);
1177 return "Message requested before fully initialised\n";
1179 /* this should be the only other message which can be requested before
1180 * the message file is opened and read... */
1181 if (!dontextract) return "Out of memory (couldn't find %lu bytes).";
1182 return dontextract[(/*Out of memory (couldn't find %lu bytes).*/1004)
1183 - 1000];
1186 if (en < 0 || en >= num_msgs) {
1187 fprintf(STDERR, "Message %d out of range\n", en);
1188 return "Message out of range\n";
1191 if (en == 0) {
1192 const char *p = msg_array[0];
1193 if (!*p) p = "(C)";
1194 return p;
1197 return msg_array[en];
1200 void
1201 v_report(int severity, const char *fnm, int line, int col, int en, va_list ap)
1203 #ifdef AVEN
1204 (void)col;
1205 aven_v_report(severity, fnm, line, en, ap);
1206 #else
1207 const char * level;
1208 if (fnm) {
1209 fputs(fnm, STDERR);
1210 if (line) fprintf(STDERR, ":%d", line);
1211 if (col > 0) fprintf(STDERR, ":%d", col);
1212 } else {
1213 fputs(appname_copy, STDERR);
1215 fputs(": ", STDERR);
1217 if (severity == 0) {
1218 /* TRANSLATORS: Indicates a warning message e.g.:
1219 * "spoon.svx:12: warning: *prefix is deprecated" */
1220 level = msg(/*warning*/4);
1221 } else if (severity > 0) {
1222 /* TRANSLATORS: Indicates an error message e.g.:
1223 * "spoon.svx:13:4: error: Field may not be omitted" */
1224 level = msg(/*error*/93);
1227 if(severity == -1)
1228 vfprintf(STDERR, "%s", ap);
1229 else
1231 fputs(level, STDERR);
1232 fputs(": ", STDERR);
1233 vfprintf(STDERR, msg(en), ap);
1236 fputnl(STDERR);
1237 #endif
1239 switch (severity) {
1240 case -1:
1241 /* Just reporting line contents! */
1242 break;
1243 case 0:
1244 msg_warnings++;
1245 break;
1246 case 1:
1247 msg_errors++;
1248 if (msg_errors == 50)
1249 fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
1250 break;
1251 case 2:
1252 exit(EXIT_FAILURE);
/* Issue message en as a warning (severity 0) with no file location. */
void
warning(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(0, NULL, 0, 0, en, args);
   va_end(args);
}
/* Issue message en as an error (severity 1) with no file location. */
void
error(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(1, NULL, 0, 0, en, args);
   va_end(args);
}
/* Issue message en as a fatal error (severity 2) with no file location. */
void
fatalerror(int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(2, NULL, 0, 0, en, args);
   va_end(args);
}
/* Issue message en as a warning (severity 0) located at fnm:line.
 * No column is reported.
 */
void
warning_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(0, fnm, line, 0, en, args);
   va_end(args);
}
/* Issue message en as an error (severity 1) located at fnm:line.
 * No column is reported.
 */
void
error_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(1, fnm, line, 0, en, args);
   va_end(args);
}
/* Issue message en as a fatal error (severity 2) located at fnm:line.
 * No column is reported.
 */
void
fatalerror_in_file(const char *fnm, int line, int en, ...)
{
   va_list args;

   va_start(args, en);
   v_report(2, fnm, line, 0, en, args);
   va_end(args);
}
/* Code to support switching character set at runtime (e.g. for a printer
 * driver to support different character sets on screen and on the printer)
 */
/* One entry in the list of character sets whose messages have already been
 * parsed, so that switching back to a charset can reuse its message array.
 */
typedef struct charset_li charset_li;

struct charset_li {
   charset_li *next;    /* following entry, or NULL at the end of the list */
   int code;            /* charset code this entry was parsed for */
   char **msg_array;    /* message array belonging to that charset */
};
1319 static charset_li *charset_head = NULL;
1321 static int charset = CHARSET_BAD;
1324 select_charset(int charset_code)
1326 int old_charset = charset;
1327 charset_li *p;
1329 #ifdef DEBUG
1330 fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
1331 charset);
1332 #endif
1334 charset = charset_code;
1336 /* check if we've already parsed messages for new charset */
1337 for (p = charset_head; p; p = p->next) {
1338 #ifdef DEBUG
1339 printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
1340 #endif
1341 if (p->code == charset) {
1342 msg_array = p->msg_array;
1343 return old_charset;
1347 /* nope, got to reparse message file */
1348 parse_msg_file(charset_code);
1350 /* add to list */
1351 p = osnew(charset_li);
1352 p->code = charset;
1353 p->msg_array = msg_array;
1354 p->next = charset_head;
1355 charset_head = p;
1357 return old_charset;