Don't pass signed char values to ctype.h functions
[survex.git] / src / message.c
blob78f5cc14351c5b9f84e3c43fa2219c3d42e70f91
1 /* message.c
2 * Fairly general purpose message and error routines
3 * Copyright (C) 1993-2022 Olly Betts
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*#define DEBUG 1*/
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <limits.h>
31 #include <errno.h>
32 #include <locale.h>
34 #include "cmdline.h"
35 #include "whichos.h"
36 #include "filename.h"
37 #include "message.h"
38 #include "filelist.h"
39 #include "debug.h"
40 #include "str.h"
42 #ifdef AVEN
43 # include "aven.h"
44 #endif
46 #if OS_WIN32
47 # define WIN32_LEAN_AND_MEAN
48 # include <windows.h>
49 #elif OS_UNIX
50 # include <sys/types.h>
51 #endif
53 #include <sys/stat.h>
55 /* For funcs which want to be immune from messing around with different
56 * calling conventions */
57 #ifndef CDECL
58 # define CDECL
59 #endif
61 int msg_warnings = 0; /* keep track of how many warnings we've given */
62 int msg_errors = 0; /* and how many (non-fatal) errors */
64 /* in case osmalloc() fails before appname_copy is set up */
65 static const char *appname_copy = "anonymous program";
67 /* Path to use to look for executables (used by aven to find cavern). */
68 static const char *exe_pth = "";
70 /* error code for failed osmalloc and osrealloc calls */
71 static void
72 outofmem(OSSIZE_T size)
74 /* TRANSLATORS: "%lu" is a placeholder for the number of bytes which Survex
75 * was trying to allocate space for. */
76 fatalerror(/*Out of memory (couldn’t find %lu bytes).*/1,
77 (unsigned long)size);
80 #ifdef TOMBSTONES
81 #define TOMBSTONE_SIZE 16
82 static const char tombstone[TOMBSTONE_SIZE] = "012345\xfftombstone";
83 #endif
85 /* malloc with error catching if it fails. Also allows us to write special
86 * versions easily eg for MS Windows.
88 void *
89 osmalloc(OSSIZE_T size)
91 void *p;
92 #ifdef TOMBSTONES
93 size += TOMBSTONE_SIZE * 2;
94 p = malloc(size);
95 #else
96 p = xosmalloc(size);
97 #endif
98 if (p == NULL) outofmem(size);
99 #ifdef TOMBSTONES
100 printf("osmalloc truep=%p truesize=%d\n", p, size);
101 memcpy(p, tombstone, TOMBSTONE_SIZE);
102 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
103 *(size_t *)p = size;
104 p += TOMBSTONE_SIZE;
105 #endif
106 return p;
109 /* realloc with error catching if it fails. */
110 void *
111 osrealloc(void *p, OSSIZE_T size)
113 /* some pre-ANSI realloc implementations don't cope with a NULL pointer */
114 if (p == NULL) {
115 p = xosmalloc(size);
116 } else {
117 #ifdef TOMBSTONES
118 int true_size;
119 size += TOMBSTONE_SIZE * 2;
120 p -= TOMBSTONE_SIZE;
121 true_size = *(size_t *)p;
122 printf("osrealloc (in truep=%p truesize=%d)\n", p, true_size);
123 if (memcmp(p + sizeof(size_t), tombstone + sizeof(size_t),
124 TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
125 printf("start tombstone for block %p, size %d corrupted!",
126 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
128 if (memcmp(p + true_size - TOMBSTONE_SIZE, tombstone,
129 TOMBSTONE_SIZE) != 0) {
130 printf("end tombstone for block %p, size %d corrupted!",
131 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
133 p = realloc(p, size);
134 if (p == NULL) outofmem(size);
135 printf("osrealloc truep=%p truesize=%d\n", p, size);
136 memcpy(p, tombstone, TOMBSTONE_SIZE);
137 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
138 *(size_t *)p = size;
139 p += TOMBSTONE_SIZE;
140 #else
141 p = xosrealloc(p, size);
142 #endif
144 if (p == NULL) outofmem(size);
145 return p;
148 char *
149 osstrdup(const char *str)
151 char *p;
152 OSSIZE_T len;
153 len = strlen(str) + 1;
154 p = osmalloc(len);
155 memcpy(p, str, len);
156 return p;
/* osfree is usually just a macro in osalloc.h */
#ifdef TOMBSTONES
/* TOMBSTONES version of osfree(): checks the guard patterns at both ends of
 * the block (reporting any damage on stdout) and then frees the underlying
 * allocation.  A NULL p is ignored. */
void
osfree(void *p)
{
   unsigned char *raw;
   size_t true_size;
   if (!p) return;
   /* Step back over the leading tombstone to the true start of the block;
    * the true size is stored in its first sizeof(size_t) bytes. */
   raw = (unsigned char *)p - TOMBSTONE_SIZE;
   true_size = *(size_t *)raw;
   printf("osfree truep=%p truesize=%lu\n",
          (void *)raw, (unsigned long)true_size);
   if (memcmp(raw + sizeof(size_t), tombstone + sizeof(size_t),
              TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
      printf("start tombstone for block %p, size %lu corrupted!",
             (void *)(raw + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   if (memcmp(raw + true_size - TOMBSTONE_SIZE, tombstone,
              TOMBSTONE_SIZE) != 0) {
      printf("end tombstone for block %p, size %lu corrupted!",
             (void *)(raw + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   free(raw);
}
#endif
183 static int
184 default_charset(void)
186 if (getenv("SURVEX_UTF8")) return CHARSET_UTF8;
187 #if OS_WIN32
188 # ifdef AVEN
189 # define CODEPAGE GetACP()
190 # else
191 # define CODEPAGE GetConsoleOutputCP()
192 # endif
193 switch (CODEPAGE) {
194 case 0: return CHARSET_UTF8;
195 case 1252: return CHARSET_WINCP1252;
196 case 1250: return CHARSET_WINCP1250;
197 case 850: return CHARSET_DOSCP850;
199 return CHARSET_USASCII;
200 #elif OS_UNIX
201 #ifdef AVEN
202 return CHARSET_UTF8;
203 #else
204 const char *p = getenv("LC_ALL");
205 if (p == NULL || p[0] == '\0') {
206 p = getenv("LC_CTYPE");
207 if (p == NULL || p[0] == '\0') {
208 p = getenv("LANG");
209 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
210 if (p == NULL || !isalpha((unsigned char)p[0]))
211 p = msg_lang;
215 if (p) {
216 const char *q = strchr(p, '.');
217 if (q) p = q + 1;
220 if (p) {
221 const char *chset = p;
222 size_t name_len;
224 while (*p != '\0' && *p != '@') p++;
226 name_len = p - chset;
228 if (name_len) {
229 int only_digit = 1;
230 size_t cnt;
232 for (cnt = 0; cnt < name_len; ++cnt)
233 if (isalpha((unsigned char)chset[cnt])) {
234 only_digit = 0;
235 break;
238 if (only_digit) goto iso;
240 switch (tolower((unsigned char)chset[0])) {
241 case 'i':
242 if (tolower((unsigned char)chset[1]) == 's' &&
243 tolower((unsigned char)chset[2]) == 'o') {
244 chset += 3;
245 iso:
246 if (strncmp(chset, "8859", 4) == 0) {
247 chset += 4;
248 while (chset < p && *chset && !isdigit((unsigned char)*chset))
249 chset++;
250 switch (atoi(chset)) {
251 case 1: return CHARSET_ISO_8859_1;
252 case 2: return CHARSET_ISO_8859_2;
253 case 15: return CHARSET_ISO_8859_15;
254 default: return CHARSET_USASCII;
258 break;
259 case 'u':
260 if (tolower((unsigned char)chset[1]) == 't' &&
261 tolower((unsigned char)chset[2]) == 'f') {
262 chset += 3;
263 while (chset < p && *chset && !isdigit((unsigned char)*chset))
264 chset++;
265 switch (atoi(chset)) {
266 case 8: return CHARSET_UTF8;
267 default: return CHARSET_USASCII;
273 return CHARSET_USASCII;
274 #endif
275 #else
276 # error Do not know operating system!
277 #endif
/* It seems that Swedish and maybe some other Scandinavian languages don't
 * transliterate &auml; to ae - but it seems there may be conflicting views
 * on this...
 */
284 #define umlaut_to_e() 1
286 /* values <= 127 already dealt with */
287 static int
288 add_unicode(int charset, unsigned char *p, int value)
290 #ifdef DEBUG
291 fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
292 #endif
293 if (value == 0) return 0;
294 switch (charset) {
295 case CHARSET_USASCII:
296 if (value < 0x80) {
297 *p = value;
298 return 1;
300 break;
301 case CHARSET_ISO_8859_1:
302 if (value < 0x100) {
303 *p = value;
304 return 1;
306 break;
307 case CHARSET_ISO_8859_2:
308 if (value >= 0xa0) {
309 int v = 0;
310 switch (value) {
311 case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
312 case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
313 case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
314 case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
315 case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
316 case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
317 case 0xfa: case 0xfc: case 0xfd:
318 v = value; break;
319 case 0x104: v = '\xa1'; break;
320 case 0x2d8: v = '\xa2'; break;
321 case 0x141: v = '\xa3'; break;
322 case 0x13d: v = '\xa5'; break;
323 case 0x15a: v = '\xa6'; break;
324 case 0x160: v = '\xa9'; break;
325 case 0x15e: v = '\xaa'; break; /* Scedil */
326 case 0x164: v = '\xab'; break;
327 case 0x179: v = '\xac'; break;
328 case 0x17d: v = '\xae'; break;
329 case 0x17b: v = '\xaf'; break;
330 case 0x105: v = '\xb1'; break;
331 case 0x2db: v = '\xb2'; break;
332 case 0x142: v = '\xb3'; break;
333 case 0x13e: v = '\xb5'; break;
334 case 0x15b: v = '\xb6'; break;
335 case 0x2c7: v = '\xb7'; break;
336 case 0x161: v = '\xb9'; break;
337 case 0x15f: v = '\xba'; break; /* scedil */
338 case 0x165: v = '\xbb'; break;
339 case 0x17a: v = '\xbc'; break;
340 case 0x2dd: v = '\xbd'; break;
341 case 0x17e: v = '\xbe'; break;
342 case 0x17c: v = '\xbf'; break;
343 case 0x154: v = '\xc0'; break;
344 case 0x102: v = '\xc3'; break;
345 case 0x139: v = '\xc5'; break;
346 case 0x106: v = '\xc6'; break;
347 case 0x10c: v = '\xc8'; break;
348 case 0x118: v = '\xca'; break;
349 case 0x11a: v = '\xcc'; break;
350 case 0x10e: v = '\xcf'; break;
351 case 0x110: v = '\xd0'; break;
352 case 0x143: v = '\xd1'; break;
353 case 0x147: v = '\xd2'; break;
354 case 0x150: v = '\xd5'; break;
355 case 0x158: v = '\xd8'; break;
356 case 0x16e: v = '\xd9'; break;
357 case 0x170: v = '\xdb'; break;
358 case 0x162: v = '\xde'; break; /* &Tcedil; */
359 case 0x155: v = '\xe0'; break;
360 case 0x103: v = '\xe3'; break;
361 case 0x13a: v = '\xe5'; break;
362 case 0x107: v = '\xe6'; break;
363 case 0x10d: v = '\xe8'; break;
364 case 0x119: v = '\xea'; break;
365 case 0x11b: v = '\xec'; break;
366 case 0x10f: v = '\xef'; break;
367 case 0x111: v = '\xf0'; break;
368 case 0x144: v = '\xf1'; break;
369 case 0x148: v = '\xf2'; break;
370 case 0x151: v = '\xf5'; break;
371 case 0x159: v = '\xf8'; break;
372 case 0x16f: v = '\xf9'; break;
373 case 0x171: v = '\xfb'; break;
374 case 0x163: v = '\xfe'; break; /* tcedil */
375 case 0x2d9: v = '\xff'; break;
377 if (v == 0) break;
378 value = v;
380 *p = value;
381 return 1;
382 case CHARSET_ISO_8859_15:
383 switch (value) {
384 case 0xa4: case 0xa6: case 0xb0: case 0xc4:
385 case 0xd0: case 0xd4: case 0xd5: case 0xd6:
386 goto donthave;
387 case 0x152: value = 0xd4; break; /* &OElig; */
388 case 0x153: value = 0xd5; break; /* &oelig; */
389 #if 0
390 case 0x0: value = 0xa4; break; /* euro */
391 #endif
392 case 0x160: value = 0xa6; break; /* Scaron */
393 case 0x161: value = 0xb0; break; /* scaron */
394 case 0x17d: value = 0xc4; break; /* Zcaron */
395 case 0x17e: value = 0xd0; break; /* zcaron */
396 #if 0
397 case 0x0: value = 0xd6; break; /* Ydiersis */
398 #endif
400 if (value < 0x100) {
401 *p = value;
402 return 1;
404 donthave:
405 break;
406 #if OS_WIN32
407 case CHARSET_WINCP1250:
408 /* MS Windows rough equivalent to ISO-8859-2 */
409 if (value >= 0x80) {
410 int v = 0;
411 /* This mapping is complete - there are 5 unused positions:
412 * 0x81 0x83 0x88 0x90 0x98 */
413 switch (value) {
414 case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
415 case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
416 case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
417 case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
418 case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
419 case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
420 case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
421 case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
422 case 0xfd:
423 v = value; break;
424 case 0x20ac: v = '\x80'; break;
425 case 0x201a: v = '\x82'; break;
426 case 0x201e: v = '\x84'; break;
427 case 0x2026: v = '\x85'; break;
428 case 0x2020: v = '\x86'; break;
429 case 0x2021: v = '\x87'; break;
430 case 0x2030: v = '\x89'; break;
431 case 0x0160: v = '\x8a'; break;
432 case 0x2039: v = '\x8b'; break;
433 case 0x015a: v = '\x8c'; break;
434 case 0x0164: v = '\x8d'; break;
435 case 0x017d: v = '\x8e'; break;
436 case 0x0179: v = '\x8f'; break;
437 case 0x2018: v = '\x91'; break;
438 case 0x2019: v = '\x92'; break;
439 case 0x201c: v = '\x93'; break;
440 case 0x201d: v = '\x94'; break;
441 case 0x2022: v = '\x95'; break;
442 case 0x2013: v = '\x96'; break;
443 case 0x2014: v = '\x97'; break;
444 case 0x2122: v = '\x99'; break;
445 case 0x0161: v = '\x9a'; break;
446 case 0x203a: v = '\x9b'; break;
447 case 0x015b: v = '\x9c'; break;
448 case 0x0165: v = '\x9d'; break;
449 case 0x017e: v = '\x9e'; break;
450 case 0x017a: v = '\x9f'; break;
451 case 0x02c7: v = '\xa1'; break;
452 case 0x02d8: v = '\xa2'; break;
453 case 0x0141: v = '\xa3'; break;
454 case 0x0104: v = '\xa5'; break;
455 case 0x015e: v = '\xaa'; break; /* Scedil */
456 case 0x017b: v = '\xaf'; break;
457 case 0x02db: v = '\xb2'; break;
458 case 0x0142: v = '\xb3'; break;
459 case 0x0105: v = '\xb9'; break;
460 case 0x015f: v = '\xba'; break; /* scedil */
461 case 0x013d: v = '\xbc'; break;
462 case 0x02dd: v = '\xbd'; break;
463 case 0x013e: v = '\xbe'; break;
464 case 0x017c: v = '\xbf'; break;
465 case 0x0154: v = '\xc0'; break;
466 case 0x0102: v = '\xc3'; break;
467 case 0x0139: v = '\xc5'; break;
468 case 0x0106: v = '\xc6'; break;
469 case 0x010c: v = '\xc8'; break;
470 case 0x0118: v = '\xca'; break;
471 case 0x011a: v = '\xcc'; break;
472 case 0x010e: v = '\xcf'; break;
473 case 0x0110: v = '\xd0'; break;
474 case 0x0143: v = '\xd1'; break;
475 case 0x0147: v = '\xd2'; break;
476 case 0x0150: v = '\xd5'; break;
477 case 0x0158: v = '\xd8'; break;
478 case 0x016e: v = '\xd9'; break;
479 case 0x0170: v = '\xdb'; break;
480 case 0x0162: v = '\xde'; break; /* &Tcedil; */
481 case 0x0155: v = '\xe0'; break;
482 case 0x0103: v = '\xe3'; break;
483 case 0x013a: v = '\xe5'; break;
484 case 0x0107: v = '\xe6'; break;
485 case 0x010d: v = '\xe8'; break;
486 case 0x0119: v = '\xea'; break;
487 case 0x011b: v = '\xec'; break;
488 case 0x010f: v = '\xef'; break;
489 case 0x0111: v = '\xf0'; break;
490 case 0x0144: v = '\xf1'; break;
491 case 0x0148: v = '\xf2'; break;
492 case 0x0151: v = '\xf5'; break;
493 case 0x0159: v = '\xf8'; break;
494 case 0x016f: v = '\xf9'; break;
495 case 0x0171: v = '\xfb'; break;
496 case 0x0163: v = '\xfe'; break; /* tcedil */
497 case 0x02d9: v = '\xff'; break;
499 if (v == 0) break;
500 value = v;
502 *p = value;
503 return 1;
504 case CHARSET_WINCP1252:
505 /* MS Windows extensions to ISO-8859-1 */
506 /* This mapping is complete - there are 5 unused positions:
507 * 0x81 0x8d 0x8f 0x90 0x9d */
508 switch (value) {
509 case 0x2026: value = 0x85; break; /* hellip */
510 case 0x0160: value = 0x8a; break; /* Scaron */
511 case 0x0152: value = 0x8c; break; /* OElig */
512 case 0x017d: value = 0x8e; break; /* Zcaron */
513 case 0x2019: value = 0x92; break; /* rsquo */
514 case 0x201c: value = 0x93; break; /* ldquo */
515 case 0x201d: value = 0x94; break; /* rdquo */
516 case 0x0161: value = 0x9a; break; /* scaron */
517 case 0x0153: value = 0x9c; break; /* oelig */
518 case 0x017e: value = 0x9e; break; /* zcaron */
519 #if 0
520 /* there are a few other obscure ones we don't currently need */
521 case 0x20ac: value = 0x80; break; /* euro */
522 case 0x201a: value = 0x82; break; /* sbquo */
523 case 0x0192: value = 0x83; break; /* fnof */
524 case 0x201e: value = 0x84; break; /* bdquo */
525 case 0x2020: value = 0x86; break; /* dagger */
526 case 0x2021: value = 0x87; break; /* Dagger */
527 case 0x02c6: value = 0x88; break; /* circ */
528 case 0x2030: value = 0x89; break; /* permil */
529 case 0x2039: value = 0x8b; break; /* lsaquo */
530 case 0x2018: value = 0x91; break; /* lsquo */
531 case 0x2022: value = 0x95; break; /* bull */
532 case 0x2013: value = 0x96; break; /* ndash */
533 case 0x2014: value = 0x97; break; /* mdash */
534 case 0x02dc: value = 0x98; break; /* tilde */
535 case 0x2122: value = 0x99; break; /* trade */
536 case 0x203a: value = 0x9b; break; /* rsaquo */
537 case 0x0178: value = 0x9f; break; /* Yuml */
538 #endif
540 if (value < 0x100) {
541 *p = value;
542 return 1;
544 break;
545 #endif
546 #if OS_WIN32
547 case CHARSET_DOSCP850: {
548 static const unsigned char uni2dostab[] = {
549 255, 173, 189, 156, 207, 190, 221, 245,
550 249, 184, 166, 174, 170, 240, 169, 238,
551 248, 241, 253, 252, 239, 230, 244, 250,
552 247, 251, 167, 175, 172, 171, 243, 168,
553 183, 181, 182, 199, 142, 143, 146, 128,
554 212, 144, 210, 211, 222, 214, 215, 216,
555 209, 165, 227, 224, 226, 229, 153, 158,
556 157, 235, 233, 234, 154, 237, 232, 225,
557 133, 160, 131, 198, 132, 134, 145, 135,
558 138, 130, 136, 137, 141, 161, 140, 139,
559 208, 164, 149, 162, 147, 228, 148, 246,
560 155, 151, 163, 150, 129, 236, 231, 152
562 if (value >= 160 && value < 256) {
563 *p = (int)uni2dostab[value - 160];
564 return 1;
566 #if 0
567 if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
568 *p = 213; /* "Modified CP850" has the Euro sign here. */
569 return 1;
571 if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
572 *p = 159;
573 return 1;
575 #endif
576 break;
578 #endif
580 /* Transliterate characters we can't represent */
581 #ifdef DEBUG
582 fprintf(stderr, "transliterate “%c” 0x%x\n", value, value);
583 #endif
584 switch (value) {
585 case 160:
586 *p = ' '; return 1;
587 case 161 /* ¡ */:
588 *p = '!'; return 1;
589 case 176 /* ° */:
590 *p = 'd'; p[1] = 'g'; return 2;
591 case 191 /* ¿ */:
592 *p = '?'; return 1;
593 case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
594 *p = 'A'; return 1;
595 case 197 /* Å */:
596 p[1] = *p = 'A'; return 2;
597 case 196 /* Ä */: /* &Auml; */
598 *p = 'A';
599 if (!umlaut_to_e()) return 1;
600 p[1] = 'E'; return 2;
601 case 198 /* Æ */:
602 *p = 'A'; p[1] = 'E'; return 2;
603 case 199 /* Ç */: case 268: /* &Ccaron; */
604 *p = 'C'; return 1;
605 case 270: /* &Dcaron; */
606 *p = 'D'; return 1;
607 case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
608 *p = 'E'; return 1;
609 case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
610 *p = 'I'; return 1;
611 case 208 /* Ð */: case 222 /* Þ */:
612 *p = 'T'; p[1] = 'H'; return 2;
613 case 315: /* &Lacute; */
614 case 317: /* &Lcaron; */
615 *p = 'L'; return 1;
616 case 209 /* Ñ */:
617 *p = 'N'; return 1;
618 case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
619 *p = 'O'; return 1;
620 case 214 /* Ö */: /* &Ouml; */ case 0x152: /* &OElig; */
621 *p = 'O'; p[1] = 'E'; return 2;
622 case 352: /* &Scaron; */
623 case 0x15e: /* &Scedil; */
624 *p = 'S'; return 1;
625 case 0x162: /* &Tcedil; */
626 case 0x164: /* &Tcaron; */
627 *p = 'T'; return 1;
628 case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
629 *p = 'U'; return 1;
630 case 220 /* Ü */: /* &Uuml; */
631 *p = 'U'; p[1] = 'E'; return 2;
632 case 221 /* Ý */:
633 *p = 'Y'; return 1;
634 case 381: /* &Zcaron; */
635 *p = 'Z'; return 1;
636 case 223 /* ß */:
637 p[1] = *p = 's'; return 2;
638 case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
639 case 259: /* &abreve; */
640 *p = 'a'; return 1;
641 case 228 /* ä */: /* &auml; */ case 230 /* æ */:
642 *p = 'a'; p[1] = 'e'; return 2;
643 case 229 /* å */:
644 p[1] = *p = 'a'; return 2;
645 case 231 /* ç */: case 269 /* &ccaron; */:
646 *p = 'c'; return 1;
647 case 271: /* &dcaron; */
648 *p = 'd'; return 1;
649 case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
650 case 283 /* &ecaron; */:
651 *p = 'e'; return 1;
652 case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
653 *p = 'i'; return 1;
654 case 316 /* &lacute; */:
655 case 318 /* &lcaron; */:
656 *p = 'l'; return 1;
657 case 241 /* ñ */: case 328 /* &ncaron; */:
658 *p = 'n'; return 1;
659 case 345: /* &rcaron; */
660 *p = 'r'; return 1;
661 case 353: /* &scaron; */
662 case 0x15f: /* &scedil; */
663 *p = 's'; return 1;
664 case 357: /* &tcaron; */
665 case 0x163: /* &tcedil; */
666 *p = 't'; return 1;
667 case 240 /* ð */: case 254 /* þ */:
668 *p = 't'; p[1] = 'h'; return 2;
669 case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
670 *p = 'o'; return 1;
671 case 246 /* ö */: /* &ouml; */ case 0x153: /* &oelig; */
672 *p = 'o'; p[1] = 'e'; return 2;
673 case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
674 case 367 /* &uring; */:
675 *p = 'u'; return 1;
676 case 252 /* ü */: /* &uuml; */
677 *p = 'u'; p[1] = 'e'; return 2;
678 case 253 /* ý */: case 255 /* ÿ */:
679 *p = 'y'; return 1;
680 case 382: /* &zcaron; */
681 *p = 'z'; return 1;
682 case 0x2019: /* &lsquo; */
683 *p = '\''; return 1;
684 case 171: /* « */ case 187: /* » */
685 case 0x201c: /* &ldquo; */ case 0x201d: /* &rdquo; */
686 *p = '"'; return 1;
687 case 0x2026: /* &hellip; */
688 *p = '.'; p[1] = '.'; p[2] = '.'; return 3;
689 case 0x2192: /* &rarr; */
690 *p = '-'; p[1] = '>'; return 2;
691 case 0x1d4d: /* gradient symbol */
692 *p = 'g'; p[1] = 'r'; p[2] = 'd'; return 3;
693 case 0x221e: /* infinity symbol */
694 *p = 'i'; p[1] = 'n'; p[2] = 'f'; return 3;
696 #ifdef DEBUG
697 /* 169 is reported (copyright symbol), but there isn't a good <= 2 ASCII
698 * character transliteration for that, so we handle that elsewhere. */
699 fprintf(stderr, "failed to transliterate codepoint %d\n", value);
700 #endif
701 return 0;
#if OS_UNIX && defined DATADIR && defined PACKAGE
/* Under Unix, we compile in the configured path */
static const char *pth_cfg_files = DATADIR "/" PACKAGE;
#else
/* On other platforms, we fall back on looking in the current directory */
static const char *pth_cfg_files = "";
#endif

/* Number of entries in msg_array; set by parse_msg_file(). */
static int num_msgs = 0;
/* Messages read from the message file; NULL until parse_msg_file() has
 * loaded them. */
static char **msg_array = NULL;

/* Set by msg_init(): true if the language came from the SURVEXLANG
 * environment variable, false if it was taken from elsewhere. */
static bool msg_lang_explicit = false;
/* Language code chosen by msg_init(), e.g. "en" or "en_GB". */
const char *msg_lang = NULL;
/* msg_lang with the country code snipped out; msg_init() only sets this when
 * msg_lang has one, otherwise it stays NULL. */
const char *msg_lang2 = NULL;
719 static char **
720 parse_msgs(int n, unsigned char *p, int charset_code) {
721 int i;
723 char **msgs = osmalloc(n * sizeof(char *));
725 for (i = 0; i < n; i++) {
726 unsigned char *to = p;
727 int ch;
728 msgs[i] = (char *)p;
730 /* If we want UTF8 anyway, we just need to find the start of each
731 * message */
732 if (charset_code == CHARSET_UTF8) {
733 p += strlen((char *)p) + 1;
734 continue;
737 while ((ch = *p++) != 0) {
738 /* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
739 * this state, (0xf0-0xfd mean values > 0xffff) so treat as
740 * literal and try to resync so we cope better when fed
741 * non-utf-8 data. Similarly we abandon a multibyte sequence
742 * if we hit an invalid character. */
743 if (ch >= 0xc0 && ch < 0xf0) {
744 int ch1 = *p;
745 if ((ch1 & 0xc0) != 0x80) goto resync;
747 if (ch < 0xe0) {
748 /* 2 byte sequence */
749 ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
750 p++;
751 } else {
752 /* 3 byte sequence */
753 int ch2 = p[1];
754 if ((ch2 & 0xc0) != 0x80) goto resync;
755 ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
756 p += 2;
760 resync:
762 if (ch < 127) {
763 *to++ = (char)ch;
764 } else {
765 /* We assume an N byte UTF-8 code never transliterates to more
766 * than N characters (so we can't transliterate © to (C) or
767 * ® to (R) for example) */
768 to += add_unicode(charset_code, to, ch);
771 *to++ = '\0';
773 return msgs;
776 /* This is the name of the default language, which can be set like so:
777 * ./configure --enable-defaultlang=fr
779 #ifdef DEFAULTLANG
780 /* No point extracting these errors as they won't get used if file opens */
781 # include "../lib/defaultlang.h"
782 #else
783 #define N_DONTEXTRACTMSGS 5
784 static unsigned char dontextractmsgs[] =
785 "Can't open message file \"%s\" using path \"%s\"\0"/*1000*/
786 "Problem with message file \"%s\"\0"/*1001*/
787 "I don't understand this message file version\0"/*1002*/
788 "Message file truncated?\0"/*1003*/
789 "Out of memory (couldn't find %lu bytes).\0"/*1004*/;
790 #endif
792 static char **dontextract = NULL;
794 static void
795 parse_msg_file(int charset_code)
797 FILE *fh;
798 unsigned char header[20];
799 int i;
800 unsigned len;
801 unsigned char *p;
802 char *fnm, *s;
803 int n;
805 #ifdef DEBUG
806 fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
807 #endif
809 /* sort out messages we need to print if we can't open the message file */
810 dontextract = parse_msgs(N_DONTEXTRACTMSGS, dontextractmsgs, charset_code);
812 fnm = osstrdup(msg_lang);
813 /* trim off charset from stuff like "de_DE.iso8859_1" */
814 s = strchr(fnm, '.');
815 if (s) *s = '\0';
816 /* trim off any "@<something>" modifier. */
817 s = strchr(fnm, '@');
818 if (s) *s = '\0';
820 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
822 if (!fh && strlen(fnm) > 3 && fnm[2] == '_') {
823 /* e.g. if 'en_GB' is unknown, see if we know 'en' */
824 fnm[2] = '\0';
825 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
826 if (!fh) fnm[2] = '_'; /* for error reporting */
829 if (!fh && !msg_lang_explicit) {
830 /* If msg_lang wasn't specified using environment variable SURVEXLANG,
831 * then default to 'en' if we don't find messages for language msg_lang.
833 if (fnm[0] && fnm[1]) {
834 strcpy(fnm, "en");
835 } else {
836 osfree(fnm);
837 fnm = osstrdup("en");
839 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
842 if (!fh) {
843 fatalerror(/*Can't open message file “%s” using path “%s”*/1000,
844 fnm, pth_cfg_files);
847 if (fread(header, 1, 20, fh) < 20 ||
848 memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
849 fatalerror(/*Problem with message file “%s”*/1001, fnm);
852 if (header[12] != 0)
853 fatalerror(/*I don't understand this message file version*/1002);
855 n = (header[14] << 8) | header[15];
857 len = 0;
858 for (i = 16; i < 20; i++) len = (len << 8) | header[i];
860 p = osmalloc(len);
861 if (fread(p, 1, len, fh) < len)
862 fatalerror(/*Message file truncated?*/1003);
864 fclose(fh);
866 #ifdef DEBUG
867 fprintf(stderr, "fnm = “%s”, n = %d, len = %d\n", fnm, n, len);
868 #endif
869 osfree(fnm);
871 msg_array = parse_msgs(n, p, charset_code);
872 num_msgs = n;
875 const char *
876 msg_cfgpth(void)
878 return pth_cfg_files;
881 const char *
882 msg_exepth(void)
884 return exe_pth;
887 const char *
888 msg_appname(void)
890 return appname_copy;
893 void
894 (msg_init)(char * const *argv)
896 char *p;
897 SVX_ASSERT(argv);
899 /* Point to argv[0] itself so we report a more helpful error if the
900 * code to work out the clean appname generates a signal */
901 appname_copy = argv[0];
902 #if OS_UNIX
903 /* use name as-is on Unix - programs run from path get name as supplied */
904 appname_copy = osstrdup(argv[0]);
905 #else
906 /* use the lower-cased leafname on other platforms */
907 p = leaf_from_fnm(argv[0]);
908 appname_copy = p;
909 while (*p) {
910 *p = tolower((unsigned char)*p);
911 ++p;
913 #endif
915 /* shortcut --version so you can check the version number even when the
916 * correct message file can't be found... */
917 if (argv[1] && strcmp(argv[1], "--version") == 0) {
918 cmdline_version();
919 exit(0);
921 if (argv[0]) {
922 exe_pth = path_from_fnm(argv[0]);
923 #if OS_UNIX && defined DATADIR && defined PACKAGE
924 bool free_pth = false;
925 char *pth = getenv("srcdir");
926 if (!pth || !pth[0]) {
927 pth = path_from_fnm(argv[0]);
928 free_pth = true;
930 if (pth[0]) {
931 struct stat buf;
932 #if OS_UNIX_MACOS
933 # ifndef AVEN
934 /* On macOS the programs may be installed anywhere, with the
935 * share directory and the binaries in the same directory. */
936 p = use_path(pth, "share/survex/en.msg");
937 if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
938 pth_cfg_files = use_path(pth, "share/survex");
939 goto macos_got_msg;
941 osfree(p);
942 # endif
943 /* In the diskimage package, this case is used for aven, and for
944 * the hardlinked copies of cavern and extend alongside the aven
945 * binary, which are the ones which aven runs.
947 p = use_path(pth, "../Resources/en.msg");
948 if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
949 pth_cfg_files = use_path(pth, "../Resources");
950 goto macos_got_msg;
952 osfree(p);
953 #endif
954 /* If we're run with an explicit path, check if "../lib/en.msg"
955 * from the program's path exists, and if so look there for
956 * support files - this allows us to test binaries in the build
957 * tree easily. */
958 p = use_path(pth, "../lib/en.msg");
959 if (stat(p, &buf) == 0) {
960 #ifdef S_ISREG
961 /* POSIX way */
962 if (S_ISREG(buf.st_mode)) {
963 pth_cfg_files = use_path(pth, "../lib");
965 #else
966 /* BSD way */
967 if ((buf.st_mode & S_IFMT) == S_IFREG) {
968 pth_cfg_files = use_path(pth, "../lib");
970 #endif
972 #if defined(__GNUC__) && defined(__APPLE_CC__)
973 macos_got_msg:
974 #endif
975 osfree(p);
978 if (free_pth) osfree(pth);
979 #elif OS_WIN32
980 DWORD len = 256;
981 char *buf = NULL, *modname;
982 while (1) {
983 DWORD got;
984 buf = osrealloc(buf, len);
985 got = GetModuleFileName(NULL, buf, len);
986 if (got < len) break;
987 len += len;
989 modname = buf;
990 /* Strange Win32 nastiness - strip prefix "\\?\" if present */
991 if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
992 pth_cfg_files = path_from_fnm(modname);
993 osfree(buf);
994 #else
995 /* Get the path to the support files from argv[0] */
996 pth_cfg_files = path_from_fnm(argv[0]);
997 #endif
1000 msg_lang = getenv("SURVEXLANG");
1001 #ifdef DEBUG
1002 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1003 #endif
1005 msg_lang_explicit = true;
1006 if (!msg_lang || !*msg_lang) {
1007 msg_lang_explicit = false;
1008 msg_lang = getenv("LC_ALL");
1010 if (!msg_lang || !*msg_lang) {
1011 msg_lang = getenv("LC_MESSAGES");
1012 if (!msg_lang || !*msg_lang) {
1013 msg_lang = getenv("LANG");
1014 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
1015 if (msg_lang && !isalpha(msg_lang[0])) msg_lang = NULL;
1017 if (!msg_lang || !*msg_lang) {
1018 #if OS_WIN32
1019 LCID locid;
1020 #endif
1021 #ifdef DEFAULTLANG
1022 msg_lang = STRING(DEFAULTLANG);
1023 #else
1024 msg_lang = "en";
1025 #endif
1026 #if OS_WIN32
1027 /* GetUserDefaultUILanguage() requires Microsoft Windows 2000 or
1028 * newer, but we don't support anything earlier than Vista.
1030 locid = GetUserDefaultUILanguage();
1031 if (locid) {
1032 WORD langid = LANGIDFROMLCID(locid);
1033 switch (PRIMARYLANGID(langid)) {
1034 case LANG_BULGARIAN:
1035 msg_lang = "bg";
1036 break;
1037 /* older mingw compilers don't seem to supply this value */
1038 #ifndef LANG_CATALAN
1039 # define LANG_CATALAN 0x03
1040 #endif
1041 case LANG_CATALAN:
1042 msg_lang = "ca";
1043 break;
1044 case LANG_CHINESE:
1045 msg_lang = "zh_CN";
1046 break;
1047 case LANG_ENGLISH:
1048 if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
1049 msg_lang = "en_US";
1050 else
1051 msg_lang = "en";
1052 break;
1053 case LANG_FRENCH:
1054 msg_lang = "fr";
1055 break;
1056 case LANG_GERMAN:
1057 switch (SUBLANGID(langid)) {
1058 case SUBLANG_GERMAN_SWISS:
1059 msg_lang = "de_CH";
1060 break;
1061 default:
1062 msg_lang = "de";
1064 break;
1065 case LANG_GREEK:
1066 msg_lang = "el";
1067 break;
1068 case LANG_HUNGARIAN:
1069 msg_lang = "hu";
1070 break;
1071 case LANG_INDONESIAN:
1072 msg_lang = "id";
1073 break;
1074 case LANG_ITALIAN:
1075 msg_lang = "it";
1076 break;
1077 case LANG_POLISH:
1078 msg_lang = "pl";
1079 break;
1080 case LANG_PORTUGUESE:
1081 if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
1082 msg_lang = "pt_BR";
1083 else
1084 msg_lang = "pt";
1085 break;
1086 case LANG_ROMANIAN:
1087 msg_lang = "ro";
1088 break;
1089 case LANG_RUSSIAN:
1090 msg_lang = "ru";
1091 break;
1092 case LANG_SLOVAK:
1093 msg_lang = "sk";
1094 break;
1095 case LANG_SPANISH:
1096 msg_lang = "es";
1097 break;
1100 #endif
1103 #ifdef DEBUG
1104 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1105 #endif
1107 /* On Mandrake LANG defaults to C */
1108 if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
1110 { /* If msg_lang has a country code, snip it out to give msg_lang2. */
1111 size_t b = 0;
1112 while (isalpha((unsigned char)msg_lang[b])) {
1113 ++b;
1115 if (msg_lang[b] == '_') {
1116 char * tmp;
1117 size_t e = b + 1;
1118 while (isalpha((unsigned char)msg_lang[e])) {
1119 ++e;
1121 tmp = osstrdup(msg_lang);
1122 memmove(tmp + b, tmp + e, strlen(tmp + e) + 1);
1123 msg_lang2 = tmp;
1127 #ifdef LC_MESSAGES
1128 /* try to setlocale() appropriately too */
1129 if (!setlocale(LC_MESSAGES, msg_lang)) {
1130 if (msg_lang2) {
1131 (void)setlocale(LC_MESSAGES, msg_lang2);
1134 #endif
1136 select_charset(default_charset());
1139 #ifndef AVEN
1140 /* Return message if messages available, else a fallback value. */
1141 static const char *
1142 msg_opt(int en, const char * fallback)
1144 /* NB can't use SVX_ASSERT here! */
1145 if (!msg_array || en <= 0 || en >= num_msgs) {
1146 return fallback;
1149 return msg_array[en];
1151 #endif
1153 const char *
1154 msg(int en)
1156 /* NB can't use SVX_ASSERT here! */
1157 if (dontextract && en >= 1000 && en < 1000 + N_DONTEXTRACTMSGS)
1158 return dontextract[en - 1000];
1159 if (!msg_array) {
1160 if (en != 1) {
1161 fprintf(STDERR, "Message %d requested before fully initialised\n", en);
1162 return "Message requested before fully initialised\n";
1164 /* this should be the only other message which can be requested before
1165 * the message file is opened and read... */
1166 if (!dontextract) return "Out of memory (couldn't find %lu bytes).";
1167 return dontextract[(/*Out of memory (couldn't find %lu bytes).*/1004)
1168 - 1000];
1171 if (en < 0 || en >= num_msgs) {
1172 fprintf(STDERR, "Message %d out of range\n", en);
1173 return "Message out of range\n";
1176 if (en == 0) {
1177 const char *p = msg_array[0];
1178 if (!*p) p = "(C)";
1179 return p;
1182 return msg_array[en];
1185 void
1186 v_report(int severity, const char *fnm, int line, int col, int en, va_list ap)
1188 #ifdef AVEN
1189 (void)col;
1190 aven_v_report(severity, fnm, line, en, ap);
1191 #else
1192 const char * level;
1193 if (fnm) {
1194 fputs(fnm, STDERR);
1195 if (line) fprintf(STDERR, ":%d", line);
1196 if (col > 0) fprintf(STDERR, ":%d", col);
1197 } else {
1198 fputs(appname_copy, STDERR);
1200 fputs(": ", STDERR);
1202 switch (severity) {
1203 case DIAG_INFO:
1204 /* TRANSLATORS: Indicates a informational message e.g.:
1205 * "spoon.svx:12: info: Declination: [...]" */
1206 level = msg_opt(/*info*/485, "info");
1207 break;
1208 case DIAG_WARN:
1209 /* TRANSLATORS: Indicates a warning message e.g.:
1210 * "spoon.svx:12: warning: *prefix is deprecated" */
1211 level = msg_opt(/*warning*/4, "warning");
1212 break;
1213 default:
1214 /* TRANSLATORS: Indicates an error message e.g.:
1215 * "spoon.svx:13:4: error: Field may not be omitted" */
1216 level = msg_opt(/*error*/93, "error");
1217 break;
1219 fputs(level, STDERR);
1220 fputs(": ", STDERR);
1222 vfprintf(STDERR, msg(en), ap);
1223 fputnl(STDERR);
1224 #endif
1226 switch (severity) {
1227 case DIAG_WARN:
1228 msg_warnings++;
1229 break;
1230 case DIAG_ERR:
1231 msg_errors++;
1232 if (msg_errors == 50)
1233 fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
1234 break;
1235 case DIAG_FATAL:
1236 exit(EXIT_FAILURE);
/* Report message number en at the given severity with no file location.
 *
 * The variadic arguments are passed on to v_report() as the arguments for
 * the message's format; fnm is NULL and line/col are 0.
 */
void
diag(int severity, int en, ...)
{
    va_list args;

    va_start(args, en);
    v_report(severity, NULL, 0, 0, en, args);
    va_end(args);
}
/* Report message number en at the given severity, attributed to file fnm
 * and line number line.
 *
 * The variadic arguments are passed on to v_report() as the arguments for
 * the message's format; the column is always passed as 0.
 */
void
diag_in_file(int severity, const char *fnm, int line, int en, ...)
{
    va_list args;

    va_start(args, en);
    v_report(severity, fnm, line, 0, en, args);
    va_end(args);
}
1258 /* Code to support switching character set at runtime (e.g. for a printer
1259 * driver to support different character sets on screen and on the printer)
1261 typedef struct charset_li {
1262 struct charset_li *next;
1263 int code;
1264 char **msg_array;
1265 } charset_li;
1267 static charset_li *charset_head = NULL;
1269 static int charset = CHARSET_BAD;
1272 select_charset(int charset_code)
1274 int old_charset = charset;
1275 charset_li *p;
1277 #ifdef DEBUG
1278 fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
1279 charset);
1280 #endif
1282 charset = charset_code;
1284 /* check if we've already parsed messages for new charset */
1285 for (p = charset_head; p; p = p->next) {
1286 #ifdef DEBUG
1287 printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
1288 #endif
1289 if (p->code == charset) {
1290 msg_array = p->msg_array;
1291 return old_charset;
1295 /* nope, got to reparse message file */
1296 parse_msg_file(charset_code);
1298 /* add to list */
1299 p = osnew(charset_li);
1300 p->code = charset;
1301 p->msg_array = msg_array;
1302 p->next = charset_head;
1303 charset_head = p;
1305 return old_charset;