Show location for backsights out of tolerance
[survex.git] / src / message.c
blob7a90a5965610f2860f2fc361d48d0a37790990b3
1 /* message.c
2 * Fairly general purpose message and error routines
3 * Copyright (C) 1993-2022 Olly Betts
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*#define DEBUG 1*/
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <limits.h>
31 #include <errno.h>
32 #include <locale.h>
34 #include "cmdline.h"
35 #include "whichos.h"
36 #include "filename.h"
37 #include "message.h"
38 #include "filelist.h"
39 #include "debug.h"
40 #include "str.h"
42 #ifdef AVEN
43 # include "aven.h"
44 #endif
46 #if OS_WIN32
47 # define WIN32_LEAN_AND_MEAN
48 # include <windows.h>
49 #elif OS_UNIX
50 # include <sys/types.h>
51 #endif
53 #include <sys/stat.h>
55 /* For funcs which want to be immune from messing around with different
56 * calling conventions */
57 #ifndef CDECL
58 # define CDECL
59 #endif
61 int msg_warnings = 0; /* keep track of how many warnings we've given */
62 int msg_errors = 0; /* and how many (non-fatal) errors */
64 /* in case osmalloc() fails before appname_copy is set up */
65 static const char *appname_copy = "anonymous program";
67 /* Path to use to look for executables (used by aven to find cavern). */
68 static const char *exe_pth = "";
70 /* error code for failed osmalloc and osrealloc calls */
71 static void
72 outofmem(OSSIZE_T size)
74 /* TRANSLATORS: "%lu" is a placeholder for the number of bytes which Survex
75 * was trying to allocate space for. */
76 fatalerror(/*Out of memory (couldn’t find %lu bytes).*/1,
77 (unsigned long)size);
80 #ifdef TOMBSTONES
81 #define TOMBSTONE_SIZE 16
82 static const char tombstone[TOMBSTONE_SIZE] = "012345\xfftombstone";
83 #endif
85 /* malloc with error catching if it fails. Also allows us to write special
86 * versions easily eg for MS Windows.
88 void *
89 osmalloc(OSSIZE_T size)
91 void *p;
92 #ifdef TOMBSTONES
93 size += TOMBSTONE_SIZE * 2;
94 p = malloc(size);
95 #else
96 p = xosmalloc(size);
97 #endif
98 if (p == NULL) outofmem(size);
99 #ifdef TOMBSTONES
100 printf("osmalloc truep=%p truesize=%d\n", p, size);
101 memcpy(p, tombstone, TOMBSTONE_SIZE);
102 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
103 *(size_t *)p = size;
104 p += TOMBSTONE_SIZE;
105 #endif
106 return p;
109 /* realloc with error catching if it fails. */
110 void *
111 osrealloc(void *p, OSSIZE_T size)
113 /* some pre-ANSI realloc implementations don't cope with a NULL pointer */
114 if (p == NULL) {
115 p = xosmalloc(size);
116 } else {
117 #ifdef TOMBSTONES
118 int true_size;
119 size += TOMBSTONE_SIZE * 2;
120 p -= TOMBSTONE_SIZE;
121 true_size = *(size_t *)p;
122 printf("osrealloc (in truep=%p truesize=%d)\n", p, true_size);
123 if (memcmp(p + sizeof(size_t), tombstone + sizeof(size_t),
124 TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
125 printf("start tombstone for block %p, size %d corrupted!",
126 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
128 if (memcmp(p + true_size - TOMBSTONE_SIZE, tombstone,
129 TOMBSTONE_SIZE) != 0) {
130 printf("end tombstone for block %p, size %d corrupted!",
131 p + TOMBSTONE_SIZE, true_size - TOMBSTONE_SIZE * 2);
133 p = realloc(p, size);
134 if (p == NULL) outofmem(size);
135 printf("osrealloc truep=%p truesize=%d\n", p, size);
136 memcpy(p, tombstone, TOMBSTONE_SIZE);
137 memcpy(p + size - TOMBSTONE_SIZE, tombstone, TOMBSTONE_SIZE);
138 *(size_t *)p = size;
139 p += TOMBSTONE_SIZE;
140 #else
141 p = xosrealloc(p, size);
142 #endif
144 if (p == NULL) outofmem(size);
145 return p;
148 char *
149 osstrdup(const char *str)
151 char *p;
152 OSSIZE_T len;
153 len = strlen(str) + 1;
154 p = osmalloc(len);
155 memcpy(p, str, len);
156 return p;
/* osfree is usually just a macro in osalloc.h */
#ifdef TOMBSTONES
/* Debugging version of osfree(): checks the guard bytes written around the
 * block by osmalloc()/osrealloc(), reports any corruption on stdout, then
 * releases the underlying allocation.  A NULL `p` is ignored. */
void
osfree(void *p)
{
   /* Use a byte pointer: arithmetic on void * is a compiler extension. */
   unsigned char *base;
   size_t true_size;
   if (!p) return;
   base = (unsigned char *)p - TOMBSTONE_SIZE;
   true_size = *(size_t *)base;
   printf("osfree truep=%p truesize=%lu\n",
          (void *)base, (unsigned long)true_size);
   /* The start of the leading guard holds the size, so skip those bytes. */
   if (memcmp(base + sizeof(size_t), tombstone + sizeof(size_t),
              TOMBSTONE_SIZE - sizeof(size_t)) != 0) {
      printf("start tombstone for block %p, size %lu corrupted!",
             (void *)(base + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   if (memcmp(base + true_size - TOMBSTONE_SIZE, tombstone,
              TOMBSTONE_SIZE) != 0) {
      printf("end tombstone for block %p, size %lu corrupted!",
             (void *)(base + TOMBSTONE_SIZE),
             (unsigned long)(true_size - TOMBSTONE_SIZE * 2));
   }
   free(base);
}
#endif
183 static int
184 default_charset(void)
186 if (getenv("SURVEX_UTF8")) return CHARSET_UTF8;
187 #if OS_WIN32
188 # ifdef AVEN
189 # define CODEPAGE GetACP()
190 # else
191 # define CODEPAGE GetConsoleOutputCP()
192 # endif
193 switch (CODEPAGE) {
194 case 0: return CHARSET_UTF8;
195 case 1252: return CHARSET_WINCP1252;
196 case 1250: return CHARSET_WINCP1250;
197 case 850: return CHARSET_DOSCP850;
199 return CHARSET_USASCII;
200 #elif OS_UNIX
201 #ifdef AVEN
202 return CHARSET_UTF8;
203 #else
204 const char *p = getenv("LC_ALL");
205 if (p == NULL || p[0] == '\0') {
206 p = getenv("LC_CTYPE");
207 if (p == NULL || p[0] == '\0') {
208 p = getenv("LANG");
209 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
210 if (p == NULL || !isalpha((unsigned char)p[0]))
211 p = msg_lang;
215 if (p) {
216 const char *q = strchr(p, '.');
217 if (q) p = q + 1;
220 if (p) {
221 const char *chset = p;
222 size_t name_len;
224 while (*p != '\0' && *p != '@') p++;
226 name_len = p - chset;
228 if (name_len) {
229 int only_digit = 1;
230 size_t cnt;
232 for (cnt = 0; cnt < name_len; ++cnt)
233 if (isalpha((unsigned char)chset[cnt])) {
234 only_digit = 0;
235 break;
238 if (only_digit) goto iso;
240 switch (tolower(chset[0])) {
241 case 'i':
242 if (tolower(chset[1]) == 's' && tolower(chset[2]) == 'o') {
243 chset += 3;
244 iso:
245 if (strncmp(chset, "8859", 4) == 0) {
246 chset += 4;
247 while (chset < p && *chset && !isdigit((unsigned char)*chset))
248 chset++;
249 switch (atoi(chset)) {
250 case 1: return CHARSET_ISO_8859_1;
251 case 2: return CHARSET_ISO_8859_2;
252 case 15: return CHARSET_ISO_8859_15;
253 default: return CHARSET_USASCII;
257 break;
258 case 'u':
259 if (tolower(chset[1]) == 't' && tolower(chset[2]) == 'f') {
260 chset += 3;
261 while (chset < p && *chset && !isdigit((unsigned char)*chset))
262 chset++;
263 switch (atoi(chset)) {
264 case 8: return CHARSET_UTF8;
265 default: return CHARSET_USASCII;
271 return CHARSET_USASCII;
272 #endif
273 #else
274 # error Do not know operating system!
275 #endif
/* It seems that Swedish and maybe some other Scandinavian languages don't
 * transliterate &auml; to ae - but it seems there may be conflicting views
 * on this...
 */
282 #define umlaut_to_e() 1
284 /* values <= 127 already dealt with */
285 static int
286 add_unicode(int charset, unsigned char *p, int value)
288 #ifdef DEBUG
289 fprintf(stderr, "add_unicode(%d, %p, %d)\n", charset, p, value);
290 #endif
291 if (value == 0) return 0;
292 switch (charset) {
293 case CHARSET_USASCII:
294 if (value < 0x80) {
295 *p = value;
296 return 1;
298 break;
299 case CHARSET_ISO_8859_1:
300 if (value < 0x100) {
301 *p = value;
302 return 1;
304 break;
305 case CHARSET_ISO_8859_2:
306 if (value >= 0xa0) {
307 int v = 0;
308 switch (value) {
309 case 0xa0: case 0xa4: case 0xa7: case 0xa8: case 0xad: case 0xb0:
310 case 0xb4: case 0xb8: case 0xc1: case 0xc2: case 0xc4: case 0xc7:
311 case 0xc9: case 0xcb: case 0xcd: case 0xce: case 0xd3: case 0xd4:
312 case 0xd6: case 0xd7: case 0xda: case 0xdc: case 0xdd: case 0xdf:
313 case 0xe1: case 0xe2: case 0xe4: case 0xe7: case 0xe9: case 0xeb:
314 case 0xed: case 0xee: case 0xf3: case 0xf4: case 0xf6: case 0xf7:
315 case 0xfa: case 0xfc: case 0xfd:
316 v = value; break;
317 case 0x104: v = '\xa1'; break;
318 case 0x2d8: v = '\xa2'; break;
319 case 0x141: v = '\xa3'; break;
320 case 0x13d: v = '\xa5'; break;
321 case 0x15a: v = '\xa6'; break;
322 case 0x160: v = '\xa9'; break;
323 case 0x15e: v = '\xaa'; break; /* Scedil */
324 case 0x164: v = '\xab'; break;
325 case 0x179: v = '\xac'; break;
326 case 0x17d: v = '\xae'; break;
327 case 0x17b: v = '\xaf'; break;
328 case 0x105: v = '\xb1'; break;
329 case 0x2db: v = '\xb2'; break;
330 case 0x142: v = '\xb3'; break;
331 case 0x13e: v = '\xb5'; break;
332 case 0x15b: v = '\xb6'; break;
333 case 0x2c7: v = '\xb7'; break;
334 case 0x161: v = '\xb9'; break;
335 case 0x15f: v = '\xba'; break; /* scedil */
336 case 0x165: v = '\xbb'; break;
337 case 0x17a: v = '\xbc'; break;
338 case 0x2dd: v = '\xbd'; break;
339 case 0x17e: v = '\xbe'; break;
340 case 0x17c: v = '\xbf'; break;
341 case 0x154: v = '\xc0'; break;
342 case 0x102: v = '\xc3'; break;
343 case 0x139: v = '\xc5'; break;
344 case 0x106: v = '\xc6'; break;
345 case 0x10c: v = '\xc8'; break;
346 case 0x118: v = '\xca'; break;
347 case 0x11a: v = '\xcc'; break;
348 case 0x10e: v = '\xcf'; break;
349 case 0x110: v = '\xd0'; break;
350 case 0x143: v = '\xd1'; break;
351 case 0x147: v = '\xd2'; break;
352 case 0x150: v = '\xd5'; break;
353 case 0x158: v = '\xd8'; break;
354 case 0x16e: v = '\xd9'; break;
355 case 0x170: v = '\xdb'; break;
356 case 0x162: v = '\xde'; break; /* &Tcedil; */
357 case 0x155: v = '\xe0'; break;
358 case 0x103: v = '\xe3'; break;
359 case 0x13a: v = '\xe5'; break;
360 case 0x107: v = '\xe6'; break;
361 case 0x10d: v = '\xe8'; break;
362 case 0x119: v = '\xea'; break;
363 case 0x11b: v = '\xec'; break;
364 case 0x10f: v = '\xef'; break;
365 case 0x111: v = '\xf0'; break;
366 case 0x144: v = '\xf1'; break;
367 case 0x148: v = '\xf2'; break;
368 case 0x151: v = '\xf5'; break;
369 case 0x159: v = '\xf8'; break;
370 case 0x16f: v = '\xf9'; break;
371 case 0x171: v = '\xfb'; break;
372 case 0x163: v = '\xfe'; break; /* tcedil */
373 case 0x2d9: v = '\xff'; break;
375 if (v == 0) break;
376 value = v;
378 *p = value;
379 return 1;
380 case CHARSET_ISO_8859_15:
381 switch (value) {
382 case 0xa4: case 0xa6: case 0xb0: case 0xc4:
383 case 0xd0: case 0xd4: case 0xd5: case 0xd6:
384 goto donthave;
385 case 0x152: value = 0xd4; break; /* &OElig; */
386 case 0x153: value = 0xd5; break; /* &oelig; */
387 #if 0
388 case 0x0: value = 0xa4; break; /* euro */
389 #endif
390 case 0x160: value = 0xa6; break; /* Scaron */
391 case 0x161: value = 0xb0; break; /* scaron */
392 case 0x17d: value = 0xc4; break; /* Zcaron */
393 case 0x17e: value = 0xd0; break; /* zcaron */
394 #if 0
395 case 0x0: value = 0xd6; break; /* Ydiersis */
396 #endif
398 if (value < 0x100) {
399 *p = value;
400 return 1;
402 donthave:
403 break;
404 #if OS_WIN32
405 case CHARSET_WINCP1250:
406 /* MS Windows rough equivalent to ISO-8859-2 */
407 if (value >= 0x80) {
408 int v = 0;
409 /* This mapping is complete - there are 5 unused positions:
410 * 0x81 0x83 0x88 0x90 0x98 */
411 switch (value) {
412 case 0xa0: case 0xa4: case 0xa6: case 0xa7: case 0xa8: case 0xa9:
413 case 0xab: case 0xac: case 0xad: case 0xae: case 0xb0: case 0xb1:
414 case 0xb4: case 0xb5: case 0xb6: case 0xb7: case 0xb8: case 0xbb:
415 case 0xc1: case 0xc2: case 0xc4: case 0xc7: case 0xc9: case 0xcb:
416 case 0xcd: case 0xce: case 0xd3: case 0xd4: case 0xd6: case 0xd7:
417 case 0xda: case 0xdc: case 0xdd: case 0xdf: case 0xe1: case 0xe2:
418 case 0xe4: case 0xe7: case 0xe9: case 0xeb: case 0xed: case 0xee:
419 case 0xf3: case 0xf4: case 0xf6: case 0xf7: case 0xfa: case 0xfc:
420 case 0xfd:
421 v = value; break;
422 case 0x20ac: v = '\x80'; break;
423 case 0x201a: v = '\x82'; break;
424 case 0x201e: v = '\x84'; break;
425 case 0x2026: v = '\x85'; break;
426 case 0x2020: v = '\x86'; break;
427 case 0x2021: v = '\x87'; break;
428 case 0x2030: v = '\x89'; break;
429 case 0x0160: v = '\x8a'; break;
430 case 0x2039: v = '\x8b'; break;
431 case 0x015a: v = '\x8c'; break;
432 case 0x0164: v = '\x8d'; break;
433 case 0x017d: v = '\x8e'; break;
434 case 0x0179: v = '\x8f'; break;
435 case 0x2018: v = '\x91'; break;
436 case 0x2019: v = '\x92'; break;
437 case 0x201c: v = '\x93'; break;
438 case 0x201d: v = '\x94'; break;
439 case 0x2022: v = '\x95'; break;
440 case 0x2013: v = '\x96'; break;
441 case 0x2014: v = '\x97'; break;
442 case 0x2122: v = '\x99'; break;
443 case 0x0161: v = '\x9a'; break;
444 case 0x203a: v = '\x9b'; break;
445 case 0x015b: v = '\x9c'; break;
446 case 0x0165: v = '\x9d'; break;
447 case 0x017e: v = '\x9e'; break;
448 case 0x017a: v = '\x9f'; break;
449 case 0x02c7: v = '\xa1'; break;
450 case 0x02d8: v = '\xa2'; break;
451 case 0x0141: v = '\xa3'; break;
452 case 0x0104: v = '\xa5'; break;
453 case 0x015e: v = '\xaa'; break; /* Scedil */
454 case 0x017b: v = '\xaf'; break;
455 case 0x02db: v = '\xb2'; break;
456 case 0x0142: v = '\xb3'; break;
457 case 0x0105: v = '\xb9'; break;
458 case 0x015f: v = '\xba'; break; /* scedil */
459 case 0x013d: v = '\xbc'; break;
460 case 0x02dd: v = '\xbd'; break;
461 case 0x013e: v = '\xbe'; break;
462 case 0x017c: v = '\xbf'; break;
463 case 0x0154: v = '\xc0'; break;
464 case 0x0102: v = '\xc3'; break;
465 case 0x0139: v = '\xc5'; break;
466 case 0x0106: v = '\xc6'; break;
467 case 0x010c: v = '\xc8'; break;
468 case 0x0118: v = '\xca'; break;
469 case 0x011a: v = '\xcc'; break;
470 case 0x010e: v = '\xcf'; break;
471 case 0x0110: v = '\xd0'; break;
472 case 0x0143: v = '\xd1'; break;
473 case 0x0147: v = '\xd2'; break;
474 case 0x0150: v = '\xd5'; break;
475 case 0x0158: v = '\xd8'; break;
476 case 0x016e: v = '\xd9'; break;
477 case 0x0170: v = '\xdb'; break;
478 case 0x0162: v = '\xde'; break; /* &Tcedil; */
479 case 0x0155: v = '\xe0'; break;
480 case 0x0103: v = '\xe3'; break;
481 case 0x013a: v = '\xe5'; break;
482 case 0x0107: v = '\xe6'; break;
483 case 0x010d: v = '\xe8'; break;
484 case 0x0119: v = '\xea'; break;
485 case 0x011b: v = '\xec'; break;
486 case 0x010f: v = '\xef'; break;
487 case 0x0111: v = '\xf0'; break;
488 case 0x0144: v = '\xf1'; break;
489 case 0x0148: v = '\xf2'; break;
490 case 0x0151: v = '\xf5'; break;
491 case 0x0159: v = '\xf8'; break;
492 case 0x016f: v = '\xf9'; break;
493 case 0x0171: v = '\xfb'; break;
494 case 0x0163: v = '\xfe'; break; /* tcedil */
495 case 0x02d9: v = '\xff'; break;
497 if (v == 0) break;
498 value = v;
500 *p = value;
501 return 1;
502 case CHARSET_WINCP1252:
503 /* MS Windows extensions to ISO-8859-1 */
504 /* This mapping is complete - there are 5 unused positions:
505 * 0x81 0x8d 0x8f 0x90 0x9d */
506 switch (value) {
507 case 0x2026: value = 0x85; break; /* hellip */
508 case 0x0160: value = 0x8a; break; /* Scaron */
509 case 0x0152: value = 0x8c; break; /* OElig */
510 case 0x017d: value = 0x8e; break; /* Zcaron */
511 case 0x2019: value = 0x92; break; /* rsquo */
512 case 0x201c: value = 0x93; break; /* ldquo */
513 case 0x201d: value = 0x94; break; /* rdquo */
514 case 0x0161: value = 0x9a; break; /* scaron */
515 case 0x0153: value = 0x9c; break; /* oelig */
516 case 0x017e: value = 0x9e; break; /* zcaron */
517 #if 0
518 /* there are a few other obscure ones we don't currently need */
519 case 0x20ac: value = 0x80; break; /* euro */
520 case 0x201a: value = 0x82; break; /* sbquo */
521 case 0x0192: value = 0x83; break; /* fnof */
522 case 0x201e: value = 0x84; break; /* bdquo */
523 case 0x2020: value = 0x86; break; /* dagger */
524 case 0x2021: value = 0x87; break; /* Dagger */
525 case 0x02c6: value = 0x88; break; /* circ */
526 case 0x2030: value = 0x89; break; /* permil */
527 case 0x2039: value = 0x8b; break; /* lsaquo */
528 case 0x2018: value = 0x91; break; /* lsquo */
529 case 0x2022: value = 0x95; break; /* bull */
530 case 0x2013: value = 0x96; break; /* ndash */
531 case 0x2014: value = 0x97; break; /* mdash */
532 case 0x02dc: value = 0x98; break; /* tilde */
533 case 0x2122: value = 0x99; break; /* trade */
534 case 0x203a: value = 0x9b; break; /* rsaquo */
535 case 0x0178: value = 0x9f; break; /* Yuml */
536 #endif
538 if (value < 0x100) {
539 *p = value;
540 return 1;
542 break;
543 #endif
544 #if OS_WIN32
545 case CHARSET_DOSCP850: {
546 static const unsigned char uni2dostab[] = {
547 255, 173, 189, 156, 207, 190, 221, 245,
548 249, 184, 166, 174, 170, 240, 169, 238,
549 248, 241, 253, 252, 239, 230, 244, 250,
550 247, 251, 167, 175, 172, 171, 243, 168,
551 183, 181, 182, 199, 142, 143, 146, 128,
552 212, 144, 210, 211, 222, 214, 215, 216,
553 209, 165, 227, 224, 226, 229, 153, 158,
554 157, 235, 233, 234, 154, 237, 232, 225,
555 133, 160, 131, 198, 132, 134, 145, 135,
556 138, 130, 136, 137, 141, 161, 140, 139,
557 208, 164, 149, 162, 147, 228, 148, 246,
558 155, 151, 163, 150, 129, 236, 231, 152
560 if (value >= 160 && value < 256) {
561 *p = (int)uni2dostab[value - 160];
562 return 1;
564 #if 0
565 if (value == 305) { /* LATIN SMALL LETTER DOTLESS I */
566 *p = 213; /* "Modified CP850" has the Euro sign here. */
567 return 1;
569 if (value == 402) { /* LATIN SMALL LETTER F WITH HOOK */
570 *p = 159;
571 return 1;
573 #endif
574 break;
576 #endif
578 /* Transliterate characters we can't represent */
579 #ifdef DEBUG
580 fprintf(stderr, "transliterate “%c” 0x%x\n", value, value);
581 #endif
582 switch (value) {
583 case 160:
584 *p = ' '; return 1;
585 case 161 /* ¡ */:
586 *p = '!'; return 1;
587 case 176 /* ° */:
588 *p = 'd'; p[1] = 'g'; return 2;
589 case 191 /* ¿ */:
590 *p = '?'; return 1;
591 case 192 /* À */: case 193 /* Á */: case 194 /* Â */: case 195 /* Ã */:
592 *p = 'A'; return 1;
593 case 197 /* Å */:
594 p[1] = *p = 'A'; return 2;
595 case 196 /* Ä */: /* &Auml; */
596 *p = 'A';
597 if (!umlaut_to_e()) return 1;
598 p[1] = 'E'; return 2;
599 case 198 /* Æ */:
600 *p = 'A'; p[1] = 'E'; return 2;
601 case 199 /* Ç */: case 268: /* &Ccaron; */
602 *p = 'C'; return 1;
603 case 270: /* &Dcaron; */
604 *p = 'D'; return 1;
605 case 200 /* È */: case 201 /* É */: case 202 /* Ê */: case 203 /* Ë */:
606 *p = 'E'; return 1;
607 case 204 /* Ì */: case 205 /* Í */: case 206 /* Î */: case 207 /* Ï */:
608 *p = 'I'; return 1;
609 case 208 /* Ð */: case 222 /* Þ */:
610 *p = 'T'; p[1] = 'H'; return 2;
611 case 315: /* &Lacute; */
612 case 317: /* &Lcaron; */
613 *p = 'L'; return 1;
614 case 209 /* Ñ */:
615 *p = 'N'; return 1;
616 case 210 /* Ò */: case 211 /* Ó */: case 212 /* Ô */: case 213 /* Õ */:
617 *p = 'O'; return 1;
618 case 214 /* Ö */: /* &Ouml; */ case 0x152: /* &OElig; */
619 *p = 'O'; p[1] = 'E'; return 2;
620 case 352: /* &Scaron; */
621 case 0x15e: /* &Scedil; */
622 *p = 'S'; return 1;
623 case 0x162: /* &Tcedil; */
624 case 0x164: /* &Tcaron; */
625 *p = 'T'; return 1;
626 case 217 /* Ù */: case 218 /* Ú */: case 219 /* Û */:
627 *p = 'U'; return 1;
628 case 220 /* Ü */: /* &Uuml; */
629 *p = 'U'; p[1] = 'E'; return 2;
630 case 221 /* Ý */:
631 *p = 'Y'; return 1;
632 case 381: /* &Zcaron; */
633 *p = 'Z'; return 1;
634 case 223 /* ß */:
635 p[1] = *p = 's'; return 2;
636 case 224 /* à */: case 225 /* á */: case 226 /* â */: case 227 /* ã */:
637 case 259: /* &abreve; */
638 *p = 'a'; return 1;
639 case 228 /* ä */: /* &auml; */ case 230 /* æ */:
640 *p = 'a'; p[1] = 'e'; return 2;
641 case 229 /* å */:
642 p[1] = *p = 'a'; return 2;
643 case 231 /* ç */: case 269 /* &ccaron; */:
644 *p = 'c'; return 1;
645 case 271: /* &dcaron; */
646 *p = 'd'; return 1;
647 case 232 /* è */: case 233 /* é */: case 234 /* ê */: case 235 /* ë */:
648 case 283 /* &ecaron; */:
649 *p = 'e'; return 1;
650 case 236 /* ì */: case 237 /* í */: case 238 /* î */: case 239 /* ï */:
651 *p = 'i'; return 1;
652 case 316 /* &lacute; */:
653 case 318 /* &lcaron; */:
654 *p = 'l'; return 1;
655 case 241 /* ñ */: case 328 /* &ncaron; */:
656 *p = 'n'; return 1;
657 case 345: /* &rcaron; */
658 *p = 'r'; return 1;
659 case 353: /* &scaron; */
660 case 0x15f: /* &scedil; */
661 *p = 's'; return 1;
662 case 357: /* &tcaron; */
663 case 0x163: /* &tcedil; */
664 *p = 't'; return 1;
665 case 240 /* ð */: case 254 /* þ */:
666 *p = 't'; p[1] = 'h'; return 2;
667 case 242 /* ò */: case 243 /* ó */: case 244 /* ô */: case 245 /* õ */:
668 *p = 'o'; return 1;
669 case 246 /* ö */: /* &ouml; */ case 0x153: /* &oelig; */
670 *p = 'o'; p[1] = 'e'; return 2;
671 case 249 /* ù */: case 250 /* ú */: case 251 /* û */:
672 case 367 /* &uring; */:
673 *p = 'u'; return 1;
674 case 252 /* ü */: /* &uuml; */
675 *p = 'u'; p[1] = 'e'; return 2;
676 case 253 /* ý */: case 255 /* ÿ */:
677 *p = 'y'; return 1;
678 case 382: /* &zcaron; */
679 *p = 'z'; return 1;
680 case 0x2019: /* &lsquo; */
681 *p = '\''; return 1;
682 case 171: /* « */ case 187: /* » */
683 case 0x201c: /* &ldquo; */ case 0x201d: /* &rdquo; */
684 *p = '"'; return 1;
685 case 0x2026: /* &hellip; */
686 *p = '.'; p[1] = '.'; p[2] = '.'; return 3;
687 case 0x2192: /* &rarr; */
688 *p = '-'; p[1] = '>'; return 2;
689 case 0x1d4d: /* gradient symbol */
690 *p = 'g'; p[1] = 'r'; p[2] = 'd'; return 3;
691 case 0x221e: /* infinity symbol */
692 *p = 'i'; p[1] = 'n'; p[2] = 'f'; return 3;
694 #ifdef DEBUG
695 /* 169 is reported (copyright symbol), but there isn't a good <= 2 ASCII
696 * character transliteration for that, so we handle that elsewhere. */
697 fprintf(stderr, "failed to transliterate codepoint %d\n", value);
698 #endif
699 return 0;
702 #if OS_UNIX && defined DATADIR && defined PACKAGE
703 /* Under Unix, we compile in the configured path */
704 static const char *pth_cfg_files = DATADIR "/" PACKAGE;
705 #else
706 /* On other platforms, we fall back on looking in the current directory */
707 static const char *pth_cfg_files = "";
708 #endif
710 static int num_msgs = 0;
711 static char **msg_array = NULL;
713 static bool msg_lang_explicit = false;
714 const char *msg_lang = NULL;
715 const char *msg_lang2 = NULL;
717 static char **
718 parse_msgs(int n, unsigned char *p, int charset_code) {
719 int i;
721 char **msgs = osmalloc(n * sizeof(char *));
723 for (i = 0; i < n; i++) {
724 unsigned char *to = p;
725 int ch;
726 msgs[i] = (char *)p;
728 /* If we want UTF8 anyway, we just need to find the start of each
729 * message */
730 if (charset_code == CHARSET_UTF8) {
731 p += strlen((char *)p) + 1;
732 continue;
735 while ((ch = *p++) != 0) {
736 /* A byte in the range 0x80-0xbf or 0xf0-0xff isn't valid in
737 * this state, (0xf0-0xfd mean values > 0xffff) so treat as
738 * literal and try to resync so we cope better when fed
739 * non-utf-8 data. Similarly we abandon a multibyte sequence
740 * if we hit an invalid character. */
741 if (ch >= 0xc0 && ch < 0xf0) {
742 int ch1 = *p;
743 if ((ch1 & 0xc0) != 0x80) goto resync;
745 if (ch < 0xe0) {
746 /* 2 byte sequence */
747 ch = ((ch & 0x1f) << 6) | (ch1 & 0x3f);
748 p++;
749 } else {
750 /* 3 byte sequence */
751 int ch2 = p[1];
752 if ((ch2 & 0xc0) != 0x80) goto resync;
753 ch = ((ch & 0x1f) << 12) | ((ch1 & 0x3f) << 6) | (ch2 & 0x3f);
754 p += 2;
758 resync:
760 if (ch < 127) {
761 *to++ = (char)ch;
762 } else {
763 /* We assume an N byte UTF-8 code never transliterates to more
764 * than N characters (so we can't transliterate © to (C) or
765 * ® to (R) for example) */
766 to += add_unicode(charset_code, to, ch);
769 *to++ = '\0';
771 return msgs;
/* DEFAULTLANG is the name of the default language, which can be set when
 * configuring like so:
 * ./configure --enable-defaultlang=fr
 */
777 #ifdef DEFAULTLANG
778 /* No point extracting these errors as they won't get used if file opens */
779 # include "../lib/defaultlang.h"
780 #else
781 #define N_DONTEXTRACTMSGS 5
782 static unsigned char dontextractmsgs[] =
783 "Can't open message file \"%s\" using path \"%s\"\0"/*1000*/
784 "Problem with message file \"%s\"\0"/*1001*/
785 "I don't understand this message file version\0"/*1002*/
786 "Message file truncated?\0"/*1003*/
787 "Out of memory (couldn't find %lu bytes).\0"/*1004*/;
788 #endif
790 static char **dontextract = NULL;
792 static void
793 parse_msg_file(int charset_code)
795 FILE *fh;
796 unsigned char header[20];
797 int i;
798 unsigned len;
799 unsigned char *p;
800 char *fnm, *s;
801 int n;
803 #ifdef DEBUG
804 fprintf(stderr, "parse_msg_file(%d)\n", charset_code);
805 #endif
807 /* sort out messages we need to print if we can't open the message file */
808 dontextract = parse_msgs(N_DONTEXTRACTMSGS, dontextractmsgs, charset_code);
810 fnm = osstrdup(msg_lang);
811 /* trim off charset from stuff like "de_DE.iso8859_1" */
812 s = strchr(fnm, '.');
813 if (s) *s = '\0';
814 /* trim off any "@<something>" modifier. */
815 s = strchr(fnm, '@');
816 if (s) *s = '\0';
818 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
820 if (!fh && strlen(fnm) > 3 && fnm[2] == '_') {
821 /* e.g. if 'en_GB' is unknown, see if we know 'en' */
822 fnm[2] = '\0';
823 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
824 if (!fh) fnm[2] = '_'; /* for error reporting */
827 if (!fh && !msg_lang_explicit) {
828 /* If msg_lang wasn't specified using environment variable SURVEXLANG,
829 * then default to 'en' if we don't find messages for language msg_lang.
831 if (fnm[0] && fnm[1]) {
832 strcpy(fnm, "en");
833 } else {
834 osfree(fnm);
835 fnm = osstrdup("en");
837 fh = fopenWithPthAndExt(pth_cfg_files, fnm, EXT_SVX_MSG, "rb", NULL);
840 if (!fh) {
841 fatalerror(/*Can't open message file “%s” using path “%s”*/1000,
842 fnm, pth_cfg_files);
845 if (fread(header, 1, 20, fh) < 20 ||
846 memcmp(header, "Svx\nMsg\r\n\xfe\xff", 12) != 0) {
847 fatalerror(/*Problem with message file “%s”*/1001, fnm);
850 if (header[12] != 0)
851 fatalerror(/*I don't understand this message file version*/1002);
853 n = (header[14] << 8) | header[15];
855 len = 0;
856 for (i = 16; i < 20; i++) len = (len << 8) | header[i];
858 p = osmalloc(len);
859 if (fread(p, 1, len, fh) < len)
860 fatalerror(/*Message file truncated?*/1003);
862 fclose(fh);
864 #ifdef DEBUG
865 fprintf(stderr, "fnm = “%s”, n = %d, len = %d\n", fnm, n, len);
866 #endif
867 osfree(fnm);
869 msg_array = parse_msgs(n, p, charset_code);
870 num_msgs = n;
873 const char *
874 msg_cfgpth(void)
876 return pth_cfg_files;
879 const char *
880 msg_exepth(void)
882 return exe_pth;
885 const char *
886 msg_appname(void)
888 return appname_copy;
891 void
892 (msg_init)(char * const *argv)
894 char *p;
895 SVX_ASSERT(argv);
897 /* Point to argv[0] itself so we report a more helpful error if the
898 * code to work out the clean appname generates a signal */
899 appname_copy = argv[0];
900 #if OS_UNIX
901 /* use name as-is on Unix - programs run from path get name as supplied */
902 appname_copy = osstrdup(argv[0]);
903 #else
904 /* use the lower-cased leafname on other platforms */
905 p = leaf_from_fnm(argv[0]);
906 appname_copy = p;
907 while (*p) {
908 *p = tolower(*p);
909 ++p;
911 #endif
913 /* shortcut --version so you can check the version number even when the
914 * correct message file can't be found... */
915 if (argv[1] && strcmp(argv[1], "--version") == 0) {
916 cmdline_version();
917 exit(0);
919 if (argv[0]) {
920 exe_pth = path_from_fnm(argv[0]);
921 #if OS_UNIX && defined DATADIR && defined PACKAGE
922 bool free_pth = false;
923 char *pth = getenv("srcdir");
924 if (!pth || !pth[0]) {
925 pth = path_from_fnm(argv[0]);
926 free_pth = true;
928 if (pth[0]) {
929 struct stat buf;
930 #if OS_UNIX_MACOS
931 # ifndef AVEN
932 /* On macOS the programs may be installed anywhere, with the
933 * share directory and the binaries in the same directory. */
934 p = use_path(pth, "share/survex/en.msg");
935 if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
936 pth_cfg_files = use_path(pth, "share/survex");
937 goto macos_got_msg;
939 osfree(p);
940 # endif
941 /* In the diskimage package, this case is used for aven, and for
942 * the hardlinked copies of cavern and extend alongside the aven
943 * binary, which are the ones which aven runs.
945 p = use_path(pth, "../Resources/en.msg");
946 if (stat(p, &buf) == 0 && S_ISREG(buf.st_mode)) {
947 pth_cfg_files = use_path(pth, "../Resources");
948 goto macos_got_msg;
950 osfree(p);
951 #endif
952 /* If we're run with an explicit path, check if "../lib/en.msg"
953 * from the program's path exists, and if so look there for
954 * support files - this allows us to test binaries in the build
955 * tree easily. */
956 p = use_path(pth, "../lib/en.msg");
957 if (stat(p, &buf) == 0) {
958 #ifdef S_ISREG
959 /* POSIX way */
960 if (S_ISREG(buf.st_mode)) {
961 pth_cfg_files = use_path(pth, "../lib");
963 #else
964 /* BSD way */
965 if ((buf.st_mode & S_IFMT) == S_IFREG) {
966 pth_cfg_files = use_path(pth, "../lib");
968 #endif
970 #if defined(__GNUC__) && defined(__APPLE_CC__)
971 macos_got_msg:
972 #endif
973 osfree(p);
976 if (free_pth) osfree(pth);
977 #elif OS_WIN32
978 DWORD len = 256;
979 char *buf = NULL, *modname;
980 while (1) {
981 DWORD got;
982 buf = osrealloc(buf, len);
983 got = GetModuleFileName(NULL, buf, len);
984 if (got < len) break;
985 len += len;
987 modname = buf;
988 /* Strange Win32 nastiness - strip prefix "\\?\" if present */
989 if (strncmp(modname, "\\\\?\\", 4) == 0) modname += 4;
990 pth_cfg_files = path_from_fnm(modname);
991 osfree(buf);
992 #else
993 /* Get the path to the support files from argv[0] */
994 pth_cfg_files = path_from_fnm(argv[0]);
995 #endif
998 msg_lang = getenv("SURVEXLANG");
999 #ifdef DEBUG
1000 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1001 #endif
1003 msg_lang_explicit = true;
1004 if (!msg_lang || !*msg_lang) {
1005 msg_lang_explicit = false;
1006 msg_lang = getenv("LC_ALL");
1008 if (!msg_lang || !*msg_lang) {
1009 msg_lang = getenv("LC_MESSAGES");
1010 if (!msg_lang || !*msg_lang) {
1011 msg_lang = getenv("LANG");
1012 /* Something (AutoCAD?) on Microsoft Windows sets LANG to a number. */
1013 if (msg_lang && !isalpha(msg_lang[0])) msg_lang = NULL;
1015 if (!msg_lang || !*msg_lang) {
1016 #if OS_WIN32
1017 LCID locid;
1018 #endif
1019 #ifdef DEFAULTLANG
1020 msg_lang = STRING(DEFAULTLANG);
1021 #else
1022 msg_lang = "en";
1023 #endif
1024 #if OS_WIN32
1025 /* GetUserDefaultUILanguage() requires Microsoft Windows 2000 or
1026 * newer, but we don't support anything earlier than Vista.
1028 locid = GetUserDefaultUILanguage();
1029 if (locid) {
1030 WORD langid = LANGIDFROMLCID(locid);
1031 switch (PRIMARYLANGID(langid)) {
1032 case LANG_BULGARIAN:
1033 msg_lang = "bg";
1034 break;
1035 /* older mingw compilers don't seem to supply this value */
1036 #ifndef LANG_CATALAN
1037 # define LANG_CATALAN 0x03
1038 #endif
1039 case LANG_CATALAN:
1040 msg_lang = "ca";
1041 break;
1042 case LANG_CHINESE:
1043 msg_lang = "zh_CN";
1044 break;
1045 case LANG_ENGLISH:
1046 if (SUBLANGID(langid) == SUBLANG_ENGLISH_US)
1047 msg_lang = "en_US";
1048 else
1049 msg_lang = "en";
1050 break;
1051 case LANG_FRENCH:
1052 msg_lang = "fr";
1053 break;
1054 case LANG_GERMAN:
1055 switch (SUBLANGID(langid)) {
1056 case SUBLANG_GERMAN_SWISS:
1057 msg_lang = "de_CH";
1058 break;
1059 default:
1060 msg_lang = "de";
1062 break;
1063 case LANG_GREEK:
1064 msg_lang = "el";
1065 break;
1066 case LANG_HUNGARIAN:
1067 msg_lang = "hu";
1068 break;
1069 case LANG_INDONESIAN:
1070 msg_lang = "id";
1071 break;
1072 case LANG_ITALIAN:
1073 msg_lang = "it";
1074 break;
1075 case LANG_POLISH:
1076 msg_lang = "pl";
1077 break;
1078 case LANG_PORTUGUESE:
1079 if (SUBLANGID(langid) == SUBLANG_PORTUGUESE_BRAZILIAN)
1080 msg_lang = "pt_BR";
1081 else
1082 msg_lang = "pt";
1083 break;
1084 case LANG_ROMANIAN:
1085 msg_lang = "ro";
1086 break;
1087 case LANG_RUSSIAN:
1088 msg_lang = "ru";
1089 break;
1090 case LANG_SLOVAK:
1091 msg_lang = "sk";
1092 break;
1093 case LANG_SPANISH:
1094 msg_lang = "es";
1095 break;
1098 #endif
1101 #ifdef DEBUG
1102 fprintf(stderr, "msg_lang = %p (= \"%s\")\n", msg_lang, msg_lang?msg_lang:"(null)");
1103 #endif
1105 /* On Mandrake LANG defaults to C */
1106 if (strcmp(msg_lang, "C") == 0) msg_lang = "en";
1108 { /* If msg_lang has a country code, snip it out to give msg_lang2. */
1109 size_t b = 0;
1110 while (isalpha((unsigned char)msg_lang[b])) {
1111 ++b;
1113 if (msg_lang[b] == '_') {
1114 char * tmp;
1115 size_t e = b + 1;
1116 while (isalpha((unsigned char)msg_lang[e])) {
1117 ++e;
1119 tmp = osstrdup(msg_lang);
1120 memmove(tmp + b, tmp + e, strlen(tmp + e) + 1);
1121 msg_lang2 = tmp;
1125 #ifdef LC_MESSAGES
1126 /* try to setlocale() appropriately too */
1127 if (!setlocale(LC_MESSAGES, msg_lang)) {
1128 if (msg_lang2) {
1129 (void)setlocale(LC_MESSAGES, msg_lang2);
1132 #endif
1134 select_charset(default_charset());
1137 #ifndef AVEN
1138 /* Return message if messages available, else a fallback value. */
1139 static const char *
1140 msg_opt(int en, const char * fallback)
1142 /* NB can't use SVX_ASSERT here! */
1143 if (!msg_array || en <= 0 || en >= num_msgs) {
1144 return fallback;
1147 return msg_array[en];
1149 #endif
1151 const char *
1152 msg(int en)
1154 /* NB can't use SVX_ASSERT here! */
1155 if (dontextract && en >= 1000 && en < 1000 + N_DONTEXTRACTMSGS)
1156 return dontextract[en - 1000];
1157 if (!msg_array) {
1158 if (en != 1) {
1159 fprintf(STDERR, "Message %d requested before fully initialised\n", en);
1160 return "Message requested before fully initialised\n";
1162 /* this should be the only other message which can be requested before
1163 * the message file is opened and read... */
1164 if (!dontextract) return "Out of memory (couldn't find %lu bytes).";
1165 return dontextract[(/*Out of memory (couldn't find %lu bytes).*/1004)
1166 - 1000];
1169 if (en < 0 || en >= num_msgs) {
1170 fprintf(STDERR, "Message %d out of range\n", en);
1171 return "Message out of range\n";
1174 if (en == 0) {
1175 const char *p = msg_array[0];
1176 if (!*p) p = "(C)";
1177 return p;
1180 return msg_array[en];
1183 void
1184 v_report(int severity, const char *fnm, int line, int col, int en, va_list ap)
1186 #ifdef AVEN
1187 (void)col;
1188 aven_v_report(severity, fnm, line, en, ap);
1189 #else
1190 const char * level;
1191 if (fnm) {
1192 fputs(fnm, STDERR);
1193 if (line) fprintf(STDERR, ":%d", line);
1194 if (col > 0) fprintf(STDERR, ":%d", col);
1195 } else {
1196 fputs(appname_copy, STDERR);
1198 fputs(": ", STDERR);
1200 switch (severity) {
1201 case DIAG_INFO:
1202 /* TRANSLATORS: Indicates a informational message e.g.:
1203 * "spoon.svx:12: info: Declination: [...]" */
1204 level = msg_opt(/*info*/485, "info");
1205 break;
1206 case DIAG_WARN:
1207 /* TRANSLATORS: Indicates a warning message e.g.:
1208 * "spoon.svx:12: warning: *prefix is deprecated" */
1209 level = msg_opt(/*warning*/4, "warning");
1210 break;
1211 default:
1212 /* TRANSLATORS: Indicates an error message e.g.:
1213 * "spoon.svx:13:4: error: Field may not be omitted" */
1214 level = msg_opt(/*error*/93, "error");
1215 break;
1217 fputs(level, STDERR);
1218 fputs(": ", STDERR);
1220 vfprintf(STDERR, msg(en), ap);
1221 fputnl(STDERR);
1222 #endif
1224 switch (severity) {
1225 case DIAG_WARN:
1226 msg_warnings++;
1227 break;
1228 case DIAG_ERR:
1229 msg_errors++;
1230 if (msg_errors == 50)
1231 fatalerror_in_file(fnm, 0, /*Too many errors - giving up*/19);
1232 break;
1233 case DIAG_FATAL:
1234 exit(EXIT_FAILURE);
/* Report message number en at the given severity with no file location.
 * The variadic arguments are the values for the message's format
 * specifiers; they are forwarded to v_report() with a NULL filename and
 * zero line and column.
 */
void
diag(int severity, int en, ...)
{
    va_list args;

    va_start(args, en);
    v_report(severity, NULL, 0, 0, en, args);
    va_end(args);
}
/* Report message number en at the given severity, attributed to line `line`
 * of file fnm.  No column is supplied (0 is passed to v_report()).  The
 * variadic arguments are the values for the message's format specifiers.
 */
void
diag_in_file(int severity, const char *fnm, int line, int en, ...)
{
    va_list args;

    va_start(args, en);
    v_report(severity, fnm, line, 0, en, args);
    va_end(args);
}
/* Code to support switching character set at runtime (e.g. for a printer
 * driver to support different character sets on screen and on the printer)
 */
/* One entry in a singly linked, NULL-terminated list that records which
 * message array belongs to which character set code. */
struct charset_li {
    struct charset_li *next;    /* following entry, or NULL at the end */
    int code;                   /* character set code this entry is for */
    char **msg_array;           /* message array recorded for that code */
};
typedef struct charset_li charset_li;
/* Head of the list of character sets for which a message array has already
 * been recorded; new entries are pushed on the front by select_charset(). */
static charset_li *charset_head = NULL;

/* Code of the currently selected character set; CHARSET_BAD until
 * select_charset() has been called. */
static int charset = CHARSET_BAD;
1270 select_charset(int charset_code)
1272 int old_charset = charset;
1273 charset_li *p;
1275 #ifdef DEBUG
1276 fprintf(stderr, "select_charset(%d), old charset = %d\n", charset_code,
1277 charset);
1278 #endif
1280 charset = charset_code;
1282 /* check if we've already parsed messages for new charset */
1283 for (p = charset_head; p; p = p->next) {
1284 #ifdef DEBUG
1285 printf("%p: code %d msg_array %p\n", p, p->code, p->msg_array);
1286 #endif
1287 if (p->code == charset) {
1288 msg_array = p->msg_array;
1289 return old_charset;
1293 /* nope, got to reparse message file */
1294 parse_msg_file(charset_code);
1296 /* add to list */
1297 p = osnew(charset_li);
1298 p->code = charset;
1299 p->msg_array = msg_array;
1300 p->next = charset_head;
1301 charset_head = p;
1303 return old_charset;