new beta-0.90.0
[luatex.git] / source / texk / web2c / luatexdir / lang / hyphen.w
blobc661b6c8213ef19df4422e2c3d9701521169e7f9
1 % hyphen.w
3 % Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
4 % licenses follows.
7 % LibHnj - a library for high quality hyphenation and justification
8 % Copyright (C) 1998 Raph Levien,
9 % (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
10 % (C) 2001 Peter Novodvorsky (nidd@@cs.msu.su)
12 % This library is free software; you can redistribute it and/or
13 % modify it under the terms of the GNU Library General Public
14 % License as published by the Free Software Foundation; either
15 % version 2 of the License, or (at your option) any later version.
17 % This library is distributed in the hope that it will be useful,
18 % but WITHOUT ANY WARRANTY; without even the implied warranty of
19 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 % Library General Public License for more details.
22 % You should have received a copy of the GNU Library General Public
23 % License along with this library; if not, write to the
24 % Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 % Boston, MA 02111-1307 USA.
29 % The contents of this file are subject to the Mozilla Public License
30 % Version 1.0 (the "MPL"); you may not use this file except in
31 % compliance with the MPL. You may obtain a copy of the MPL at
32 % http://www.mozilla.org/MPL/
34 % Software distributed under the MPL is distributed on an "AS IS" basis,
35 % WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
36 % for the specific language governing rights and limitations under the
37 % MPL.
40 @ @c
43 #include "ptexlib.h"
44 #include "lua/luatex-api.h"
46 #include <stdlib.h> /* for NULL, malloc */
47 #include <stdio.h> /* for fprintf */
48 #include <string.h> /* for strdup */
49 #include <stdlib.h> /* for malloc used by substring inclusion */
51 #define MAXPATHS 40960
53 #ifdef UNX
54 # include <unistd.h> /* for exit */
55 #endif
57 #include <kpathsea/c-ctype.h>
59 #define noVERBOSE
61 #include "lang/hnjalloc.h"
63 @ TODO: should be moved to separate library
/* Return a copy of the NUL-terminated byte string s in storage obtained
   from hnj_malloc.  The caller owns the copy and releases it with hnj_free. */
static unsigned char *hnj_strdup(const unsigned char *s)
{
    size_t len = strlen((const char *) s);
    unsigned char *copy = hnj_malloc((int) len + 1);

    memcpy(copy, s, len + 1);   /* the +1 carries the terminator along */
    return copy;
}
78 @* Type definitions.
80 @ a little bit of a hash table implementation. This simply maps strings
81 to state numbers
typedef struct _HashTab HashTab;
typedef struct _HashEntry HashEntry;
typedef struct _HashIter HashIter;
typedef union _HashVal HashVal;

/* Number of buckets in every table.  A cheap, but effective, hack. */
#define HASH_SIZE 31627

/* Fixed-size, separately chained hash table keyed by byte strings. */
struct _HashTab {
    HashEntry *entries[HASH_SIZE];      /* bucket heads, NULL when empty */
};

/* Payload of an entry; which member is live depends on the table. */
union _HashVal {
    int state;                  /* used by the state-number table */
    char *hyppat;               /* used by the pattern tables */
};

/* One node of a bucket chain. */
struct _HashEntry {
    HashEntry *next;            /* next node in the same bucket */
    unsigned char *key;         /* NUL-terminated key string */
    HashVal u;
};

/* Cursor used to walk all entries of a table. */
struct _HashIter {
    HashEntry **e;              /* the bucket array being walked */
    HashEntry *cur;             /* next entry to hand out, or NULL */
    int ndx;                    /* index of the bucket cur came from */
};
113 @ State machine
116 typedef struct _HyphenState HyphenState;
117 typedef struct _HyphenTrans HyphenTrans;
118 #define MAX_CHARS 256
119 #define MAX_NAME 20
121 struct _HyphenDict {
122 int num_states;
123 int pat_length;
124 char cset[MAX_NAME];
125 HyphenState *states;
126 HashTab *patterns;
127 HashTab *merged;
128 HashTab *state_num;
131 struct _HyphenState {
132 char *match;
133 /*char *repl; */
134 /*signed char replindex; */
135 /*signed char replcut; */
136 int fallback_state;
137 int num_trans;
138 HyphenTrans *trans;
141 struct _HyphenTrans {
142 int uni_ch;
143 int new_state;
147 @ Combine two right-aligned number patterns, 04000 + 020 becomes 04020
/* Merge the digit string subexpr into expr, right-aligned, keeping the
   larger byte at every position ("04000" + "020" becomes "04020").
   expr is modified in place and returned.

   If subexpr is longer than expr only its trailing strlen(expr) bytes are
   used, so the merge never writes outside expr. */
static char *combine(char *expr, const char *subexpr)
{
    size_t l1 = strlen(expr);
    size_t l2 = strlen(subexpr);
    size_t off;
    size_t j;

    if (l2 > l1) {
        /* without this, l1 - l2 would wrap around */
        subexpr += l2 - l1;
        l2 = l1;
    }
    off = l1 - l2;
    /* this works also for utf8 sequences because the substring is identical
       to the last substring-length bytes of expr except for the (single byte)
       hyphenation encoders */
    for (j = 0; j < l2; j++) {
        if (expr[off + j] < subexpr[j])
            expr[off + j] = subexpr[j];
    }
    return expr;
}
168 @ ORIGINAL CODE
170 static HashIter *new_HashIter(HashTab * h)
172 HashIter *i = hnj_malloc(sizeof(HashIter));
173 i->e = h->entries;
174 i->cur = NULL;
175 i->ndx = -1;
176 return i;
180 static int nextHashStealPattern(HashIter * i, unsigned char **word, char **pattern)
182 while (i->cur == NULL) {
183 if (i->ndx >= HASH_SIZE - 1)
184 return 0;
185 i->cur = i->e[++i->ndx];
187 *word = i->cur->key;
188 *pattern = i->cur->u.hyppat;
189 i->cur->u.hyppat = NULL;
190 i->cur = i->cur->next;
191 return 1;
195 static int nextHash(HashIter * i, unsigned char **word)
197 while (i->cur == NULL) {
198 if (i->ndx >= HASH_SIZE - 1)
199 return 0;
200 i->cur = i->e[++i->ndx];
202 *word = i->cur->key;
203 i->cur = i->cur->next;
204 return 1;
208 static int eachHash(HashIter * i, unsigned char **word, char **pattern)
210 while (i->cur == NULL) {
211 if (i->ndx >= HASH_SIZE - 1)
212 return 0;
213 i->cur = i->e[++i->ndx];
215 *word = i->cur->key;
216 *pattern = i->cur->u.hyppat;
217 i->cur = i->cur->next;
218 return 1;
222 static void delete_HashIter(HashIter * i)
224 hnj_free(i);
228 @ a |char*| hash function from ASU - adapted from Gtk+
/* Hash the NUL-terminated byte string s.  Each byte is shifted into the
   accumulator four bits at a time; whenever the top nibble becomes
   non-zero it is folded back into the low bits and cleared.  The result
   is not reduced; callers take it modulo the table size. */
static unsigned int hnj_string_hash(const unsigned char *s)
{
    unsigned int h = 0;

    for (; *s != '\0'; s++) {
        unsigned int top;

        h = (h << 4) + *s;
        top = h & 0xf0000000;
        if (top != 0) {
            h ^= top >> 24;
            h ^= top;
        }
    }
    return h /* \% M */ ;
}
247 @ assumes that key is not already present!
250 static void state_insert(HashTab * hashtab, unsigned char *key, int state)
252 int i;
253 HashEntry *e;
255 i = (int) (hnj_string_hash(key) % HASH_SIZE);
256 e = hnj_malloc(sizeof(HashEntry));
257 e->next = hashtab->entries[i];
258 e->key = key;
259 e->u.state = state;
260 hashtab->entries[i] = e;
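@ Store |hyppat| under |key|. If the key is already present its pattern is
replaced (an error is reported when the old and new patterns differ) and the
now redundant |key| is freed; otherwise a new entry is added. In both cases
the table takes ownership of |key| and |hyppat|.

@c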
267 static void hyppat_insert(HashTab * hashtab, unsigned char *key, char *hyppat)
269 int i;
270 HashEntry *e;
272 i = (int) (hnj_string_hash(key) % HASH_SIZE);
273 for (e = hashtab->entries[i]; e; e = e->next) {
274 if (strcmp((char *) e->key, (char *) key) == 0) {
275 if (e->u.hyppat) {
276 if (hyppat
277 && strcmp((char *) e->u.hyppat, (char *) hyppat) != 0) {
278 print_err("Conflicting pattern ignored");
279 error();
281 hnj_free(e->u.hyppat);
283 e->u.hyppat = hyppat;
284 hnj_free(key);
285 return;
288 e = hnj_malloc(sizeof(HashEntry));
289 e->next = hashtab->entries[i];
290 e->key = key;
291 e->u.hyppat = hyppat;
292 hashtab->entries[i] = e;
296 @ return state if found, otherwise $-1$
299 static int state_lookup(HashTab * hashtab, const unsigned char *key)
301 int i;
302 HashEntry *e;
304 i = (int) (hnj_string_hash(key) % HASH_SIZE);
305 for (e = hashtab->entries[i]; e; e = e->next) {
306 if (!strcmp((const char *) key, (const char *) e->key)) {
307 return e->u.state;
310 return -1;
@ return the pattern stored for the first |l| bytes of |chars|, otherwise |NULL|

@c
317 static char *hyppat_lookup(HashTab * hashtab, const unsigned char *chars, int l)
319 int i;
320 HashEntry *e;
321 unsigned char key[256]; /* should be ample */
322 strncpy((char *) key, (const char *) chars, (size_t) l);
323 key[l] = 0;
324 i = (int) (hnj_string_hash(key) % HASH_SIZE);
325 for (e = hashtab->entries[i]; e; e = e->next) {
326 if (!strcmp((char *) key, (char *) e->key)) {
327 return e->u.hyppat;
330 return NULL;
334 @ Get the state number, allocating a new state if necessary.
337 static int hnj_get_state(HyphenDict * dict,
338 const unsigned char *str, int *state_num)
340 *state_num = state_lookup(dict->state_num, str);
342 if (*state_num >= 0)
343 return *state_num;
345 state_insert(dict->state_num, hnj_strdup(str), dict->num_states);
346 /* predicate is true if |dict->num_states| is a power of two */
347 if (!(dict->num_states & (dict->num_states - 1))) {
348 dict->states = hnj_realloc(dict->states,
349 (int) ((dict->num_states << 1) *
350 (int) sizeof(HyphenState)));
352 dict->states[dict->num_states].match = NULL;
353 dict->states[dict->num_states].fallback_state = -1;
354 dict->states[dict->num_states].num_trans = 0;
355 dict->states[dict->num_states].trans = NULL;
356 return dict->num_states++;
360 @ Add a transition from state1 to state2 through ch - assumes that the
361 transition does not already exist
364 static void hnj_add_trans(HyphenDict * dict, int state1, int state2, int uni_ch)
366 int num_trans;
367 /* TH: this test was a bit too strict, it is quite normal for old
368 patterns to have chars in the range 0-31 or 127-159 (inclusive).
369 To ease the transition, let's only disallow NUL for now
370 (this is probably a requirement of the code anyway).
372 if (uni_ch == 0) {
373 char errmsg[256]; /* temp hack ... we will have a formatted error */
374 snprintf(errmsg, 255, "character out of bounds: u%04x", uni_ch);
375 errmsg[255] = '\0';
376 normal_error("hyphenation",errmsg); /* todo */
378 num_trans = dict->states[state1].num_trans;
379 if (num_trans == 0) {
380 dict->states[state1].trans = hnj_malloc(sizeof(HyphenTrans));
381 } else {
382 /* TH: The old version did
383 } else if (!(num_trans & (num_trans - 1))) {
384 ... hnj_realloc(dict->states[state1].trans,
385 (int) ((num_trans << 1) *
386 sizeof(HyphenTrans)));
387 but that is incredibly nasty when adding patters one-at-a-time.
388 Controlled growth would be nicer than the current +1, but if
389 noone complains, this is good enough ;)
391 dict->states[state1].trans = hnj_realloc(dict->states[state1].trans,
392 (int) ((num_trans + 1) *
393 sizeof(HyphenTrans)));
395 dict->states[state1].trans[num_trans].uni_ch = uni_ch;
396 dict->states[state1].trans[num_trans].new_state = state2;
397 dict->states[state1].num_trans++;
#ifdef VERBOSE

/* Debug helper: return the key string of the entry whose state number is
   state, or NULL when no entry has that number.
   NOTE(review): the table variable |global| is not declared anywhere in the
   visible source, so this only builds if VERBOSE code supplies it. */
static unsigned char *get_state_str(int state)
{
    int i;
    HashEntry *e;

    for (i = 0; i < HASH_SIZE; i++) {
        for (e = global->entries[i]; e; e = e->next) {
            if (e->u.state == state)
                return e->key;
        }
    }
    return NULL;
}

#endif
417 @ I've changed the semantics a bit here: |hnj_hyphen_load| used to
418 operate on a file, but now the argument is a string buffer.
/* Fetch the next whitespace-delimited token from the NUL-terminated
   buffer *buf.

   Returns a pointer to the first byte of the token (it is not
   NUL-terminated; its byte count is stored in *length) and advances *buf
   to the byte following the token.  When only whitespace remains, *length
   is set to 0, *buf is left at the terminating NUL and NULL is returned. */
static const unsigned char *next_pattern(size_t * length,
                                         const unsigned char **buf)
{
    const unsigned char *rover = *buf;
    const unsigned char *start;

    while (*rover && isspace(*rover))
        rover++;
    start = rover;
    while (*rover && !isspace(*rover))
        rover++;
    *length = (size_t) (rover - start);
    *buf = rover;
    return *length ? start : NULL;      /* zero sensed */
}
441 static void init_hash(HashTab ** h)
443 int i;
444 if (*h)
445 return;
446 *h = hnj_malloc(sizeof(HashTab));
447 for (i = 0; i < HASH_SIZE; i++)
448 (*h)->entries[i] = NULL;
452 static void clear_state_hash(HashTab ** h)
454 int i;
455 if (*h == NULL)
456 return;
457 for (i = 0; i < HASH_SIZE; i++) {
458 HashEntry *e, *next;
459 for (e = (*h)->entries[i]; e; e = next) {
460 next = e->next;
461 hnj_free(e->key);
462 hnj_free(e);
465 hnj_free(*h);
466 *h = NULL;
470 static void clear_hyppat_hash(HashTab ** h)
472 int i;
473 if (*h == NULL)
474 return;
475 for (i = 0; i < HASH_SIZE; i++) {
476 HashEntry *e, *next;
477 for (e = (*h)->entries[i]; e; e = next) {
478 next = e->next;
479 hnj_free(e->key);
480 if (e->u.hyppat)
481 hnj_free(e->u.hyppat);
482 hnj_free(e);
485 hnj_free(*h);
486 *h = NULL;
490 static void init_dict(HyphenDict * dict)
492 dict->num_states = 1;
493 dict->pat_length = 0;
494 dict->states = hnj_malloc(sizeof(HyphenState));
495 dict->states[0].match = NULL;
496 dict->states[0].fallback_state = -1;
497 dict->states[0].num_trans = 0;
498 dict->states[0].trans = NULL;
499 dict->patterns = NULL;
500 dict->merged = NULL;
501 dict->state_num = NULL;
502 init_hash(&dict->patterns);
506 static void clear_dict(HyphenDict * dict)
508 int state_num;
509 for (state_num = 0; state_num < dict->num_states; state_num++) {
510 HyphenState *hstate = &dict->states[state_num];
511 if (hstate->match)
512 hnj_free(hstate->match);
513 if (hstate->trans)
514 hnj_free(hstate->trans);
516 hnj_free(dict->states);
517 clear_hyppat_hash(&dict->patterns);
518 clear_hyppat_hash(&dict->merged);
519 clear_state_hash(&dict->state_num);
524 HyphenDict *hnj_hyphen_new(void)
526 HyphenDict *dict = hnj_malloc(sizeof(HyphenDict));
527 init_dict(dict);
528 return dict;
532 void hnj_hyphen_clear(HyphenDict * dict)
534 clear_dict(dict);
535 init_dict(dict);
539 void hnj_hyphen_free(HyphenDict * dict)
541 clear_dict(dict);
542 hnj_free(dict);
545 unsigned char *hnj_serialize(HyphenDict * dict)
547 HashIter *v;
548 unsigned char *word;
549 char *pattern;
550 unsigned char *buf = hnj_malloc(dict->pat_length);
551 unsigned char *cur = buf;
552 v = new_HashIter(dict->patterns);
553 while (eachHash(v, &word, &pattern)) {
554 int i = 0, e = 0;
555 while (word[e + i]) {
556 if (pattern[i] != '0')
557 *cur++ = (unsigned char) pattern[i];
558 *cur++ = word[e + i++];
559 while (is_utf8_follow(word[e + i]))
560 *cur++ = word[i + e++];
562 if (pattern[i] != '0')
563 *cur++ = (unsigned char) pattern[i];
564 *cur++ = ' ';
566 delete_HashIter(v);
567 *cur = 0;
568 return buf;
/* Release a string returned by hnj_serialize. */
void hnj_free_serialize(unsigned char *c)
{
    hnj_free(c);
}
578 @ hyphenation pattern:
580 signed bytes
582 0 indicates end (actually any negative number)
584 : prio(1+),startpos,length,len1,[replace],len2,[replace]
586 most basic example is:
588 p n 0 0 0
590 for a hyphenation point between characters
594 void hnj_hyphen_load(HyphenDict * dict, const unsigned char *f)
596 int state_num, last_state;
597 int ch;
598 int found;
599 HashEntry *e;
600 HashIter *v;
601 unsigned char *word;
602 char *pattern;
603 size_t l = 0;
605 const unsigned char *format;
606 const unsigned char *begin = f;
607 unsigned char *pat;
608 char *org;
609 while ((format = next_pattern(&l, &f)) != NULL) {
610 int i, j, e1;
611 if (l>=255) {
612 help1("Individual patterns should not be longer than 254 bytes total.");
613 print_err("Pattern of enormous length ignored");
614 error();
615 continue;
617 #if 0
618 printf("%s\n",format);
619 char* repl = strnchr(format, '/',l);
620 int replindex = 0;
621 int replcut = 0;
622 if (repl) {
623 int clen = l-(repl-format);
624 l = repl-format;
625 char * index = strnchr(repl + 1, ',',clen);
626 if (index) {
627 char * index2 = strnchr(index + 1, ',',clen-(index-repl));
628 if (index2) {
629 replindex = (signed char) atoi(index + 1) - 1;
630 replcut = (signed char) atoi(index2 + 1);
632 } else {
633 hnj_strchomp(repl + 1);
634 replindex = 0;
635 replcut = strlen(buf);
637 repl = hnj_strdup(repl + 1);
639 #endif
640 for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) {
641 if (format[i] >= '0' && format[i] <= '9')
642 j++;
643 if (is_utf8_follow(format[i]))
644 e1++;
646 /* |l-e1| => number of {\it characters} not {\it bytes} */
647 /* |l-j| => number of pattern bytes */
648 /* |l-e1-j| => number of pattern characters */
649 pat = (unsigned char *) malloc((1 + l - (size_t) j));
650 org = (char *) malloc((size_t) (2 + l - (size_t) e1 - (size_t) j));
651 /* remove hyphenation encoders (digits) from pat */
652 org[0] = '0';
653 for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) {
654 unsigned char c = format[i];
655 if (is_utf8_follow(c)) {
656 pat[j + e1++] = c;
657 } else if (c < '0' || c > '9') {
658 pat[e1 + j++] = c;
659 org[j] = '0';
660 } else {
661 org[j] = (char) c;
664 pat[e1 + j] = 0;
665 org[j + 1] = 0;
666 hyppat_insert(dict->patterns, pat, org);
668 dict->pat_length += (int) ((f - begin) + 2); /* 2 for spurious spaces */
669 init_hash(&dict->merged);
670 v = new_HashIter(dict->patterns);
671 while (nextHash(v, &word)) {
672 int wordsize = (int) strlen((char *) word);
673 int j1, l1;
674 for (l1 = 1; l1 <= wordsize; l1++) {
675 if (is_utf8_follow(word[l1]))
676 continue; /* Do not clip an utf8 sequence */
677 for (j1 = 1; j1 <= l1; j1++) {
678 char *subpat_pat;
679 int i1 = l1 - j1;
680 if (is_utf8_follow(word[i1]))
681 continue; /* Do not start halfway an utf8 sequence */
682 if ((subpat_pat =
683 hyppat_lookup(dict->patterns, word + i1, j1)) != NULL) {
684 char *newpat_pat;
685 if ((newpat_pat =
686 hyppat_lookup(dict->merged, word, l1)) == NULL) {
687 char *neworg;
688 unsigned char *newword =
689 (unsigned char *) malloc((size_t) (l1 + 1));
690 int e1 = 0;
691 strncpy((char *) newword, (char *) word, (size_t) l1);
692 newword[l1] = 0;
693 for (i1 = 0; i1 < l1; i1++)
694 if (is_utf8_follow(newword[i1]))
695 e1++;
696 neworg = malloc((size_t) (l1 + 2 - e1));
697 sprintf(neworg, "%0*d", l1 + 1 - e1, 0); /* fill with right amount of '0' */
698 hyppat_insert(dict->merged, newword, combine(neworg, subpat_pat));
699 } else {
700 combine(newpat_pat, subpat_pat);
706 delete_HashIter(v);
708 init_hash(&dict->state_num);
709 state_insert(dict->state_num, hnj_strdup((const unsigned char *) ""), 0);
710 v = new_HashIter(dict->merged);
711 while (nextHashStealPattern(v, &word, &pattern)) {
712 static unsigned char mask[] = { 0x3F, 0x1F, 0xF, 0x7 };
713 int j1 = (int) strlen((char *) word);
714 #ifdef VERBOSE
715 printf("word %s pattern %s, j = %d\n", word, pattern, j1);
716 #endif
717 state_num = hnj_get_state(dict, word, &found);
718 dict->states[state_num].match = pattern;
720 /* now, put in the prefix transitions */
721 while (found < 0) {
722 j1--;
723 last_state = state_num;
724 ch = word[j1];
725 if (ch >= 0x80) {
726 int m;
727 int i1 = 1;
728 while (is_utf8_follow(word[j1 - i1]))
729 i1++;
730 ch = word[j1 - i1] & mask[i1];
731 m = j1 - i1;
732 while (i1--) {
733 ch = (ch << 6) + (0x3F & word[j1 - i1]);
735 j1 = m;
737 word[j1] = '\0';
738 state_num = hnj_get_state(dict, word, &found);
739 hnj_add_trans(dict, state_num, last_state, ch);
742 delete_HashIter(v);
743 clear_hyppat_hash(&dict->merged);
745 /* put in the fallback states */
747 int i, j = 0;
748 for (i = 0; i < HASH_SIZE; i++) {
749 for (e = dict->state_num->entries[i]; e; e = e->next) {
750 /* do not do state==0 otherwise things get confused */
751 if (e->u.state) {
752 for (j = 1; 1; j++) {
753 state_num = state_lookup(dict->state_num, e->key + j);
754 if (state_num >= 0)
755 break;
757 dict->states[e->u.state].fallback_state = state_num;
761 #ifdef VERBOSE
762 for (i = 0; i < HASH_SIZE; i++) {
763 for (e = dict->state_num->entries[i]; e; e = e->next) {
764 printf("%d string %s state %d, fallback=%d\n",
765 i, e->key, e->u.state, dict->states[e->u.state].fallback_state);
766 for (j = 0; j < dict->states[e->u.state].num_trans; j++) {
767 printf(" u%4x->%d\n",
768 (int) dict->states[e->u.state].trans[j].uni_ch,
769 dict->states[e->u.state].trans[j].new_state);
773 #endif
775 clear_state_hash(&dict->state_num);
778 @ @c
779 void hnj_hyphen_hyphenate(HyphenDict * dict,
780 halfword first1,
781 halfword last1,
782 int length,
783 halfword left, halfword right, lang_variables * lan)
785 int char_num;
786 halfword here;
787 int state = 0;
788 /* +2 for dots at each end, +1 for points /outside/ characters */
789 int ext_word_len = length + 2;
790 int hyphen_len = ext_word_len + 1;
791 char *hyphens = hnj_malloc(hyphen_len + 1);
793 /* Add a '.' to beginning and end to facilitate matching */
794 set_vlink(begin_point, first1);
795 set_vlink(end_point, get_vlink(last1));
796 set_vlink(last1, end_point);
798 for (char_num = 0; char_num < hyphen_len; char_num++) {
799 hyphens[char_num] = '0';
801 hyphens[hyphen_len] = 0;
803 /* now, run the finite state machine */
804 for (char_num = 0, here = begin_point; here != get_vlink(end_point);
805 here = get_vlink(here)) {
807 int ch;
808 if (here == begin_point || here == end_point)
809 ch = '.';
810 else
811 ch = get_lc_code(get_character(here));
812 while (state != -1) {
813 #if 0
814 printf("%*s%s%c",char_num-strlen(get_state_str(state)),"",get_state_str(state),(char)ch);
815 #endif
816 HyphenState *hstate = &dict->states[state];
817 int k;
818 for (k = 0; k < hstate->num_trans; k++) {
819 if (hstate->trans[k].uni_ch == ch) {
820 char *match;
821 state = hstate->trans[k].new_state;
822 #if 0
823 printf(" state %d\n",state);
824 #endif
825 match = dict->states[state].match;
826 if (match) {
827 /* +2 because:
828 1 string length is one bigger than offset
829 1 hyphenation starts before first character
831 int offset = (int) (char_num + 2 - (int) strlen(match));
832 #if 0
833 printf("%*s%s\n", offset,"", match);
834 #endif
835 int m;
836 for (m = 0; match[m]; m++) {
837 if (hyphens[offset + m] < match[m])
838 hyphens[offset + m] = match[m];
841 goto try_next_letter;
844 state = hstate->fallback_state;
845 #if 0
846 printf(" back to %d\n", state);
847 #endif
849 /* nothing worked, let's go to the next character */
850 state = 0;
851 try_next_letter:;
852 char_num++;
855 /* restore the correct pointers */
856 set_vlink(last1, get_vlink(end_point));
858 /* pattern is \.{\^.\^w\^o\^r\^d\^.\^} |word_len|=4, |ext_word_len|=6, |hyphens|=7
859 * check \.{ \^ \^ \^ } so drop first two and stop after |word_len-1|
861 for (here = first1, char_num = 2; here != left; here = get_vlink(here))
862 char_num++;
863 for (; here != right; here = get_vlink(here)) {
864 if (hyphens[char_num] & 1)
865 here = insert_syllable_discretionary(here, lan);
866 char_num++;
868 hnj_free(hyphens);