boundary nodes made consistent (cleanup and document): WARNING: bump the format numbe...
[luatex.git] / source / texk / web2c / luatexdir / tex / dumpdata.w
blob993ad8a16c02b30d236c9f5b7a865ff151e961f0
1 % dumpdata.w
3 % Copyright 2009-2010 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
23 #include "ptexlib.h"
/* Text field (|cs_text|) of the control sequence located |A| entries past |font_id_base|. */
25 #define font_id_text(A) cs_text(font_id_base+(A))
/* Shorthand for the |prev_depth_field| member of |cur_list|. */
26 #define prev_depth cur_list.prev_depth_field
/* |FORMAT_ID| is written right after the magic constant when a format is
   dumped and must match exactly when a format is loaded. */
28 /* we start with 907: the sum of the values of the bytes of "don knuth" */
30 #define FORMAT_ID (907+15)
/* Compile-time guard: identifiers in the range 0..256 are rejected because an
   older format layout stored the engine-name length in this position. */
31 #if ((FORMAT_ID>=0) && (FORMAT_ID<=256))
32 #error Wrong value for FORMAT_ID.
33 #endif
36 @ After \.{INITEX} has seen a collection of fonts and macros, it
37 can write all the necessary information on an auxiliary file so
38 that production versions of \TeX\ are able to initialize their
39 memory at high speed. The present section of the program takes
40 care of such output and input. We shall consider simultaneously
41 the processes of storing and restoring,
42 so that the inverse relation between them is clear.
43 @.INITEX@>
45 The global variable |format_ident| is a string that is printed right
46 after the |banner| line when \TeX\ is ready to start. For \.{INITEX} this
47 string says simply `\.{(INITEX)}'; for other versions of \TeX\ it says,
48 for example, `\.{(preloaded format=plain 1982.11.19)}', showing the year,
49 month, and day that the format file was created. We have |format_ident=0|
50 before \TeX's tables are loaded. |FORMAT_ID| is a new field of type int
51 suitable for the identification of a format: values between 0 and 256
52 (inclusive) cannot be used because in the previous format layout that
53 field held the length of the name of the engine.
/* String numbers created by |store_fmt_file| with |make_string| and restored
   by |load_fmt_file|; |format_ident| is the identification text that is
   printed after the banner. */
55 str_number format_ident;
56 str_number format_name; /* principal file name */
59 @ Format files consist of |memory_word| items, and we use the following
60 macros to dump words of different types:
/* |store_fmt_file| opens this stream with |zopen_w_output| and closes it with
   |zwclose|. */
63 FILE *fmt_file; /* for input or output of format information */
65 @ @c
66 void store_fmt_file(void)
68 int j, k, l; /* all-purpose indices */
69 halfword p; /* all-purpose pointer */
70 int x; /* something to dump */
71 char *format_engine;
72 int callback_id; /* |pre_dump| callback */
73 char *fmtname = NULL;
74 /* If dumping is not allowed, abort */
75 /* The user is not allowed to dump a format file unless |save_ptr=0|.
76 This condition implies that |cur_level=level_one|, hence
77 the |xeq_level| array is constant and it need not be dumped. */
78 if (save_ptr != 0) {
79 print_err("You can't dump inside a group");
80 help1("`{...\\dump}' is a no-no.");
81 succumb();
84 /* Create the |format_ident|, open the format file, and inform the user
85 that dumping has begun */
86 callback_id = callback_defined(pre_dump_callback);
87 if (callback_id > 0) {
88 (void) run_callback(callback_id, "->");
90 selector = new_string;
91 tprint(" (format=");
92 print(job_name);
93 print_char(' ');
94 print_int(int_par(year_code));
95 print_char('.');
96 print_int(int_par(month_code));
97 print_char('.');
98 print_int(int_par(day_code));
99 print_char(')');
100 str_room(2);
101 format_ident = make_string();
102 print(job_name);
103 format_name = make_string();
104 if (interaction == batch_mode)
105 selector = log_only;
106 else
107 selector = term_and_log;
109 fmtname = pack_job_name(format_extension);
110 while (!zopen_w_output(&fmt_file, fmtname, FOPEN_WBIN_MODE)) {
111 fmtname = prompt_file_name("format file name", format_extension);
113 tprint_nl("Beginning to dump on file ");
114 tprint(fmtname);
115 free(fmtname);
116 tprint_nl("");
117 print(format_ident);
119 /* Dump constants for consistency check */
120 /* The next few sections of the program should make it clear how we use the
121 dump/undump macros. */
123 dump_int(0x57325458); /* Web2C \TeX's magic constant: "W2TX" */
124 dump_int(FORMAT_ID);
126 /* Align engine to 4 bytes with one or more trailing NUL */
127 x = (int) strlen(engine_name);
128 format_engine = xmalloc((unsigned) (x + 4));
129 strcpy(format_engine, engine_name);
130 for (k = x; k <= x + 3; k++)
131 format_engine[k] = 0;
132 x = x + 4 - (x % 4);
133 dump_int(x);
134 dump_things(format_engine[0], x);
135 xfree(format_engine);
136 dump_int(0x57325458); /* TODO HM, what checksum would make sense? */
137 dump_int(max_halfword);
138 dump_int(hash_high);
139 dump_int(eqtb_size);
140 dump_int(hash_prime);
142 /* Dump the string pool */
143 k = dump_string_pool();
144 print_ln();
145 print_int(k);
146 tprint(" strings using ");
147 print_int((longinteger) pool_size);
148 tprint(" bytes");
150 /* Dump the dynamic memory */
151 /* By sorting the list of available spaces in the variable-size portion of
152 |mem|, we are usually able to get by without having to dump very much
153 of the dynamic memory.
155 We recompute |var_used| and |dyn_used|, so that \.{INITEX} dumps valid
156 information even when it has not been gathering statistics.
158 dump_node_mem();
159 dump_int(temp_token_head);
160 dump_int(hold_token_head);
161 dump_int(omit_template);
162 dump_int(null_list);
163 dump_int(backup_head);
164 dump_int(garbage);
165 x = (int) fix_mem_min;
166 dump_int(x);
167 x = (int) fix_mem_max;
168 dump_int(x);
169 x = (int) fix_mem_end;
170 dump_int(x);
171 dump_int(avail);
172 dyn_used = (int) fix_mem_end + 1;
173 dump_things(fixmem[fix_mem_min], fix_mem_end - fix_mem_min + 1);
174 x = x + (int) (fix_mem_end + 1 - fix_mem_min);
175 p = avail;
176 while (p != null) {
177 decr(dyn_used);
178 p = token_link(p);
180 dump_int(dyn_used);
181 print_ln();
182 print_int(x);
183 tprint(" memory locations dumped; current usage is ");
184 print_int(var_used);
185 print_char('&');
186 print_int(dyn_used);
188 /* Dump the table of equivalents */
189 /* Dump regions 1 to 4 of |eqtb| */
190 /*The table of equivalents usually contains repeated information, so we dump it
191 in compressed form: The sequence of $n+2$ values $(n,x_1,\ldots,x_n,m)$ in the
192 format file represents $n+m$ consecutive entries of |eqtb|, with |m| extra
193 copies of $x_n$, namely $(x_1,\ldots,x_n,x_n,\ldots,x_n)$.
195 k = null_cs;
196 do {
197 j = k;
198 while (j < int_base - 1) {
199 if ((equiv(j) == equiv(j + 1)) && (eq_type(j) == eq_type(j + 1)) &&
200 (eq_level(j) == eq_level(j + 1)))
201 goto FOUND1;
202 incr(j);
204 l = int_base;
205 goto DONE1; /* |j=int_base-1| */
206 FOUND1:
207 incr(j);
208 l = j;
209 while (j < int_base - 1) {
210 if ((equiv(j) != equiv(j + 1)) || (eq_type(j) != eq_type(j + 1)) ||
211 (eq_level(j) != eq_level(j + 1)))
212 goto DONE1;
213 incr(j);
215 DONE1:
216 dump_int(l - k);
217 dump_things(eqtb[k], l - k);
218 k = j + 1;
219 dump_int(k - l);
220 } while (k != int_base);
222 /* Dump regions 5 and 6 of |eqtb| */
223 do {
224 j = k;
225 while (j < eqtb_size) {
226 if (eqtb[j].cint == eqtb[j + 1].cint)
227 goto FOUND2;
228 incr(j);
230 l = eqtb_size + 1;
231 goto DONE2; /* |j=eqtb_size| */
232 FOUND2:
233 incr(j);
234 l = j;
235 while (j < eqtb_size) {
236 if (eqtb[j].cint != eqtb[j + 1].cint)
237 goto DONE2;
238 incr(j);
240 DONE2:
241 dump_int(l - k);
242 dump_things(eqtb[k], l - k);
243 k = j + 1;
244 dump_int(k - l);
245 } while (k <= eqtb_size);
246 if (hash_high > 0)
247 dump_things(eqtb[eqtb_size + 1], hash_high); /* dump |hash_extra| part */
249 dump_int(par_loc);
250 dump_int(write_loc);
251 dump_math_codes();
252 dump_text_codes();
253 /* Dump the hash table */
254 /* A different scheme is used to compress the hash table, since its lower
255 region is usually sparse. When |text(p)<>0| for |p<=hash_used|, we output
256 two words, |p| and |hash[p]|. The hash table is, of course, densely packed
257 for |p>=hash_used|, so the remaining entries are output in a~block.
259 dump_primitives();
260 dump_int(hash_used);
261 cs_count = frozen_control_sequence - 1 - hash_used + hash_high;
262 for (p = hash_base; p <= hash_used; p++) {
263 if (cs_text(p) != 0) {
264 dump_int(p);
265 dump_hh(hash[p]);
266 incr(cs_count);
269 dump_things(hash[hash_used + 1],
270 undefined_control_sequence - 1 - hash_used);
271 if (hash_high > 0)
272 dump_things(hash[eqtb_size + 1], hash_high);
273 dump_int(cs_count);
274 print_ln();
275 print_int(cs_count);
276 tprint(" multiletter control sequences");
278 /* Dump the font information */
279 dump_int(max_font_id());
280 for (k = 0; k <= max_font_id(); k++) {
281 /* Dump the array info for internal font number |k| */
282 dump_font(k);
283 tprint_nl("\\font");
284 print_esc(font_id_text(k));
285 print_char('=');
286 tprint_file_name((unsigned char *) font_name(k),
287 (unsigned char *) font_area(k), NULL);
288 if (font_size(k) != font_dsize(k)) {
289 tprint(" at ");
290 print_scaled(font_size(k));
291 tprint("pt");
294 print_ln();
295 print_int(max_font_id());
296 tprint(" preloaded font");
297 if (max_font_id() != 1)
298 print_char('s');
299 dump_math_data();
301 /* Dump the hyphenation tables */
302 dump_language_data();
304 /* Dump a couple more things and the closing check word */
305 dump_int(interaction);
306 dump_int(format_ident);
307 dump_int(format_name);
308 dump_int(69069);
309 /* We have already printed a lot of statistics, so we set |tracing_stats:=0|
310 to prevent them from appearing again. */
311 int_par(tracing_stats_code) = 0;
313 /* Dump the lua bytecodes */
314 dump_luac_registers();
316 /* Close the format file */
317 zwclose(fmt_file);
320 @ Corresponding to the procedure that dumps a format file, we have a function
321 that reads one in. The function returns |false| if the dumped format is
322 incompatible with the present \TeX\ table sizes, etc.
/* Tell the user on the terminal which capacity (named by the string |A|) has
   to be increased, then abandon the load by jumping to the |BAD_FMT| label of
   the enclosing function. */
#define too_small(A) do {                                        \
        wake_up_terminal();                                      \
        wterm_cr();                                              \
        fprintf(term_out, "---! Must increase the %s", (A));     \
        goto BAD_FMT;                                            \
    } while (0)
332 @ The inverse macros are slightly more complicated, since we need to check
333 the range of the values we are reading in. We say `|undump(a,b,x)|' to
334 read an integer value |x| that is supposed to be in the range |a<=x<=b|.
/* Read one integer into the caller's local |x|; store it in |C| when it lies
   in the closed range |A|..|B|, otherwise jump to the caller's |BAD_FMT|
   label. |A| is tested before |B|, and |C| is assigned only after both
   bounds have been checked. */
#define undump(A,B,C) do {                                       \
        undump_int(x);                                           \
        if (x >= (A) && x <= (B)) {                              \
            (C) = x;                                             \
        } else {                                                 \
            goto BAD_FMT;                                        \
        }                                                        \
    } while (0)
/* When |debug_format_file| is nonzero, print the label |A| and the value |B|
   (converted to int) on stderr; otherwise do nothing and leave |B|
   unevaluated. The body is a single statement, so the macro can be used as
   the branch of an unbraced if/else. */
#define format_debug(A,B) do {                                   \
    if (debug_format_file) {                                     \
      fprintf (stderr, "fmtdebug: %s=%d", (A), (int)(B));        \
    }                                                            \
  } while (0)
/* Read one integer into the caller's local |x| and store it in |D|.
   A value below |A| jumps to |BAD_FMT|; a value above |B| reports, through
   |too_small|, that the capacity named |C| must be increased; any other value
   is traced through |format_debug| before being stored. */
#define undump_size(A,B,C,D) do {                                \
        undump_int(x);                                           \
        if (x < (A)) {                                           \
            goto BAD_FMT;                                        \
        }                                                        \
        if (x > (B)) {                                           \
            too_small(C);                                        \
        } else {                                                 \
            format_debug(C, x);                                  \
        }                                                        \
        (D) = x;                                                 \
    } while (0)
359 @ @c
360 boolean load_fmt_file(const char *fmtname)
362 int j, k; /* all-purpose indices */
363 halfword p; /* all-purpose pointer */
364 int x; /* something undumped */
365 char *format_engine;
366 /* Undump constants for consistency check */
367 if (ini_version) {
368 libcfree(hash);
369 libcfree(eqtb);
370 libcfree(fixmem);
371 libcfree(varmem);
373 undump_int(x);
374 format_debug("format magic number", x);
375 if (x != 0x57325458)
376 goto BAD_FMT; /* not a format file */
378 undump_int(x);
379 format_debug("format id", x);
380 if (x != FORMAT_ID)
381 goto BAD_FMT; /* FORMAT_ID mismatch */
383 undump_int(x);
384 format_debug("engine name size", x);
385 if ((x < 0) || (x > 256))
386 goto BAD_FMT; /* corrupted format file */
388 format_engine = xmalloc((unsigned) x);
389 undump_things(format_engine[0], x);
390 format_engine[x - 1] = 0; /* force string termination, just in case */
391 if (strcmp(engine_name, format_engine)) {
392 wake_up_terminal();
393 wterm_cr();
394 fprintf(term_out, "---! %s was written by %s", fmtname, format_engine);
395 xfree(format_engine);
396 goto BAD_FMT;
398 xfree(format_engine);
399 undump_int(x);
400 format_debug("string pool checksum", x);
401 if (x != 0x57325458) { /* todo: @@\$ *//* check that strings are the same */
402 wake_up_terminal();
403 wterm_cr();
404 fprintf(term_out, "---! %s was written by a different version",
405 fmtname);
406 goto BAD_FMT;
408 undump_int(x);
409 if (x != max_halfword)
410 goto BAD_FMT; /* check |max_halfword| */
411 undump_int(hash_high);
412 if ((hash_high < 0) || (hash_high > sup_hash_extra))
413 goto BAD_FMT;
414 if (hash_extra < hash_high)
415 hash_extra = hash_high;
416 eqtb_top = eqtb_size + hash_extra;
417 if (hash_extra == 0)
418 hash_top = undefined_control_sequence;
419 else
420 hash_top = eqtb_top;
421 hash = xmallocarray(two_halves, (unsigned) (1 + hash_top));
422 memset(hash, 0, sizeof(two_halves) * (unsigned) (hash_top + 1));
423 eqtb = xmallocarray(memory_word, (unsigned) (eqtb_top + 1));
424 set_eq_type(undefined_control_sequence, undefined_cs_cmd);
425 set_equiv(undefined_control_sequence, null);
426 set_eq_level(undefined_control_sequence, level_zero);
427 for (x = eqtb_size + 1; x <= eqtb_top; x++)
428 eqtb[x] = eqtb[undefined_control_sequence];
429 undump_int(x);
430 if (x != eqtb_size)
431 goto BAD_FMT;
432 undump_int(x);
433 if (x != hash_prime)
434 goto BAD_FMT;
436 /* Undump the string pool */
437 str_ptr = undump_string_pool();
438 /* Undump the dynamic memory */
439 undump_node_mem();
440 undump_int(temp_token_head);
441 undump_int(hold_token_head);
442 undump_int(omit_template);
443 undump_int(null_list);
444 undump_int(backup_head);
445 undump_int(garbage);
446 undump_int(fix_mem_min);
447 undump_int(fix_mem_max);
448 fixmem = xmallocarray(smemory_word, fix_mem_max + 1);
449 memset(voidcast(fixmem), 0, (fix_mem_max + 1) * sizeof(smemory_word));
450 undump_int(fix_mem_end);
451 undump_int(avail);
452 undump_things(fixmem[fix_mem_min], fix_mem_end - fix_mem_min + 1);
453 undump_int(dyn_used);
455 /* Undump the table of equivalents */
456 /* Undump regions 1 to 6 of |eqtb| */
457 k = null_cs;
458 do {
459 undump_int(x);
460 if ((x < 1) || (k + x > eqtb_size + 1))
461 goto BAD_FMT;
462 undump_things(eqtb[k], x);
463 k = k + x;
464 undump_int(x);
465 if ((x < 0) || (k + x > eqtb_size + 1))
466 goto BAD_FMT;
467 for (j = k; j <= k + x - 1; j++)
468 eqtb[j] = eqtb[k - 1];
469 k = k + x;
470 } while (k <= eqtb_size);
471 if (hash_high > 0) /* undump |hash_extra| part */
472 undump_things(eqtb[eqtb_size + 1], hash_high);
474 undump(hash_base, hash_top, par_loc);
475 par_token = cs_token_flag + par_loc;
476 undump(hash_base, hash_top, write_loc);
477 undump_math_codes();
478 undump_text_codes();
479 /* Undump the hash table */
480 undump_primitives();
481 undump(hash_base, frozen_control_sequence, hash_used);
482 p = hash_base - 1;
483 do {
484 undump(p + 1, hash_used, p);
485 undump_hh(hash[p]);
486 } while (p != hash_used);
487 undump_things(hash[hash_used + 1],
488 undefined_control_sequence - 1 - hash_used);
489 if (debug_format_file)
490 print_csnames(hash_base, undefined_control_sequence - 1);
491 if (hash_high > 0) {
492 undump_things(hash[eqtb_size + 1], hash_high);
493 if (debug_format_file)
494 print_csnames(eqtb_size + 1, hash_high - (eqtb_size + 1));
496 undump_int(cs_count);
498 /* Undump the font information */
499 undump_int(x);
500 set_max_font_id(x);
501 for (k = 0; k <= max_font_id(); k++) {
502 /* Undump the array info for internal font number |k| */
503 undump_font(k);
505 undump_math_data();
507 /* Undump the hyphenation tables */
508 undump_language_data();
510 /* Undump a couple more things and the closing check word */
511 undump(batch_mode, error_stop_mode, interaction);
512 if (interactionoption != unspecified_mode)
513 interaction = interactionoption;
514 undump(0, str_ptr, format_ident);
515 undump(0, str_ptr, format_name);
516 undump_int(x);
517 if (x != 69069)
518 goto BAD_FMT;
520 /* Undump the lua bytecodes */
521 undump_luac_registers();
523 prev_depth = ignore_depth;
524 return true; /* it worked! */
525 BAD_FMT:
526 wake_up_terminal();
527 wterm_cr();
528 fprintf(term_out, "(Fatal format file error; I'm stymied)");
529 return false;