sync with experimental
[luatex.git] / source / texk / web2c / luatexdir / tex / dumpdata.w
blob85ff55f4e09915aa3f6888d4180c2f6f3426168a
1 % dumpdata.w
3 % Copyright 2009-2010 Taco Hoekwater <taco@@luatex.org>
5 % This file is part of LuaTeX.
7 % LuaTeX is free software; you can redistribute it and/or modify it under
8 % the terms of the GNU General Public License as published by the Free
9 % Software Foundation; either version 2 of the License, or (at your
10 % option) any later version.
12 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 % License for more details.
17 % You should have received a copy of the GNU General Public License along
18 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
20 @ @c
22 #include "ptexlib.h"
/*
 * Identification number written near the start of every format file and
 * checked again when a format is loaded.  The base value 907 is the sum of
 * the byte values of "don knuth"; with the added 18 the current id is 925.
 */
#define FORMAT_ID (907+18)

/* Ids in the range 0..256 are refused at compile time: older format files
   kept the length of the engine name in this position, so such a value
   could not be told apart from an old-style file. */
#if ((FORMAT_ID>=0) && (FORMAT_ID<=256))
#error Wrong value for FORMAT_ID.
#endif
32 @ After \.{INITEX} has seen a collection of fonts and macros, it
33 can write all the necessary information on an auxiliary file so
34 that production versions of \TeX\ are able to initialize their
35 memory at high speed. The present section of the program takes
36 care of such output and input. We shall consider simultaneously
37 the processes of storing and restoring,
38 so that the inverse relation between them is clear.
39 @.INITEX@>
The global variable |format_ident| is a string that is printed right
after the |banner| line when \TeX\ is ready to start. For \.{INITEX} this
string says simply `\.{(INITEX)}'; for other versions of \TeX\ it says,
for example, `\.{(preloaded format=plain 1982.11.19)}', showing the year,
month, and day that the format file was created. We have |format_ident=0|
before \TeX's tables are loaded. |FORMAT_ID| is a new field of type int
suitable for the identification of a format: values between 0 and 256
(inclusive) cannot be used, because in the previous format layout that
position held the length of the name of the engine.
51 str_number format_ident;
52 str_number format_name; /* principal file name */
55 @ Format files consist of |memory_word| items, and we use the following
56 macros to dump words of different types:
FILE *fmt_file;                 /* for input or output of format information */
61 @ @c
62 void store_fmt_file(void)
64 int j, k, l; /* all-purpose indices */
65 halfword p; /* all-purpose pointer */
66 int x; /* something to dump */
67 char *format_engine;
68 int callback_id; /* |pre_dump| callback */
69 char *fmtname = NULL;
70 /* If dumping is not allowed, abort */
71 /* The user is not allowed to dump a format file unless |save_ptr=0|.
72 This condition implies that |cur_level=level_one|, hence
73 the |xeq_level| array is constant and it need not be dumped. */
74 if (save_ptr != 0) {
75 print_err("You can't dump inside a group");
76 help1("`{...\\dump}' is a no-no.");
77 succumb();
80 /* Create the |format_ident|, open the format file, and inform the user
81 that dumping has begun */
82 callback_id = callback_defined(pre_dump_callback);
83 if (callback_id > 0) {
84 (void) run_callback(callback_id, "->");
86 selector = new_string;
87 tprint(" (format=");
88 print(job_name);
89 print_char(' ');
90 print_int(year_par);
91 print_char('.');
92 print_int(month_par);
93 print_char('.');
94 print_int(day_par);
95 print_char(')');
96 str_room(2);
97 format_ident = make_string();
98 print(job_name);
99 format_name = make_string();
100 if (interaction == batch_mode)
101 selector = log_only;
102 else
103 selector = term_and_log;
105 fmtname = pack_job_name(format_extension);
106 while (!zopen_w_output(&fmt_file, fmtname, FOPEN_WBIN_MODE)) {
107 fmtname = prompt_file_name("format file name", format_extension);
109 tprint_nl("Beginning to dump on file ");
110 tprint(fmtname);
111 free(fmtname);
112 tprint_nl("");
113 print(format_ident);
115 /* Dump constants for consistency check */
116 /* The next few sections of the program should make it clear how we use the
117 dump/undump macros. */
119 dump_int(0x57325458); /* Web2C \TeX's magic constant: "W2TX" */
120 dump_int(FORMAT_ID);
122 /* Align engine to 4 bytes with one or more trailing NUL */
123 x = (int) strlen(engine_name);
124 format_engine = xmalloc((unsigned) (x + 4));
125 strcpy(format_engine, engine_name);
126 for (k = x; k <= x + 3; k++)
127 format_engine[k] = 0;
128 x = x + 4 - (x % 4);
129 dump_int(x);
130 dump_things(format_engine[0], x);
131 xfree(format_engine);
132 dump_int(0x57325458); /* TODO HM, what checksum would make sense? */
133 dump_int(max_halfword);
134 dump_int(hash_high);
135 dump_int(eqtb_size);
136 dump_int(hash_prime);
138 /* Dump the string pool */
139 k = dump_string_pool();
140 print_ln();
141 print_int(k);
142 tprint(" strings using ");
143 print_int((longinteger) pool_size);
144 tprint(" bytes");
146 /* Dump the dynamic memory */
147 /* By sorting the list of available spaces in the variable-size portion of
148 |mem|, we are usually able to get by without having to dump very much
149 of the dynamic memory.
151 We recompute |var_used| and |dyn_used|, so that \.{INITEX} dumps valid
152 information even when it has not been gathering statistics.
154 dump_node_mem();
155 dump_int(temp_token_head);
156 dump_int(hold_token_head);
157 dump_int(omit_template);
158 dump_int(null_list);
159 dump_int(backup_head);
160 dump_int(garbage);
161 x = (int) fix_mem_min;
162 dump_int(x);
163 x = (int) fix_mem_max;
164 dump_int(x);
165 x = (int) fix_mem_end;
166 dump_int(x);
167 dump_int(avail);
168 dyn_used = (int) fix_mem_end + 1;
169 dump_things(fixmem[fix_mem_min], fix_mem_end - fix_mem_min + 1);
170 x = x + (int) (fix_mem_end + 1 - fix_mem_min);
171 p = avail;
172 while (p != null) {
173 decr(dyn_used);
174 p = token_link(p);
176 dump_int(dyn_used);
177 print_ln();
178 print_int(x);
179 tprint(" memory locations dumped; current usage is ");
180 print_int(var_used);
181 print_char('&');
182 print_int(dyn_used);
184 /* Dump the table of equivalents */
185 /* Dump regions 1 to 4 of |eqtb| */
186 /*The table of equivalents usually contains repeated information, so we dump it
187 in compressed form: The sequence of $n+2$ values $(n,x_1,\ldots,x_n,m)$ in the
188 format file represents $n+m$ consecutive entries of |eqtb|, with |m| extra
189 copies of $x_n$, namely $(x_1,\ldots,x_n,x_n,\ldots,x_n)$.
191 k = null_cs;
192 do {
193 j = k;
194 while (j < int_base - 1) {
195 if ((equiv(j) == equiv(j + 1)) && (eq_type(j) == eq_type(j + 1)) &&
196 (eq_level(j) == eq_level(j + 1)))
197 goto FOUND1;
198 incr(j);
200 l = int_base;
201 goto DONE1; /* |j=int_base-1| */
202 FOUND1:
203 incr(j);
204 l = j;
205 while (j < int_base - 1) {
206 if ((equiv(j) != equiv(j + 1)) || (eq_type(j) != eq_type(j + 1)) ||
207 (eq_level(j) != eq_level(j + 1)))
208 goto DONE1;
209 incr(j);
211 DONE1:
212 dump_int(l - k);
213 dump_things(eqtb[k], l - k);
214 k = j + 1;
215 dump_int(k - l);
216 } while (k != int_base);
218 /* Dump regions 5 and 6 of |eqtb| */
219 do {
220 j = k;
221 while (j < eqtb_size) {
222 if (eqtb[j].cint == eqtb[j + 1].cint)
223 goto FOUND2;
224 incr(j);
226 l = eqtb_size + 1;
227 goto DONE2; /* |j=eqtb_size| */
228 FOUND2:
229 incr(j);
230 l = j;
231 while (j < eqtb_size) {
232 if (eqtb[j].cint != eqtb[j + 1].cint)
233 goto DONE2;
234 incr(j);
236 DONE2:
237 dump_int(l - k);
238 dump_things(eqtb[k], l - k);
239 k = j + 1;
240 dump_int(k - l);
241 } while (k <= eqtb_size);
242 if (hash_high > 0)
243 dump_things(eqtb[eqtb_size + 1], hash_high); /* dump |hash_extra| part */
245 dump_int(par_loc);
246 dump_int(write_loc);
247 dump_math_codes();
248 dump_text_codes();
249 /* Dump the hash table */
250 /* A different scheme is used to compress the hash table, since its lower
251 region is usually sparse. When |text(p)<>0| for |p<=hash_used|, we output
252 two words, |p| and |hash[p]|. The hash table is, of course, densely packed
253 for |p>=hash_used|, so the remaining entries are output in a~block.
255 dump_primitives();
256 dump_int(hash_used);
257 cs_count = frozen_control_sequence - 1 - hash_used + hash_high;
258 for (p = hash_base; p <= hash_used; p++) {
259 if (cs_text(p) != 0) {
260 dump_int(p);
261 dump_hh(hash[p]);
262 incr(cs_count);
265 dump_things(hash[hash_used + 1],
266 undefined_control_sequence - 1 - hash_used);
267 if (hash_high > 0)
268 dump_things(hash[eqtb_size + 1], hash_high);
269 dump_int(cs_count);
270 print_ln();
271 print_int(cs_count);
272 tprint(" multiletter control sequences");
274 /* Dump the font information */
275 dump_int(max_font_id());
276 for (k = 0; k <= max_font_id(); k++) {
277 /* Dump the array info for internal font number |k| */
278 dump_font(k);
279 tprint_nl("\\font");
280 print_esc(font_id_text(k));
281 print_char('=');
282 tprint_file_name((unsigned char *) font_name(k),
283 (unsigned char *) font_area(k), NULL);
284 if (font_size(k) != font_dsize(k)) {
285 tprint(" at ");
286 print_scaled(font_size(k));
287 tprint("pt");
290 print_ln();
291 print_int(max_font_id());
292 tprint(" preloaded font");
293 if (max_font_id() != 1)
294 print_char('s');
295 dump_math_data();
297 /* Dump the hyphenation tables */
298 dump_language_data();
300 /* Dump a couple more things and the closing check word */
301 dump_int(interaction);
302 dump_int(format_ident);
303 dump_int(format_name);
304 dump_int(69069);
305 /* We have already printed a lot of statistics, so we set |tracing_stats:=0|
306 to prevent them from appearing again. */
307 tracing_stats_par = 0;
309 /* Dump the lua bytecodes */
310 dump_luac_registers();
312 /* Close the format file */
313 zwclose(fmt_file);
316 @ Corresponding to the procedure that dumps a format file, we have a function
317 that reads one in. The function returns |false| if the dumped format is
318 incompatible with the present \TeX\ table sizes, etc.
/*
 * too_small(A): report on |term_out| that the capacity named by the string A
 * must be increased, then jump to the enclosing function's |BAD_FMT| label.
 */
#define too_small(A) do {                                       \
    wake_up_terminal();                                         \
    wterm_cr();                                                 \
    fprintf(term_out,"---! Must increase the %s",(A));          \
    goto BAD_FMT;                                               \
  } while (0)
@ The inverse macros are slightly more complicated, since we need to check
the range of the values we are reading in. We say `|undump(a,b,x)|' to
read an integer value |x| that is supposed to be in the range |a<=x<=b|.
/*
 * undump(A,B,C): read the next integer of the format file into the local
 * variable |x| and store it in C, provided that A <= x <= B.  Any other
 * value jumps to the enclosing function's |BAD_FMT| label and leaves C
 * untouched.
 */
#define undump(A,B,C) do {                                      \
    undump_int(x);                                              \
    if (x<(A) || x>(B)) goto BAD_FMT;                           \
    else (C) = x;                                               \
  } while (0)
/*
 * format_debug(A,B): when |debug_format_file| is nonzero, write the label A
 * and the value B (converted to int) to stderr; otherwise do nothing.
 */
#define format_debug(A,B) do {                                  \
    if (debug_format_file) {                                    \
      fprintf (stderr, "fmtdebug: %s=%d", (A), (int)(B));       \
    }                                                           \
  } while (0)
/*
 * undump_size(A,B,C,D): read the next integer of the format file into the
 * local variable |x|.  A value below A jumps to |BAD_FMT|; a value above B
 * is reported through |too_small| under the name C (which also jumps to
 * |BAD_FMT|); otherwise the value is passed to |format_debug| and stored
 * in D.
 */
#define undump_size(A,B,C,D) do {                               \
    undump_int(x);                                              \
    if (x<(A)) goto BAD_FMT;                                    \
    if (x>(B)) too_small(C);                                    \
    else format_debug (C,x);                                    \
    (D) = x;                                                    \
  } while (0)
355 @ @c
356 boolean load_fmt_file(const char *fmtname)
358 int j, k; /* all-purpose indices */
359 halfword p; /* all-purpose pointer */
360 int x; /* something undumped */
361 char *format_engine;
362 /* Undump constants for consistency check */
363 if (ini_version) {
364 libcfree(hash);
365 libcfree(eqtb);
366 libcfree(fixmem);
367 libcfree(varmem);
369 undump_int(x);
370 format_debug("format magic number", x);
371 if (x != 0x57325458)
372 goto BAD_FMT; /* not a format file */
374 undump_int(x);
375 format_debug("format id", x);
376 if (x != FORMAT_ID)
377 goto BAD_FMT; /* FORMAT_ID mismatch */
379 undump_int(x);
380 format_debug("engine name size", x);
381 if ((x < 0) || (x > 256))
382 goto BAD_FMT; /* corrupted format file */
384 format_engine = xmalloc((unsigned) x);
385 undump_things(format_engine[0], x);
386 format_engine[x - 1] = 0; /* force string termination, just in case */
387 if (strcmp(engine_name, format_engine)) {
388 wake_up_terminal();
389 wterm_cr();
390 fprintf(term_out, "---! %s was written by %s", fmtname, format_engine);
391 xfree(format_engine);
392 goto BAD_FMT;
394 xfree(format_engine);
395 undump_int(x);
396 format_debug("string pool checksum", x);
397 if (x != 0x57325458) { /* todo: @@\$ *//* check that strings are the same */
398 wake_up_terminal();
399 wterm_cr();
400 fprintf(term_out, "---! %s was written by a different version",
401 fmtname);
402 goto BAD_FMT;
404 undump_int(x);
405 if (x != max_halfword)
406 goto BAD_FMT; /* check |max_halfword| */
407 undump_int(hash_high);
408 if ((hash_high < 0) || (hash_high > sup_hash_extra))
409 goto BAD_FMT;
410 if (hash_extra < hash_high)
411 hash_extra = hash_high;
412 eqtb_top = eqtb_size + hash_extra;
413 if (hash_extra == 0)
414 hash_top = undefined_control_sequence;
415 else
416 hash_top = eqtb_top;
417 hash = xmallocarray(two_halves, (unsigned) (1 + hash_top));
418 memset(hash, 0, sizeof(two_halves) * (unsigned) (hash_top + 1));
419 eqtb = xmallocarray(memory_word, (unsigned) (eqtb_top + 1));
420 set_eq_type(undefined_control_sequence, undefined_cs_cmd);
421 set_equiv(undefined_control_sequence, null);
422 set_eq_level(undefined_control_sequence, level_zero);
423 for (x = eqtb_size + 1; x <= eqtb_top; x++)
424 eqtb[x] = eqtb[undefined_control_sequence];
425 undump_int(x);
426 if (x != eqtb_size)
427 goto BAD_FMT;
428 undump_int(x);
429 if (x != hash_prime)
430 goto BAD_FMT;
432 /* Undump the string pool */
433 str_ptr = undump_string_pool();
434 /* Undump the dynamic memory */
435 undump_node_mem();
436 undump_int(temp_token_head);
437 undump_int(hold_token_head);
438 undump_int(omit_template);
439 undump_int(null_list);
440 undump_int(backup_head);
441 undump_int(garbage);
442 undump_int(fix_mem_min);
443 undump_int(fix_mem_max);
444 fixmem = xmallocarray(smemory_word, fix_mem_max + 1);
445 memset(voidcast(fixmem), 0, (fix_mem_max + 1) * sizeof(smemory_word));
446 undump_int(fix_mem_end);
447 undump_int(avail);
448 undump_things(fixmem[fix_mem_min], fix_mem_end - fix_mem_min + 1);
449 undump_int(dyn_used);
451 /* Undump the table of equivalents */
452 /* Undump regions 1 to 6 of |eqtb| */
453 k = null_cs;
454 do {
455 undump_int(x);
456 if ((x < 1) || (k + x > eqtb_size + 1))
457 goto BAD_FMT;
458 undump_things(eqtb[k], x);
459 k = k + x;
460 undump_int(x);
461 if ((x < 0) || (k + x > eqtb_size + 1))
462 goto BAD_FMT;
463 for (j = k; j <= k + x - 1; j++)
464 eqtb[j] = eqtb[k - 1];
465 k = k + x;
466 } while (k <= eqtb_size);
467 if (hash_high > 0) /* undump |hash_extra| part */
468 undump_things(eqtb[eqtb_size + 1], hash_high);
470 undump(hash_base, hash_top, par_loc);
471 par_token = cs_token_flag + par_loc;
472 undump(hash_base, hash_top, write_loc);
473 undump_math_codes();
474 undump_text_codes();
475 /* Undump the hash table */
476 undump_primitives();
477 undump(hash_base, frozen_control_sequence, hash_used);
478 p = hash_base - 1;
479 do {
480 undump(p + 1, hash_used, p);
481 undump_hh(hash[p]);
482 } while (p != hash_used);
483 undump_things(hash[hash_used + 1],
484 undefined_control_sequence - 1 - hash_used);
485 if (debug_format_file)
486 print_csnames(hash_base, undefined_control_sequence - 1);
487 if (hash_high > 0) {
488 undump_things(hash[eqtb_size + 1], hash_high);
489 if (debug_format_file)
490 print_csnames(eqtb_size + 1, hash_high - (eqtb_size + 1));
492 undump_int(cs_count);
494 /* Undump the font information */
495 undump_int(x);
496 set_max_font_id(x);
497 for (k = 0; k <= max_font_id(); k++) {
498 /* Undump the array info for internal font number |k| */
499 undump_font(k);
501 undump_math_data();
503 /* Undump the hyphenation tables */
504 undump_language_data();
506 /* Undump a couple more things and the closing check word */
507 undump(batch_mode, error_stop_mode, interaction);
508 if (interactionoption != unspecified_mode)
509 interaction = interactionoption;
510 undump(0, str_ptr, format_ident);
511 undump(0, str_ptr, format_name);
512 undump_int(x);
513 if (x != 69069)
514 goto BAD_FMT;
516 /* Undump the lua bytecodes */
517 undump_luac_registers();
519 prev_depth_par = ignore_depth;
520 return true; /* it worked! */
521 BAD_FMT:
522 wake_up_terminal();
523 wterm_cr();
524 fprintf(term_out, "(Fatal format file error; I'm stymied)");
525 return false;