added support for AbsVar, fixed bug with tail-call and multiplt expressions
[bugg-scheme-compiler.git] / src / sml / cg.sml
blob9d2c61cff9e2927ceaf569b1641e5d948212e991
1 (* Code Generation *)
3 (* Data Segment - initialized data segment (constants) *)
4 structure DataSegment: sig
5 val reset: unit -> unit (* reset symbol and constant tables between code generations *)
6 val add: Sexpr -> string (* register a new constant *)
7 val emit: unit -> string (* emit code for all constants *)
8 val symbols: (string*string) list (* fetch all constant symbols *)
9 end = struct
10 (* new_name - generate unique names *)
11 val initial_names =
12 ["a", "boot",
13 "sc_undef", "sc_void", "sc_nil", "sc_false_data", "sc_true_data"]
14 val (names:string list ref) = ref initial_names
16 local
17 fun is_new name = not (List.exists (fn n => n=name) (!names)) (* allocate new distinct name *)
18 fun add name = names := name :: !names (* add names to the table of existing ones *)
20 (* if the name is not in the table, register and return it,
21 otherwise, add an increasing number to it until a distinct name is found *)
22 fun new_name name =
23 if is_new name then
24 ( add name
25 ; name )
26 else
27 let fun subname name i =
28 let val name' = name ^ (Int.toString i)
29 in if is_new name' then
30 ( add name'
31 ; name' )
32 else
33 subname name (i + 1) end
34 in subname name 0 end
35 end; (* local *)
37 (* list of constants *)
38 val consts = ref [] : (string*Sexpr) list ref;
40 (* convert the collected consts to global variables in C *)
42 (* table of symbol names and representations;
43 used to
44 - maintain a single representation for all occurences of a symbol
45 - register symbols in run-time symbol table for symbol->string and string->symbol *)
46 val (symbols: (string*string) list ref) = ref []
48 (* return existing name for singletons, otherwise generate a new one;
49 the singletons are hardcoded once and forever, so that they are eq? comparable *)
50 fun const_name (x) =
51 case x of
52 Void => "sc_void"
53 | Nil => "sc_nil"
54 | Bool false => "sc_false"
55 | Bool true => "sc_true"
56 | Symbol s => (* symbols must be eq? comparable *)
57 (case List.find (fn sc => (#1 sc)=s) (!symbols)
58 of SOME sc => #2 sc
59 | NONE => let val name = new_name ("sc_symbol")
60 in symbols := (s, name) :: (!symbols);
61 name
62 end)
63 | _ => (* anything else is just new constant every time *)
64 (new_name (case x
65 of (Pair _) => "sc_pair"
66 | (Vector _) => "sc_vector"
67 | (String _) => "sc_string"
68 | (Number _) => "sc_number"
69 | (Char _) => "sc_char"))
71 (* generate code for a single constant:
72 data definition
73 followed by
74 constant definition - a value of SchemeObject type *)
76 fun const_code (name, scheme_type, data_name, data_variant, data_value) =
77 "SchemeObjectData " ^ data_name ^ " = {" ^ "." ^ data_variant ^ " = {" ^ data_value ^ "}};\n" ^
78 "SchemeObject " ^ name ^ " = {" ^ scheme_type ^ ", &" ^ data_name ^ "};\n"
80 (* symbol representation is written once for every symbol *)
82 val (written_symbols: string list ref) = ref []
84 fun emit_const (name, value) =
85 case value
86 of Pair (a,d) => emit_pair (name, a, d)
87 | Vector es => emit_vector (name, es)
88 | Symbol s => (* the same symbol may appear several times, but must be emitted only once *)
89 if (List.exists (fn n=>n=name) (!written_symbols))
90 then ""
91 else ( written_symbols := name :: (!written_symbols)
92 ; emit_symbol (name, s) )
93 | String s => emit_string (name, s)
94 | Number i => emit_number (name, i)
95 | Char c => emit_char (name, c)
96 | _ => "" (* singletons *)
98 and emit_pair (name, a, d) =
99 let
100 val a_name = const_name (a)
101 val d_name = const_name (d)
102 val data_name = new_name (name ^ "_data")
103 (* recusrively define car and cdr *)
104 val a_code = emit_const (a_name, a)
105 val d_code = emit_const (d_name, d)
106 in (a_code ^ d_code ^ (const_code (name, "SCHEME_PAIR", data_name,
107 "spd", "&" ^ a_name ^ ", &" ^ d_name))) end
109 (* generates names and statements for a list of constants.
110 returns a list of names and a (matching) list of statements *)
111 and sexprs_to_stmts [] names stmts = (names,stmts)
112 | sexprs_to_stmts (em :: rest) names stmts =
113 let val name = const_name em (* generate name *)
114 val stmt = emit_const (name,em) (* generate code *)
116 sexprs_to_stmts rest (names @ [name]) (stmts @ [stmt])
119 and emit_vector (name, es) =
121 (* generate names and statements for the vector elements *)
122 val (em_names,em_stmts) = sexprs_to_stmts es [] []
123 (* generate name,statement for the array that holds the elements *)
124 val arr_name = new_name (name^"_arr")
125 val arr_stmt = "SchemeObject* "^arr_name^"[] = {&"^
126 (String.concatWith ", &" em_names)^"};\n"
127 val data_name = new_name (name^"_data")
129 (String.concat em_stmts)^
130 arr_stmt^
131 const_code (name, "SCHEME_VECTOR", data_name, "svd",
132 (Int.toString (List.length es))^", "^arr_name)
135 and emit_symbol (name, s) =
136 let val data_name = new_name (name ^ "_data")
137 val syment_name = new_name (name ^ "_syment")
138 in ("SymbolEntry "^syment_name^" = {\""^(String.toCString s)^"\",0,NULL};\n"^
139 const_code (name, "SCHEME_SYMBOL", data_name, "smd",
140 "&"^syment_name))
143 and emit_string (name, s) =
144 let val data_name = new_name (name ^ "_data")
145 in (const_code (name, "SCHEME_STRING", data_name, "ssd",
146 (Int.toString (String.size s))^", \""^(String.toCString s)^"\""))
149 and emit_number (name, i) =
150 let val data_name = new_name (name ^ "_data")
151 in (const_code (name, "SCHEME_INT", data_name, "sid",
152 "(int)" ^ (if i<0 then "-"^(Int.toString (~i))
153 else (Int.toString i))))
156 and emit_char (name, c) =
157 let val data_name = new_name (name ^ "_data")
158 in (const_code (name, "SCHEME_CHAR", data_name,
159 "scd", "(char)" ^ (Int.toString (Char.ord c)))) end
161 fun reset () =
162 ( consts := []
163 ; symbols := []
164 ; written_symbols := [] )
166 fun add x =
167 let val name = const_name x
168 in consts := (name, x) :: (!consts) (* repeated symbols are detected in emit_const *)
169 ; name end
171 fun emit () =
172 "SchemeObject sc_undef = {-1, NULL};\n" ^
173 "SchemeObject sc_void = {SCHEME_VOID, NULL};\n" ^
174 "SchemeObject sc_nil = {SCHEME_NIL, NULL};\n" ^
175 "SchemeObjectData sc_false_data = {.sbd = {0}};\n" ^
176 "SchemeObject sc_false = {SCHEME_BOOL, &sc_false_data};\n" ^
177 "SchemeObjectData sc_true_data = {.sbd = {1}};\n" ^
178 "SchemeObject sc_true = {SCHEME_BOOL, &sc_true_data};\n" ^
179 String.concat (map emit_const (List.rev (!consts)))
181 (* freeze the symbols *)
182 val symbols = !symbols
184 end; (* DataSegment *)
186 structure ErrType = struct
187 datatype Type =
188 None
189 | ArgsCount of int
190 | AppNonProc
191 | NotAPair
192 | UndefinedSymbol of string;
194 fun toString None = "\"\""
195 | toString (ArgsCount expected) = "MSG_ERR_ARGCOUNT("^(intToCString expected)^")"
196 | toString AppNonProc = "MSG_ERR_APPNONPROC"
197 | toString NotAPair = "MSG_ERR_NOTPAIR"
198 | toString (UndefinedSymbol name) = "\"Symbol "^(String.toCString name)^" not defined\""
199 end;
201 structure CodeSegment (* : sig
202 val ErrType;
203 type StatementType;
204 val reset : unit -> unit;
205 val add : StatementType -> unit;
206 val emit : unit -> string;
207 end *) = struct
208 datatype StatementType =
209 Comment of string
210 | Label of string
211 | Assertion of string * ErrType.Type
212 | Error of ErrType.Type
213 | ErrorIf of string * ErrType.Type
214 | Branch of string
215 | BranchIf of string * string
216 | Set of string * string
217 | Push of string
218 | Pop of string
219 | Return
220 | Statement of string;
222 fun statementToString (Statement stmt) = "\t"^stmt^";"
223 | statementToString (Set (n,v)) = "\t"^n^" = "^v^";"
224 | statementToString (Branch l) = "\tgoto "^l^";"
225 | statementToString (BranchIf (c,l)) = "\tif ("^c^") goto "^l^";"
226 | statementToString (Comment s) = "\t/* "^s^" */"
227 | statementToString (Label s) = s^":"
228 | statementToString (Assertion (p,e)) = "\tASSERT_ALWAYS("^p^","^(ErrType.toString e)^");"
229 | statementToString (Error e) = "\tfprintf(stderr,"^(ErrType.toString e)^"); exit(-1);"
230 | statementToString (ErrorIf (p,e)) = "\tif ("^p^") {fprintf(stderr,"^(ErrType.toString e)^"); exit(-1);}"
231 | statementToString (Push s) = "\tpush("^s^");"
232 | statementToString (Pop s) = "\t"^s^" = pop();"
233 | statementToString Return = "\tRETURN();"
236 val statements = ref [] : StatementType list ref;
238 fun reset () =
239 statements := [];
241 fun add stmt = statements := !statements @ [stmt];
243 fun emit () =
245 (String.concatWith "\n" (List.map statementToString (!statements)))^
246 "\n\t;";
248 end; (* CodeSegment *)
250 structure Program: sig
251 val reset : unit -> unit;
252 val gen : Expr -> int -> unit;
253 val emit : int * int -> string;
254 end = struct
256 fun makeLabeler prefix =
258 val number = ref 0;
260 fn () => (number:= !number + 1
261 ;prefix^(Int.toString (!number)))
262 end;
264 val makeLabelElse = makeLabeler "else";
265 val makeLabelEndif = makeLabeler "endIf";
266 val makeLabelEndOr = makeLabeler "endOr";
267 val makeLabelSkipBody = makeLabeler "skipBody";
268 val makeLabelBody = makeLabeler "body";
269 val makeLabelRet = makeLabeler "ret";
270 val makeLabelApp = makeLabeler "app";
272 val CSadd = CodeSegment.add;
274 fun addProlog () =
275 (CSadd (CodeSegment.Comment "<push an initial activation frame>")
276 ;CSadd (CodeSegment.Comment "no arguments")
277 ;CSadd (CodeSegment.Push "0")
278 ;CSadd (CodeSegment.Comment "no enviroment")
279 ;CSadd (CodeSegment.Push "(int)NULL")
280 ;CSadd (CodeSegment.Comment "no return address (yet. will be set later)")
281 ;CSadd (CodeSegment.Push "(int)NULL")
282 ;CSadd (CodeSegment.Push "fp")
283 ;CSadd (CodeSegment.Set ("fp","sp"))
284 ;CSadd (CodeSegment.Comment "</push an initial activation frame>")
287 fun addEpilog () =
288 (CSadd (CodeSegment.Comment "<pop the initial activation frame>")
289 ;CSadd (CodeSegment.Pop "fp")
290 ;CSadd (CodeSegment.Comment "</pop the initial activation frame>")
293 fun reset () =
294 (DataSegment.reset ()
295 ;CodeSegment.reset ()
296 ;addProlog ()
299 fun maprtl f l = map f (List.rev l);
301 (* Generate code for a given expression
302 THE INVARIANT: r_res contains the value of the
303 expression after execution
305 fun genExpr (Const se) absDepth =
307 val lblConst = DataSegment.add se
309 CSadd (CodeSegment.Set ("r_res","(int)&"^lblConst))
311 | genExpr (Var _) absDepth = raise Match (* shouldn't be here *)
312 | genExpr (VarFree name) absDepth =
314 val lblElse = makeLabelElse ()
315 val lblEndif = makeLabelEndif ()
316 in (* probe for symbol in runtime data-structure *)
317 (CSadd (CodeSegment.Set ("r_res","(int)probeSymbolDefined(\""^name^"\",topLevel)"))
318 ;CSadd (CodeSegment.BranchIf ("r_res==0",lblElse))
319 ;CSadd (CodeSegment.BranchIf ("! ((SymbolEntry*)r_res)->isDefined",lblElse))
320 ;CSadd (CodeSegment.Set ("r_res","(int)((SymbolEntry*)r_res)->sob"))
321 ;CSadd (CodeSegment.Branch lblEndif)
322 ;CSadd (CodeSegment.Label lblElse)
323 ;CSadd (CodeSegment.Error (ErrType.UndefinedSymbol name))
324 ;CSadd (CodeSegment.Label lblEndif)
327 | genExpr (VarParam (name,ndx)) absDepth =
328 (CSadd (CodeSegment.Assertion ("("^(intToCString ndx)^">=0) & ("^(intToCString ndx)^"<ST_ARG_COUNT())",ErrType.None))
329 ;CSadd (CodeSegment.Set ("r_res",("ST_ARG("^(intToCString ndx)^")")))
331 | genExpr (VarBound (name,major,minor)) absDepth =
332 CSadd (CodeSegment.Set ("r_res",("((int**)ST_ENV())["^(intToCString major)^"]["^(intToCString minor)^"]")))
333 | genExpr (If (test,dit,dif)) absDepth =
335 val lblElse = makeLabelElse ()
336 val lblEndif = makeLabelEndif ()
337 val lblFalse = DataSegment.add (Bool false)
339 (genExpr test absDepth
340 ;CSadd (CodeSegment.BranchIf ("(SchemeObject*)r_res==&"^lblFalse,lblElse))
341 ;genExpr dit absDepth
342 ;CSadd (CodeSegment.Branch lblEndif)
343 ;CSadd (CodeSegment.Label lblElse)
344 ;genExpr dif absDepth
345 ;CSadd (CodeSegment.Label lblEndif)
348 | genExpr (abs as Abs _) absDepth = genAbs abs absDepth
349 | genExpr (abs as AbsOpt _) absDepth = genAbs abs absDepth
350 | genExpr (abs as AbsVar _) absDepth = genAbs abs absDepth
351 | genExpr (App (proc,args)) absDepth =
353 val lblRet = makeLabelRet ()
354 val lblApp = makeLabelApp ()
356 (* for each arg in args (backwards) do:
357 evaluate arg
358 push r_res to stack *)
359 ((maprtl (fn arg => (genExpr arg absDepth;
360 CSadd (CodeSegment.Push "r_res")))
361 args)
362 (* push length(args) to stack *)
363 ;CSadd (CodeSegment.Push (intToCString (List.length args)))
364 (* evaluate proc *)
365 ;genExpr proc absDepth
366 (* if r_res is not a closure then: error *)
367 ;CSadd (CodeSegment.BranchIf ("IS_SOB_CLOSURE(r_res)",lblApp))
368 ;CSadd (CodeSegment.Error ErrType.AppNonProc)
369 ;CSadd (CodeSegment.Label lblApp)
370 (* push proc.env to stack *)
371 ;CSadd (CodeSegment.Push "(int)SOB_CLOSURE_ENV(r_res)")
372 (* push return address *)
373 ;CSadd (CodeSegment.Push ("(int)&&"^lblRet))
374 (* goto proc.code *)
375 ;CSadd (CodeSegment.Branch "*(SOB_CLOSURE_CODE(r_res))")
376 (* return address *)
377 ;CSadd (CodeSegment.Label lblRet)
378 ;CSadd (CodeSegment.Set ("sp","fp"))
381 | genExpr (AppTP (proc,args)) absDepth =
382 (* for each arg in args (backwards) do:
383 evaluate arg
384 push r_res to stack *)
385 ((maprtl (fn arg => (genExpr arg absDepth;
386 CSadd (CodeSegment.Push "r_res")))
387 args)
388 (* push length(args) to stack *)
389 ;CSadd (CodeSegment.Push (intToCString (List.length args)))
390 (* evaluate proc *)
391 ;genExpr proc absDepth
392 (* if r_res is not a closure then: error *)
393 ;CSadd (CodeSegment.ErrorIf ("! IS_SOB_CLOSURE(r_res)",ErrType.AppNonProc))
394 (* push proc.env to stack *)
395 ;CSadd (CodeSegment.Push "(int)SOB_CLOSURE_ENV(r_res)")
396 (* push return address (of current activation frame) *)
397 ;CSadd (CodeSegment.Push "ST_RET()")
398 (* override current activation frame *)
399 ;CSadd (CodeSegment.Statement ("shiftActFrmDown()"))
400 (* goto proc.code *)
401 ;CSadd (CodeSegment.Branch "*(SOB_CLOSURE_CODE(r_res))")
403 | genExpr (Seq []) absDepth = ()
404 | genExpr (Seq (e :: rest)) absDepth = (genExpr e absDepth; genExpr (Seq rest) absDepth)
405 | genExpr (Or preds) absDepth =
407 val lblEndOr = makeLabelEndOr ()
409 (genOrPreds lblEndOr preds
410 ;CSadd (CodeSegment.Label lblEndOr)
413 | genExpr (Set ((VarFree name),value)) absDepth =
414 (* Set on VarFree is just the same as Def on VarFree *)
415 genExpr (Def ((VarFree name),value)) absDepth
416 | genExpr (Set ((VarParam (name,ndx)),value)) absDepth =
417 let val lblVoid = DataSegment.add Void
419 (genExpr value absDepth
420 ;CSadd (CodeSegment.Assertion ("("^(intToCString ndx)^">=0) & ("^(intToCString ndx)^"<ST_ARG_COUNT())",ErrType.None))
421 ;CSadd (CodeSegment.Set (("ST_ARG("^(intToCString ndx)^")"),"r_res"))
422 ;CSadd (CodeSegment.Set ("r_res","(int)&"^lblVoid))
425 | genExpr (Set ((VarBound (name,major,minor)),value)) absDepth =
426 let val lblVoid = DataSegment.add Void
428 (genExpr value absDepth
429 ;CSadd (CodeSegment.Set (("((int**)ST_ENV())["^(intToCString major)^"]["^(intToCString minor)^"]"),"r_res"))
430 ;CSadd (CodeSegment.Set ("r_res","(int)&"^lblVoid))
433 | genExpr (Def ((VarFree name),value)) absDepth =
434 let val lblVoid = DataSegment.add Void
436 (genExpr value absDepth
437 ;CSadd (CodeSegment.Set ("r[0]","(int)getSymbol(\""^name^"\",topLevel)"))
438 ;CSadd (CodeSegment.Set ("((SymbolEntry*)r[0])->isDefined","1"))
439 ;CSadd (CodeSegment.Set ("((SymbolEntry*)r[0])->sob","(SchemeObject*)r_res"))
440 ;CSadd (CodeSegment.Set ("r_res","(int)&"^lblVoid))
443 | genExpr (Def (_,_)) absDepth = raise Match (* shouldn't be here *)
444 and genOrPreds _ [] absDepth = ()
445 | genOrPreds lblEndOr (p :: rest) absDepth =
447 val lblFalse = DataSegment.add (Bool false)
449 (genExpr p absDepth
450 ;CSadd (CodeSegment.BranchIf ("(SchemeObject*)r_res!=&"^lblFalse,lblEndOr))
451 ;genOrPreds lblEndOr rest absDepth
454 and genAbs abs absDepth =
456 val formalParams = case abs of
457 Abs (params,_) => List.length params
458 | AbsOpt (params,_,_) => List.length params + 1
459 | AbsVar (_,_) => 1
460 | _ => raise Match (* shouldn't be here *)
461 val body = case abs of
462 Abs (_,body) => body
463 | AbsOpt (_,_,body) => body
464 | AbsVar (_,body) => body
465 | _ => raise Match (* shouldn't be here *)
466 val lblSkipBody = makeLabelSkipBody ()
467 val lblBody = makeLabelBody ()
469 (* 1. extend enviroment *)
470 (CSadd (CodeSegment.Set ("r[0]","(int)extendEnviroment( (int**)"^
471 (if absDepth=0 then "NULL"
472 else "ST_ENV()")^
473 ", "^
474 (intToCString absDepth)^
475 ")"))
476 (* 2. prepare code *)
477 ;CSadd (CodeSegment.Branch lblSkipBody)
478 ;CSadd (CodeSegment.Label lblBody)
479 (* prolog *)
480 ;CSadd (CodeSegment.Push "fp")
481 ;CSadd (CodeSegment.Set ("fp","sp"))
482 (* fix stack if needed *)
483 ;case abs of
484 (Abs _) => ()
485 | (AbsOpt _) => CSadd (CodeSegment.Statement ("prepareStackForAbsOpt("^(intToCString formalParams)^")"))
486 | (AbsVar _) => CSadd (CodeSegment.Statement ("prepareStackForAbsOpt("^(intToCString formalParams)^")"))
487 | _ => raise Match (* shouldn't be here *)
488 (* verify number of actual arguments *)
489 ;CSadd (CodeSegment.ErrorIf ("ST_ARG_COUNT()!="^(intToCString formalParams),
490 (ErrType.ArgsCount formalParams)))
491 (* body *)
492 ;genExpr body (absDepth+1)
493 (* epilog *)
494 ;CSadd (CodeSegment.Pop ("fp"))
495 ;CSadd CodeSegment.Return
496 ;CSadd (CodeSegment.Label lblSkipBody)
497 (* 3. create closure *)
498 ;CSadd (CodeSegment.Set ("r_res","(int)makeSchemeClosure((void*)r[0],&&"^lblBody^")"))
503 fun gen expr absDepth =
505 val lblRet = makeLabelRet ()
507 (* Set return address - the initial frame contains a dummy
508 return address. Here we set it to instruction after `expr`.
509 In case of several expressions compiled one after the other,
510 we could pop the initial activation frame (it was pushed in
511 addProlog) and push a new one. But since the only thing that
512 changes is the return address, we just hack it.
514 (CSadd (CodeSegment.Comment "set return address")
515 ;CSadd (CodeSegment.Set ("ST_RET()","(int)&&"^lblRet))
516 (* Compile the expression *)
517 ;genExpr expr absDepth
518 (* Don't forget the return address.. *)
519 ;CSadd (CodeSegment.Label lblRet)
524 fun emit (nregs,stacksize) =
525 (addEpilog ();
526 "/* COMP091 Scheme->C Compiler Generated Code */\n\n" ^
527 "#include \"scheme.h\"\n" ^
528 "#include \"assertions.h\"\n" ^
529 "#include \"arch.h\"\n" ^
530 "#include \"rtemgr.h\"\n" ^
531 "#include \"strings.h\"\n" ^
532 "extern SymbolNode *topLevel;\n"^
533 "\n/* Data Segment */\n" ^
534 (DataSegment.emit ()) ^
535 "\n/* Code Segment */\n" ^
536 "void schemeCompiledFunction() {\n" ^
537 "\t#include \"builtins.c\"\n\n"^
538 "\tinitArchitecture("^Int.toString(stacksize)^","^Int.toString(nregs)^");\n" ^
539 "\n" ^
540 (CodeSegment.emit ()) ^
541 "\n}\n"
544 end; (* Program *)