gcc config
[prop.git] / prop-src / lexer.pcc
blob a1a3d7c0088e1579e9e4896a9b4689cee721ef9f
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 //  This is the implementation of the Prop lexical scanner.
4 //
5 ///////////////////////////////////////////////////////////////////////////////
6 #include <new>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <iostream>
10 #include <strstream>
11 #include <ctype.h>
12 #include <AD/strings/quark.h>
13 #include "basics.ph"
14 #include "keywords.ph"
15 #include "parser.ph"
16 #include "type.h"
19 // int PropParser::get_token()
20 // {  int c = get_token2();
21 //    if (verbosity > 3)
22 //    {  if (c < 256) cerr << "[" << (char)c << "]";
23 //       else cerr << "[" << (PropToken)c << c << "]";
24 //    }
25 //    return c;
26 // }
28 ///////////////////////////////////////////////////////////////////////////////
30 //  The definition of the lexical scanner.
32 ///////////////////////////////////////////////////////////////////////////////
33 int PropParser::get_token()
34
35    for (;;) 
36    {
37       matchscan[LexicalContext] while (lexbuf)
38       {  <<C,PROP>> lexeme class MainKeywords:    
39          {  if (lexbuf.context() == C) start_sc(PROP); 
40             return ?lexeme; 
41          }
42       |  <<C>> /\(/:   { start_quote('(',')'); emit(); }
43       |  <<C>> /\[/:   { start_quote('[',']'); emit(); }
44       |  <<C>> /\{/:   { start_quote('{','}'); emit(); }
45       |  <<C>> /\(\|/: { start_quote('(',')'); start_sc(PROP); return ."(|"; }
46       |  <<C>> /\[\|/: { start_quote('[',']'); start_sc(PROP); return ."[|"; }
47       |  <<C>> /\{\|/: { start_quote('{','}'); start_sc(PROP); return ."{|"; }
48       |  <<C>> /\.\(/: { start_quote('(',')'); start_sc(PROP); return .".("; }
49       |  <<C>> /\.\[/: { start_quote('[',']'); start_sc(PROP); return .".["; }
50       |  <<C>> /\.#{integer}/: { emit("._"); emit(lexbuf.text()+2); }
51       |  <<C>> /\-\>#{integer}/: { emit("->_"); emit(lexbuf.text()+3); }
52       |  <<C>> /#[\(\{\[]/: 
53          { lexbuf.push_back(1); start_sc(PROP); return '#'; }
54       |  <<C>> /[\)\}\]]/:               
55          {  char c = end_quote(lexbuf[0]); 
56             if (levels[levels_top-1] > quote_top)
57             {  end_sc(); return c; }
58             else 
59             {  emit(); }
60          }
61       |  <<C>> lexeme class SepKeywords:
62          {  if (levels[levels_top-1] >= quote_top)
63             {  end_sc(); return ?lexeme; }
64             else 
65             {  emit(); }
66          }
67       |  <<C>> /\.{string}/: { emit_cons(lexbuf.text()+1); } 
68       |  <<C>> QUARK_TOK:   { start_sc(PROP); return QUARK_TOK; }
69       |  <<C>> BIGINT_TOK:  { start_sc(PROP); return BIGINT_TOK; }
70       |  <<C,PROP>> lexeme class Symbols:     
71          {  if (lexbuf.context() == C) emit(); else return ?lexeme; }
72       |  <<C,PROP>> lexeme class Literals: 
73          {  if (lexbuf.context() == C) emit(); else return ?lexeme; }
74       |  <<C>> ID_TOK:                   
75          {  Quark id(lexbuf.text());
76             Bool  is_from_current_scope;
77             my_exp = pv_env.lookup(id,&is_from_current_scope);
78             if (my_exp != NOexp)
79             {  emit(my_exp); 
80             }  else 
81             {  if (lexbuf.lookahead() == '<' && 
82                    is_poly_datatype(lookup_ty(id)))
83                {  start_sc(PROP); return POLY_DATATYPE; }
84                else
85                {  if (lexbuf[lexbuf.length()-1] == '\'')
86                   {  int i;
87                      for (i = lexbuf.length()-1; i >= 0; i--)
88                         if (lexbuf[i] != '\'') break;
89                      lexbuf[i+1] = '\0';
90                      if (my_cons = find_cons(lexbuf.text()))
91                      {  start_sc(PROP); return CONS_EXP; }
92                      lexbuf[i+1] = '\'';
93                   }
94                   if (lexbuf[0] == '?' || lexbuf[0] == '$' || 
95                       lexbuf[0] == '#' && in_rewrite ||
96                       lexbuf[lexbuf.length()-1] == '\'')
97                      error("%Lpattern variable '%s' has no binding at this point\n",
98                            lexbuf.text());
99                   emit(); 
100                }
101             }
102          }
103       |  <<C>> PUNCTUATIONS:             { emit(); }
104       |  <<C>> /^#[ \t]*include[ \t]*(\"|\<)[^\"\>\n]*\.[pP][^\\\/\n]*(\"|\>).*/:
105          {  char name_buffer[256], * q;
106             const char * p; 
107             // locate < or "
108             for (p = lexbuf.text(); *p != '<' && *p != '"'; p++);
109             // copy the filename to the buffer
110             for (p++, q = name_buffer; *p != '"' && *p != '>'; p++, q++) *q = *p;
111             *q = '\0'; 
112             Quark file_name(name_buffer);
113             // emit the filename sans [pP]
114             for ( ;*p != '.'; p--);
115             emit (lexbuf.text(), p - lexbuf.text() + 1);
116             emit (p+2); 
117             open_include_file(file_name);
118          }
119       |  <<C>> /[ \t\\\014]/: { emit(); }
120       |  <<C>> /(\/\/.*)?\n/: { emit(); line++; }
121       |  <<C,PROP>> /^##.*\n/:{ emit_header(); line++; }
122       |  <<C>> /^#.*/:        { emit(); }
123       |  <<C,PROP>> /`/:      { start_quote('`','`'); start_sc(QUOTE); }
124       |  <<PROP>> /[#_]/:     { return lexbuf[0]; }
125       |  <<PROP>> /\(/:       { start_quote('(',')'); return '('; }
126       |  <<PROP>> /\{/:       { start_quote('{','}'); return '{'; }
127       |  <<PROP>> /\[/:       { start_quote('[',']'); return '['; }
128       |  <<PROP>> /\(\|/:     { start_quote('(',')'); return ."(|"; }
129       |  <<PROP>> /\{\|/:     { start_quote('{','}'); return ."{|"; }
130       |  <<PROP>> /\[\|/:     { start_quote('[',']'); return ."[|"; }
131       |  <<PROP>> /\.\(/:     { start_quote('(',')'); return .".("; }
132       |  <<PROP>> /\.\[/:     { start_quote('[',']'); return .".["; }
133       |  <<PROP>> /[\)\}\]]/: { return end_quote(lexbuf[0]); }
134       |  <<PROP>> /\|\)/:     { end_quote(')'); return ."|)"; }
135       |  <<PROP>> /\|\}/:     { end_quote('}'); return ."|}"; }
136       |  <<PROP>> /\|\]/:     { end_quote(']'); return ."|]"; }
137       |  <<PROP>> lexeme class Keywords:    { return ?lexeme; }
138       |  <<PROP>> lexeme class SepKeywords: { return ?lexeme; }
139       |  <<PROP>> /\.{string}/: 
140          {  my_cons = lookup_cons(lexbuf.text()+1); 
141             return CONS_TOK;
142          }
143       |  <<PROP>> ID_TOK:                
144          {  if ((my_cons = find_cons(lexbuf.text())) != NOcons) 
145                return CONS_TOK;
146             if (lexbuf[lexbuf.length()-1] == '\'')
147             {  int i;
148                for (i = lexbuf.length()-1; i >= 0; i--)
149                   if (lexbuf[i] != '\'') break;
150                lexbuf[i+1] = '\0';
151                if (my_cons = find_cons(lexbuf.text()))
152                   return CONS_EXP;
153                lexbuf[i+1] = '\'';
154             }
155             return ID_TOK;
156          }
157       |  <<PROP>> QUARK_TOK:             { return QUARK_TOK; }
158       |  <<PROP>> BIGINT_TOK:            { return BIGINT_TOK; }
159       |  <<PROP>> REGEXP_TOK:            { return REGEXP_TOK; }
160       |  <<PROP>> PUNCTUATIONS:          { return lexbuf[0]; }
161       |  <<PROP>> /[ \t\014]/:           { /* skip */ }
162       |  <<PROP>> /(\/\/.*)?\n/:         { line++; }
163       |  /\/\*/:                         { emit(); start_sc(COMMENT); }
164       |  <<COMMENT>> /\*\//:             { emit(); end_sc(); }
165       |  <<COMMENT>> /\n/:               { emit(); line++; }
166       |  <<COMMENT>> /./:                { emit(); }
167       |  /^#[ \t]*(line[ \t]+)?{digits}[ \t]*{string}\n/:
168          {  char buffer[1024];
169             const char * p;
170             char * q;
171             for (p = lexbuf.text(); ! isdigit(*p); p++);
172             line = atol(p);
173             for ( ; *p != '"'; p++);
174             for (p++, q = buffer; (*q = *p) != '"'; p++, q++);
175             *q = '\0';
176             file = Quark(buffer);
177             debug_msg("[%s, %i]", file, line);
178             emit();
179          }
180       |  <<QUOTE>>/`/:   { meta.emit('\0'); end_quote('`'); end_sc(); 
181                            return META_QUOTE; 
182                          }
183       |  <<QUOTE>>/./:   { meta.emit(lexbuf[0]); }
184       |  <<QUOTE>>/\n/:  { line++; meta.emit(lexbuf[0]); }
185       |  /^%%.*\n/:      { line++; emit_doc(); }
186       |  /./: { error("%Lillegal character %c\n", lexbuf[0]); }
187       }
188       if (includes_top != 0) close_include_file();
189       else return EOF;
190    }