src/parser.y

   1 %{
   2 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   3 /*
   4  * Gnumeric Parser
   5  *
   6  * (C) 1998-2002 GNOME Foundation
   7  * Copyright (C) 2002-2009 Morten Welinder
   8  *
   9  * Authors:
  10  *    Miguel de Icaza (miguel@gnu.org)
  11  *    Jody Goldberg (jody@gnome.org)
  12  *    Morten Welinder (terra@diku.dk)
  13  *    Almer S. Tigelaar (almer@gnome.org)
  14  */
  15 #include <gnumeric-config.h>
  16 #include <glib/gi18n-lib.h>
  17 #include "gnumeric.h"
  18 #include "number-match.h"
  19 #include "expr.h"
  20 #include "expr-impl.h"
  21 #include "expr-name.h"
  22 #include "func.h"
  23 #include "workbook.h"
  24 #include "sheet.h"
  25 #include "gnm-format.h"
  26 #include "application.h"
  27 #include "parse-util.h"
  28 #include "gutils.h"
  29 #include "style.h"
  30 #include "value.h"
  31 #include <goffice/goffice.h>
  32
  33 #include <string.h>
  34 #include <errno.h>
  35 #include <stdlib.h>
  36
  37 #define YYDEBUG 1
  38
  39 /* ------------------------------------------------------------------------- */
  40 /* Allocation with disposal-on-error */
  41
  42 /*
  43  * If some dork enters "=1+2+2*(1+" we have already allocated space for
  44  * "1+2", "2", and "1" before the parser sees the syntax error and warps
  45  * us to the error production in the "line" non-terminal.
  46  *
  47  * To make sure we can clean up, we register every allocation.  On success,
  48  * nothing should be left (except the final expression which is unregistered),
  49  * but on failure we must free everything allocated.
  50  *
  51  * Note: there is some room left for optimisation here.  Talk to terra@diku.dk
  52  * before you set out to do it.
  53  */
  54
  55 static void
  56 free_expr_list_list (GSList *list)
  57 {
  58         GSList *l;
  59         for (l = list; l; l = l->next)
  60                 gnm_expr_list_unref (l->data);
  61         g_slist_free (list);
  62 }
  63
/* Signature shared by every cleanup callback kept on the stack below. */
typedef void (*ParseDeallocator) (void *);
/*
 * Flat array of (pointer, deallocator) pairs: register_allocation stores the
 * registered object at an even index and the function that frees it in the
 * slot right after it.
 */
static GPtrArray *deallocate_stack;
  66
/* Create the (initially empty) registration stack. */
static void
deallocate_init (void)
{
        deallocate_stack = g_ptr_array_new ();
}
  72
/*
 * Destroy the registration stack itself.  No deallocator is invoked here;
 * entries still registered must be released with deallocate_all first.
 */
static void
deallocate_uninit (void)
{
        g_ptr_array_free (deallocate_stack, TRUE);
        /* Clear the global so a stale pointer is never reused. */
        deallocate_stack = NULL;
}
  79
  80 static void
  81 deallocate_all (void)
  82 {
  83         int i;
  84
  85         for (i = 0; i < (int)deallocate_stack->len; i += 2) {
  86                 ParseDeallocator freer = g_ptr_array_index (deallocate_stack, i + 1);
  87                 freer (g_ptr_array_index (deallocate_stack, i));
  88         }
  89
  90         g_ptr_array_set_size (deallocate_stack, 0);
  91 }
  92
  93 static void
  94 deallocate_assert_empty (void)
  95 {
  96         if (deallocate_stack->len == 0)
  97                 return;
  98
  99         g_warning ("deallocate_stack not empty as expected.");
 100         deallocate_all ();
 101 }
 102
 103 static void *
 104 register_allocation (gpointer data, ParseDeallocator freer)
 105 {
 106         /* It's handy to be able to register and unregister NULLs.  */
 107         if (data) {
 108                 int len;
 109                 /*
 110                  * There are really only a few different freers, so we
 111                  * could encode the freer in the lower bits of the data
 112                  * pointer.  Unfortunately, no-one can predict how high
 113                  * Miguel would jump when he found out.
 114                  */
 115                 len = deallocate_stack->len;
 116                 g_ptr_array_set_size (deallocate_stack, len + 2);
 117                 g_ptr_array_index (deallocate_stack, len) = data;
 118                 g_ptr_array_index (deallocate_stack, len + 1) = freer;
 119         }
 120
 121         /* Returning the pointer here improved readability of the caller.  */
 122         return data;
 123 }
 124
/* Register a single expression; it is released with gnm_expr_free. */
#define register_expr_allocation(expr) \
  register_allocation ((gpointer)(expr), (ParseDeallocator)&gnm_expr_free)

/* Register an expression list; it is released with gnm_expr_list_unref. */
#define register_expr_list_allocation(list) \
  register_allocation ((list), (ParseDeallocator)&gnm_expr_list_unref)

/* Register a list of expression lists; released with free_expr_list_list. */
#define register_expr_list_list_allocation(list) \
  register_allocation ((list), (ParseDeallocator)&free_expr_list_list)
 133
 134 static void
 135 unregister_allocation (void const *data)
 136 {
 137         int i, pos;
 138
 139         /* It's handy to be able to register and unregister NULLs.  */
 140         if (!data)
 141                 return;
 142
 143         pos = deallocate_stack->len - 2;
 144         if (pos >= 0 && data == g_ptr_array_index (deallocate_stack, pos)) {
 145                 g_ptr_array_set_size (deallocate_stack, pos);
 146                 return;
 147         }
 148
 149         /*
 150          * Bummer.  In certain error cases, it is possible that the parser
 151          * will reduce after it has discovered a token that will lead to an
 152          * error.  "2/16/1800 00:00" (without the quotes) is an example.
 153          * The first "00" is registered before the second division is
 154          * reduced.
 155          *
 156          * Another example is 564077 where we deallocate out of order.
 157          *
 158          * This isn't a big deal -- we will just look at the entries below
 159          * the top.
 160          */
 161         for (i = pos - 2; i >= 0; i -= 2) {
 162                 if (data == g_ptr_array_index (deallocate_stack, i)) {
 163                         g_ptr_array_remove_index (deallocate_stack, i);
 164                         g_ptr_array_remove_index (deallocate_stack, i);
 165                         return;
 166                 }
 167         }
 168
 169         g_warning ("Unbalanced allocation registration");
 170 }
 171
 172 /* ------------------------------------------------------------------------- */
 173
/*
 * Bison/Yacc internals: forward declarations of the lexer and the error
 * callback; neither is defined in this part of the file.
 */
static int yylex (void);
static int yyerror (char const *s);
 177
/* Everything the lexer, the helper functions and the grammar actions share. */
typedef struct {
        char const *ptr;        /* current position of the lexer */
        char const *start;      /* start of the expression */

        /* Location where the parsing is taking place */
        GnmParsePos const *pos;

        /* loaded from convs with locale specific mappings */
        gunichar decimal_point;
        gunichar arg_sep;
        gunichar array_col_sep;
        gunichar array_row_sep;
        /* if arg_sep conflicts with array_col_sep or array_row_sep */
        int in_array_sep_is;    /* token id */

        GnmExprParseFlags     flags;
        GnmConventions const *convs;

        /* dynamic state */
        int in_array; /* toggled in the lexer for '{' and '}' */
        GnmExprList *result;    /* set by the "line" rule on success, reset to NULL on error */

        /* Where report_err stores the error; when NULL the GError is freed instead. */
        GnmParseError *error;
} ParserState;
 202
/* Parser state shared by the helper functions and grammar actions in this file. */
 204 static ParserState *state;
 205
 206 static void
 207 report_err (ParserState *state, GError *err,
 208             char const *last, int guesstimate_of_length)
 209 {
 210         if (state->error != NULL) {
 211                 state->error->err        = err;
 212                 state->error->end_char   = last - state->start;
 213                 state->error->begin_char = state->error->end_char - guesstimate_of_length;
 214                 if (state->error->begin_char < 0)
 215                         state->error->begin_char = 0;
 216         } else
 217                 g_error_free (err);
 218 }
 219
 220 static gboolean
 221 is_signed (const GnmExpr *expr)
 222 {
 223         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_UNARY_NEG)
 224                 return TRUE;
 225
 226         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_UNARY_PLUS)
 227                 return TRUE;
 228
 229         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 230                 GnmValue const *v = expr->constant.value;
 231                 return VALUE_IS_FLOAT (v) && value_get_as_float (v) < 0;
 232         }
 233
 234         return FALSE;
 235 }
 236
 237 /* Handle -cst for use in arrays.  Don't handle other types here.  */
 238 static GnmExpr *
 239 fold_negative_constant (GnmExpr *expr)
 240 {
 241         if (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 242                 GnmValue *v = (GnmValue *)expr->constant.value;
 243
 244                 if (VALUE_IS_FLOAT (v)) {
 245                         gnm_float f = value_get_as_float (v);
 246                         expr->constant.value = value_new_float (0 - f);
 247                         value_release (v);
 248                         return expr;
 249                 }
 250         }
 251
 252         return NULL;
 253 }
 254
 255 /* Handle +cst for use in arrays.  Don't handle other types here.  */
 256 static GnmExpr *
 257 fold_positive_constant (GnmExpr *expr)
 258 {
 259         if (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 260                 const GnmValue *v = expr->constant.value;
 261                 if (VALUE_IS_FLOAT (v))
 262                         return expr;
 263         }
 264
 265         return NULL;
 266 }
 267
 268 static GnmExpr *
 269 build_unary_op (GnmExprOp op, GnmExpr *expr)
 270 {
 271         if (!expr) return NULL;
 272
 273         unregister_allocation (expr);
 274         return register_expr_allocation (gnm_expr_new_unary (op, expr));
 275 }
 276
 277 static GnmExpr *
 278 build_binop (GnmExpr *l, GnmExprOp op, GnmExpr *r)
 279 {
 280         if (!l || !r) return NULL;
 281
 282         unregister_allocation (r);
 283         unregister_allocation (l);
 284         return register_expr_allocation (gnm_expr_new_binary (l, op, r));
 285 }
 286
 287 static GnmExpr *
 288 build_logical (GnmExpr *l, gboolean is_and, GnmExpr *r)
 289 {
 290         static GnmFunc *and_func = NULL, *or_func = NULL;
 291
 292         if (!l || !r) return NULL;
 293
 294         if (and_func == NULL)
 295                 and_func = gnm_func_lookup ("AND", NULL);
 296         if (or_func == NULL)
 297                 or_func = gnm_func_lookup ("OR", NULL);
 298
 299         unregister_allocation (r);
 300         unregister_allocation (l);
 301         return register_expr_allocation
 302                 (gnm_expr_new_funcall2 (is_and ? and_func : or_func, l, r));
 303 }
 304
 305 static GnmExpr *
 306 build_not (GnmExpr *expr)
 307 {
 308         static GnmFunc *not_func = NULL;
 309
 310         if (!expr) return NULL;
 311
 312         if (not_func == NULL)
 313                 not_func = gnm_func_lookup ("NOT", NULL);
 314         unregister_allocation (expr);
 315         return register_expr_allocation
 316                 (gnm_expr_new_funcall1 (not_func, expr));
 317 }
 318
 319 static GnmExpr *
 320 build_exp (GnmExpr *l, GnmExpr *r)
 321 {
 322         if (is_signed (l)) {
 323                 /* See bug 115941 */
 324                 l = build_unary_op (GNM_EXPR_OP_PAREN, l);
 325         }
 326
 327         if (GNM_EXPR_GET_OPER (l) == GNM_EXPR_OP_EXP) {
 328                 /* Add ()s to x^y^z */
 329                 l = build_unary_op (GNM_EXPR_OP_PAREN, l);
 330         }
 331
 332         if (GNM_EXPR_GET_OPER (r) == GNM_EXPR_OP_EXP) {
 333                 /* Add ()s to x^y^z */
 334                 r = build_unary_op (GNM_EXPR_OP_PAREN, r);
 335         }
 336
 337         return build_binop (l, GNM_EXPR_OP_EXP, r);
 338 }
 339
 340 /*
 341  * Build an array expression.
 342  *
 343  * Returns NULL on failure.  Caller must YYERROR in that case.
 344  */
 345 static GnmExpr *
 346 build_array (GSList *cols)
 347 {
 348         GnmValue *array;
 349         int mx, y;
 350
 351         if (!cols) {
 352                 report_err (state, g_error_new (1, PERR_INVALID_EMPTY,
 353                         _("An array must have at least 1 element")),
 354                         state->ptr, 0);
 355                 return NULL;
 356         }
 357
 358         mx = g_list_length (cols->data);
 359         array = value_new_array_empty (mx, g_slist_length (cols));
 360
 361         y = 0;
 362         while (cols) {
 363                 GSList *row = cols->data;
 364                 int x = 0;
 365                 while (row && x < mx) {
 366                         GnmExpr const *expr = row->data;
 367                         GnmValue const *v = expr->constant.value;
 368
 369                         g_assert (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT);
 370
 371                         value_array_set (array, x, y, value_dup (v));
 372
 373                         x++;
 374                         row = row->next;
 375                 }
 376                 if (x < mx || row) {
 377                         /* parser_error = PARSE_ERR_SYNTAX; */
 378                         report_err (state, g_error_new (1, PERR_ASYMETRIC_ARRAY,
 379                                 _("Arrays must be rectangular")),
 380                                 state->ptr, 0);
 381                         value_release (array);
 382                         return NULL;
 383                 }
 384                 y++;
 385                 cols = cols->next;
 386         }
 387
 388         return register_expr_allocation (gnm_expr_new_constant (array));
 389 }
 390
 391 /*
 392  * Build a range constructor.
 393  *
 394  * Returns NULL on failure.  Caller must YYERROR in that case.
 395  */
 396 static GnmExpr *
 397 build_range_ctor (GnmExpr *l, GnmExpr *r, GnmExpr *validate)
 398 {
 399         if (!l || !r) return NULL;
 400
 401         if (validate != NULL) {
 402                 if (GNM_EXPR_GET_OPER (validate) != GNM_EXPR_OP_CELLREF ||
 403                     validate->cellref.ref.sheet != NULL) {
 404                         report_err (state, g_error_new (1, PERR_UNEXPECTED_TOKEN,
 405                                 _("Constructed ranges use simple references")),
 406                                 state->ptr, 0);
 407                         return NULL;
 408                     }
 409         }
 410
 411         unregister_allocation (r);
 412         unregister_allocation (l);
 413         return register_expr_allocation (gnm_expr_new_range_ctor (l, r));
 414 }
 415
 416 /*
 417  * Build an intersection expression.
 418  *
 419  * Returns NULL on failure.  Caller must YYERROR in that case.
 420  */
 421 static GnmExpr *
 422 build_intersect (GnmExpr *l, GnmExpr *r)
 423 {
 424         if (!l || !r) return NULL;
 425
 426         if (gnm_expr_is_rangeref (l) && gnm_expr_is_rangeref (r))
 427                 return build_binop (l, GNM_EXPR_OP_INTERSECT, r);
 428         report_err (state, g_error_new (1, PERR_SET_CONTENT_MUST_BE_RANGE,
 429                 _("All entries in the set must be references")),
 430                 state->ptr, 0);
 431         return NULL;
 432 }
 433
 434 /*
 435  * Build a set expression.
 436  *
 437  * Returns NULL on failure.  Caller must YYERROR in that case.
 438  */
 439 static GnmExpr *
 440 build_set (GnmExprList *list)
 441 {
 442         /* verify that every thing is a ref */
 443         GnmExprList *ptr;
 444         for (ptr = list; ptr != NULL ; ptr = ptr->next) {
 445                 GnmExpr const *expr = ptr->data;
 446                 if (!expr || !gnm_expr_is_rangeref (expr)) {
 447                         report_err (state, g_error_new (1, PERR_SET_CONTENT_MUST_BE_RANGE,
 448                                 _("All entries in the set must be references")),
 449                                 state->ptr, 0);
 450                         return NULL;
 451                 }
 452         }
 453
 454         unregister_allocation (list);
 455         return register_expr_allocation (gnm_expr_new_set (list));
 456 }
 457
 458 /**
 459  * parse_string_as_value :
 460  *
 461  * Try to parse the entered text as a basic value (empty, bool, int,
 462  * gnm_float, err) if this succeeds, we store this as a GnmValue otherwise, we
 463  * return a string.
 464  */
 465 static GnmExpr *
 466 parse_string_as_value (GnmExpr *str)
 467 {
 468         GnmValue *v = format_match_simple (value_peek_string (str->constant.value));
 469
 470         if (v != NULL) {
 471                 unregister_allocation (str);
 472                 gnm_expr_free (str);
 473                 return register_expr_allocation (gnm_expr_new_constant (v));
 474         }
 475         return str;
 476 }
 477
 478 /**
 479  * parser_simple_val_or_name :
 480  * @str : An expression with oper constant, whose value is a string.
 481  *
 482  * Check to see if a string is a simple value or failing that a named
 483  * expression, if it is not create a placeholder name for it.
 484  */
 485 static GnmExpr *
 486 parser_simple_val_or_name (GnmExpr *str_expr)
 487 {
 488         GnmExpr const *res;
 489         char const *str = value_peek_string (str_expr->constant.value);
 490         GnmValue *v = format_match_simple (str);
 491
 492         /* if it is not a simple value see if it is a name */
 493         if (v == NULL) {
 494                 GnmNamedExpr *nexpr = expr_name_lookup (state->pos, str);
 495                 if (nexpr == NULL) {
 496                         if (state->flags & GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID) {
 497                                 report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
 498                                                                 _("Name '%s' does not exist"),
 499                                                                 str),
 500                                             state->ptr, 0);
 501                                 res = NULL;
 502                         } else if (state->flags & GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_STRINGS) {
 503                                 res = gnm_expr_new_constant (value_new_string (str));
 504                         } else if (state->convs->input.name_validate (str)) {
 505                                 GnmParsePos pp = *state->pos;
 506                                 pp.sheet = NULL;
 507                                 /* Create a place holder */
 508                                 nexpr = expr_name_add (&pp, str, NULL, NULL, TRUE, NULL);
 509                                 res = gnm_expr_new_name (nexpr, NULL, NULL);
 510                         } else {
 511                                 report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
 512                                                                 _("'%s' cannot be used as a name"),
 513                                                                 str),
 514                                             state->ptr, 0);
 515                                 res = NULL;
 516                         }
 517                 } else
 518                         res = gnm_expr_new_name (nexpr, NULL, NULL);
 519         } else
 520                 res = gnm_expr_new_constant (v);
 521
 522         unregister_allocation (str_expr);
 523         gnm_expr_free (str_expr);
 524         return register_expr_allocation (res);
 525 }
 526
 527 static Sheet *
 528 parser_sheet_by_name (Workbook *wb, GnmExpr *name_expr)
 529 {
 530         char const *name = value_peek_string (name_expr->constant.value);
 531         Sheet *sheet = NULL;
 532
 533         if (wb == NULL)
 534                 return NULL;
 535
 536         sheet = workbook_sheet_by_name (wb, name);
 537
 538         /* Applix has absolute and relative sheet references */
 539         if (sheet == NULL && *name == '$' &&
 540             state->convs->allow_absolute_sheet_references)
 541                 sheet = workbook_sheet_by_name (wb, name + 1);
 542
 543         if (sheet == NULL)
 544                 /* TODO : length is broken in the context of quoted names or
 545                  * names with escaped character */
 546                 /* -1 is a kludge.  We know that this routine is only called
 547                  * when the last token was SHEET_SEP */
 548                 report_err (state, g_error_new (1, PERR_UNKNOWN_SHEET,
 549                         _("Unknown sheet '%s'"), name),
 550                         state->ptr-1, strlen (name));
 551
 552         return sheet;
 553 }
 554
/*
 * Make byacc happier: forward declaration of the parser entry point.
 * NOTE(review): presumably byacc does not emit this prototype itself --
 * confirm before removing.
 */
static int yyparse (void);
 557
 558 %}
 559
/* Semantic value types carried by tokens and non-terminals. */
%union {
        GnmExpr         *expr;
        GnmValue        *value;
        GnmCellRef      *cell;
        GnmExprList     *list;
        Sheet           *sheet;
        Workbook        *wb;
}
/* Non-terminals and tokens, grouped by the %union member they use. */
%type  <list>   opt_exp arg_list array_row array_rows
%type  <expr>   exp array_exp function string_opt_quote cellref
%token <expr>   STRING QUOTED_STRING CONSTANT RANGEREF tok_GTE tok_LTE tok_NE tok_AND tok_OR tok_NOT INTERSECT
%token          ARG_SEP ARRAY_COL_SEP ARRAY_ROW_SEP SHEET_SEP INVALID_TOKEN
%type  <sheet>  sheetref
%type  <wb>     workbookref
%token <wb>     tok_WORKBOOKREF

/*
 * Operator precedence and associativity.  In Bison, each later line binds
 * tighter than the lines before it.
 */
%left '<' '>' '=' tok_GTE tok_LTE tok_NE
%left '&'
%left '-' '+'
%left '*' '/'
%right tok_RIGHT_EXP
%left  tok_LEFT_EXP
%nonassoc '%'
%nonassoc tok_NEG tok_PLUS tok_NOT
%left tok_AND tok_OR
%left ','
%left RANGE_INTERSECT
%left RANGE_SEP
 588
 589 %%
 590 line:   opt_exp exp {
 591                 unregister_allocation ($2);
 592                 unregister_allocation ($1);
 593                 state->result = gnm_expr_list_prepend ($1, $2);
 594         }
 595
 596         | error         {
 597                 if (state->result != NULL) {
 598                         gnm_expr_list_unref (state->result);
 599                         state->result = NULL;
 600                 }
 601         }
 602         ;
 603
 604 opt_exp : opt_exp exp  ARG_SEP {
 605                unregister_allocation ($2);
 606                unregister_allocation ($1);
 607                $$ = gnm_expr_list_prepend ($1, $2);
 608                register_expr_list_allocation ($$);
 609         }
 610         | { $$ = NULL; register_expr_list_allocation ($$); }
 611         ;
 612
/*
 * A single expression.  Most alternatives delegate to a build_* helper,
 * which swaps the operands on the cleanup stack for the newly built node.
 * Alternatives that can fail call YYERROR.
 */
exp:      CONSTANT      { $$ = $1; }
        | QUOTED_STRING { $$ = $1; }
        | STRING        {
                /* A bare word: a simple value, a name, or an error. */
                $$ = parser_simple_val_or_name ($1);
                if ($$ == NULL) { YYERROR; }
        }
        | cellref       { $$ = $1; }
        | exp '+' exp   { $$ = build_binop ($1, GNM_EXPR_OP_ADD,        $3); }
        | exp '-' exp   { $$ = build_binop ($1, GNM_EXPR_OP_SUB,        $3); }
        | exp '*' exp   { $$ = build_binop ($1, GNM_EXPR_OP_MULT,       $3); }
        | exp '/' exp   { $$ = build_binop ($1, GNM_EXPR_OP_DIV,        $3); }
        | exp tok_RIGHT_EXP exp { $$ = build_exp ($1, $3); }
        | exp tok_LEFT_EXP  exp { $$ = build_exp ($1, $3); }
        | exp '&' exp   { $$ = build_binop ($1, GNM_EXPR_OP_CAT,        $3); }
        | exp '=' exp   { $$ = build_binop ($1, GNM_EXPR_OP_EQUAL,      $3); }
        | exp '<' exp   { $$ = build_binop ($1, GNM_EXPR_OP_LT,         $3); }
        | exp '>' exp   { $$ = build_binop ($1, GNM_EXPR_OP_GT,         $3); }
        | exp tok_GTE exp       { $$ = build_binop ($1, GNM_EXPR_OP_GTE,        $3); }
        | exp tok_NE  exp       { $$ = build_binop ($1, GNM_EXPR_OP_NOT_EQUAL,  $3); }
        | exp tok_LTE exp       { $$ = build_binop ($1, GNM_EXPR_OP_LTE,        $3); }
        | exp tok_AND exp       { $$ = build_logical ($1, TRUE, $3); }
        | exp tok_OR  exp       { $$ = build_logical ($1, FALSE, $3); }
        | exp RANGE_INTERSECT exp {
                $$ = build_intersect ($1, $3);
                if ($$ == NULL) { YYERROR; }
        }

        | '-' exp %prec tok_NEG {
                /* Negate float constants in place; wrap everything else. */
                GnmExpr *tmp = fold_negative_constant ($2);
                $$ = tmp ? tmp : build_unary_op (GNM_EXPR_OP_UNARY_NEG, $2);
        }
        | '+' exp %prec tok_PLUS {
                /* Don't fold here.  */
                $$ = build_unary_op (GNM_EXPR_OP_UNARY_PLUS, $2);
        }
        | tok_NOT exp { $$ = build_not ($2); }
        | exp '%' { $$ = build_unary_op (GNM_EXPR_OP_PERCENTAGE, $1); }

        /*
         * Parentheses: empty is an error, one element is a parenthesised
         * expression, several elements form a set.
         */
        | '(' arg_list ')' {
                if ($2 == NULL) {
                        report_err (state, g_error_new (1, PERR_INVALID_EMPTY,
                                _("() is an invalid expression")),
                                state->ptr-2, 2);
                        YYERROR;
                } else {
                        if ($2->next == NULL) {
                                unregister_allocation ($2);
                                $$ = register_expr_allocation (gnm_expr_new_unary (GNM_EXPR_OP_PAREN, $2->data));
                                /* NOTE : free list not content */
                                gnm_expr_list_free ($2);
                        } else {
                                $$ = build_set ($2);
                                if ($$ == NULL) { YYERROR; }
                        }
                }
        }
        | '{' array_rows '}' {
                /*
                 * The row lists are unregistered and freed here whether or
                 * not build_array succeeds, so they never reach the cleanup
                 * stack's error path.
                 */
                unregister_allocation ($2);
                $$ = build_array ($2);
                free_expr_list_list ($2);
                if ($$ == NULL) { YYERROR; }
        }

        | function
        /* A name qualified by a sheet. */
        | sheetref STRING {
                GnmNamedExpr *nexpr = NULL;
                char const *name = value_peek_string ($2->constant.value);
                GnmParsePos pos = *state->pos;

                pos.sheet = $1;
                nexpr = expr_name_lookup (&pos, name);
                if (nexpr == NULL) {
                        report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
                                _("Name '%s' does not exist in sheet '%s'"),
                                                name, pos.sheet->name_quoted),
                                state->ptr, strlen (name));
                        YYERROR;
                } else {
                        /* The name token is no longer needed once resolved. */
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = register_expr_allocation (gnm_expr_new_name (nexpr, $1, NULL));
                }
        }
        /* A name qualified by a workbook only; the sheet is cleared for the lookup. */
        | workbookref STRING {
                GnmNamedExpr *nexpr = NULL;
                char const *name = value_peek_string ($2->constant.value);
                GnmParsePos pos = *state->pos;

                pos.sheet = NULL;
                pos.wb = $1;
                nexpr = expr_name_lookup (&pos, name);
                if (nexpr != NULL) {
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = register_expr_allocation (gnm_expr_new_name (nexpr, NULL, $1));
                } else {
                        report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
                                _("Name '%s' does not exist in workbook"),
                                                        name),
                                state->ptr, strlen (name));
                        YYERROR;
                }
        }
        ;
 715
 716 function : STRING '(' arg_list ')' {
 717                 char const *name = value_peek_string ($1->constant.value);
 718                 GnmExpr const *f_call = (*state->convs->input.func) (
 719                         state->convs, state->pos->wb, name, $3);
 720
 721                 $$ = NULL;
 722                 if (f_call) {
 723                         /* We're done with the function name.  */
 724                         unregister_allocation ($1); gnm_expr_free ($1);
 725                         unregister_allocation ($3);
 726                         $$ = register_expr_allocation (f_call);
 727                 } else {
 728                         YYERROR;
 729                 }
 730         }
 731         ;
 732
/* A name that may be written with or without quotes. */
string_opt_quote : STRING
                 | QUOTED_STRING
                 ;

/* An optional sheet separator. */
opt_sheet_sep    : SHEET_SEP
                 | ;
 739
 740 /* only used for names */
 741 workbookref : tok_WORKBOOKREF opt_sheet_sep
 742             | '[' string_opt_quote ']'  {
 743                 char const *wb_name = value_peek_string ($2->constant.value);
 744                 Workbook *ref_wb = state->pos
 745                         ? (state->pos->wb
 746                            ? state->pos->wb
 747                            : (state->pos->sheet
 748                               ? state->pos->sheet->workbook
 749                               : NULL))
 750                         : NULL;
 751                 Workbook *wb =
 752                         state->convs->input.external_wb (state->convs,
 753                                                          ref_wb,
 754                                                          wb_name);
 755
 756                 if (wb != NULL) {
 757                         unregister_allocation ($2); gnm_expr_free ($2);
 758                         $$ = wb;
 759                 } else {
 760                         /* kludge to produce better error messages
 761                          * we know that the last token read will be the ']'
 762                          * so subtract 1.
 763                          */
 764                         report_err (state, g_error_new (1, PERR_UNKNOWN_WORKBOOK,
 765                                 _("Unknown workbook '%s'"), wb_name),
 766                                 state->ptr - 1, strlen (wb_name));
 767                         YYERROR;
 768                 }
 769         }
 770         | '[' ']' {
 771                 /* Special syntax for global names shadowed by sheet names.  */
 772                 Workbook *wb = state->pos
 773                         ? (state->pos->wb
 774                            ? state->pos->wb
 775                            : (state->pos->sheet
 776                               ? state->pos->sheet->workbook
 777                               : NULL))
 778                         : NULL;
 779                 $$ = wb;
 780                 if (wb == NULL) {
 781                         report_err (state, g_error_new (1, PERR_UNKNOWN_WORKBOOK,
 782                                 _("Unknown workbook")),
 783                                 state->ptr - 1, 1);
 784                         YYERROR;
 785                 }
 786         }
 787         ;
 788
 789 /* does not need to handle 3d case.  this is only used for names.
 790  * 3d cell references are handled in the lexer
 791  */
 792 sheetref: string_opt_quote SHEET_SEP {
 793                 Sheet *sheet = parser_sheet_by_name (state->pos->wb, $1);
 794                 if (sheet != NULL) {
 795                         unregister_allocation ($1); gnm_expr_free ($1);
 796                         $$ = sheet;
 797                 } else {
 798                         YYERROR;
 799                 }
 800         }
 801         | workbookref string_opt_quote SHEET_SEP {
 802                 Workbook *wb = $1;
 803                 Sheet *sheet = parser_sheet_by_name (wb, $2);
 804                 if (sheet != NULL) {
 805                         unregister_allocation ($2); gnm_expr_free ($2);
 806                         $$ = sheet;
 807                 } else {
 808                         YYERROR;
 809                 }
 810         }
 811         ;
 812
     /* cellref: either a RANGEREF token taken as is, or two operands joined by
      * RANGE_SEP, where each operand is a RANGEREF or a function call.  The
      * joined forms are built by build_range_ctor; a NULL result is turned
      * into a parse error.
      * NOTE(review): the third argument passed to build_range_ctor is the
      * RANGEREF operand when exactly one side is a RANGEREF and NULL otherwise;
      * what build_range_ctor does with it is not visible here -- confirm there.
      */
 813 cellref:  RANGEREF { $$ = $1; }
 814         | function RANGE_SEP function {
 815                 $$ = build_range_ctor ($1, $3, NULL);
 816                 if ($$ == NULL) { YYERROR; }
 817         }
 818         | RANGEREF RANGE_SEP function {
 819                 $$ = build_range_ctor ($1, $3, $1);
 820                 if ($$ == NULL) { YYERROR; }
 821         }
 822         | function RANGE_SEP RANGEREF {
 823                 $$ = build_range_ctor ($1, $3, $3);
 824                 if ($$ == NULL) { YYERROR; }
 825         }
 826         | RANGEREF RANGE_SEP RANGEREF {
 827                 $$ = build_range_ctor ($1, $3, NULL);
 828                 if ($$ == NULL) { YYERROR; }
 829         }
 830         ;
 831
     /* arg_list: a list of expressions separated by ARG_SEP.  The rule is
      * right-recursive, so the list is built by prepending each expression to
      * the list made from what follows it.  A missing argument is represented
      * by a constant holding an empty value; an entirely empty list is NULL.
      * Operands are unregistered before they are linked into the new list,
      * and the new list is registered in their place.
      */
 832 arg_list: exp {
 833                 unregister_allocation ($1);
 834                 $$ = gnm_expr_list_prepend (NULL, $1);
 835                 register_expr_list_allocation ($$);
 836         }
 837         | exp ARG_SEP arg_list {
 838                 GSList *tmp = $3;
 839                 unregister_allocation ($3);
 840                 unregister_allocation ($1);
 841
                     /* Nothing after the separator: add an explicit empty argument.  */
 842                 if (tmp == NULL)
 843                         tmp = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_empty ()));
 844
 845                 $$ = gnm_expr_list_prepend (tmp, $1);
 846                 register_expr_list_allocation ($$);
 847         }
 848         | ARG_SEP arg_list {
 849                 GSList *tmp = $2;
 850                 unregister_allocation ($2);
 851
                     /* Nothing after the separator: add an explicit empty argument.  */
 852                 if (tmp == NULL)
 853                         tmp = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_empty ()));
 854
                     /* Nothing before the separator either: that argument is empty too.  */
 855                 $$ = gnm_expr_list_prepend (tmp, gnm_expr_new_constant (value_new_empty ()));
 856                 register_expr_list_allocation ($$);
 857         }
 858         | { $$ = NULL; }
 859         ;
 860
     /* array_exp: one element of an array literal.  Accepted forms are a
      * CONSTANT, a CONSTANT with a leading '-' or '+' (handed to
      * fold_negative_constant / fold_positive_constant, whose NULL result is
      * turned into a parse error), and a string, which is handed to
      * parse_string_as_value.
      */
 861 array_exp:     CONSTANT         { $$ = $1; }
 862          | '-' CONSTANT         {
 863                 GnmExpr *tmp = fold_negative_constant ($2);
 864                 if (!tmp) { YYERROR; }
 865                 $$ = tmp;
 866          }
 867          | '+' CONSTANT         {
 868                 GnmExpr *tmp = fold_positive_constant ($2);
 869                 if (!tmp) { YYERROR; }
 870                 $$ = tmp;
 871          }
 872          | string_opt_quote     { $$ = parse_string_as_value ($1); }
 873          ;
 874
 875
     /* array_row: the elements of one array row, separated by ARRAY_COL_SEP,
      * collected into a GSList by prepending (the rule is right-recursive).
      * An empty row is NULL.  Operands are unregistered before being linked
      * and the resulting list is registered as an expression list.
      */
 876 array_row : { $$ = NULL; }
 877         | array_exp {
 878                 unregister_allocation ($1);
 879                 $$ = g_slist_prepend (NULL, $1);
 880                 register_expr_list_allocation ($$);
 881         }
 882         | array_exp ARRAY_COL_SEP array_row {
                     /* Release the tail list first, then the element, before relinking.  */
 883                 unregister_allocation ($3);
 884                 unregister_allocation ($1);
 885                 $$ = g_slist_prepend ($3, $1);
 886                 register_expr_list_allocation ($$);
 887         }
 888         ;
 889
     /* array_rows: the rows of an array literal, separated by ARRAY_ROW_SEP,
      * collected into a GSList whose elements are the row lists produced by
      * array_row.  The result is registered as a list of expression lists.
      */
 890 array_rows: array_row {
 891                 unregister_allocation ($1);
 892                 $$ = g_slist_prepend (NULL, $1);
 893                 register_expr_list_list_allocation ($$);
 894         }
 895         | array_row ARRAY_ROW_SEP array_rows {
                     /* Release the tail list first, then the row, before relinking.  */
 896                 unregister_allocation ($3);
 897                 unregister_allocation ($1);
 898                 $$ = g_slist_prepend ($3, $1);
 899                 register_expr_list_list_allocation ($$);
 900         }
 901         ;
 902
 903 %%
 904
 905 /* Return the ')' closing the level STR is in, else the terminating NUL.  */
 906 static char const *
 907 find_matching_close (char const *str, char const **res)
 908 {
 909         for (;;) {
 910                 if (*str == 0 || *str == ')')
 911                         return str;
 912                 if (*str == '\'' || *str == '\"') {
 913                         GString *scratch = g_string_new (NULL);
 914                         char const *after = go_strunescape (scratch, str);
 915                         g_string_free (scratch, TRUE);
 916                         if (after == NULL)
 917                                 return str + strlen (str);
 918                         str = after;    /* do not advance any further */
 919                         continue;
 920                 }
 921                 if (*str == '(') {
 922                         char const *open = str;
 923                         str = find_matching_close (open + 1, res);
 924                         if (*str != ')' && *res == NULL) {
 925                                 *res = open;    /* scan ended without a ')' */
 926                                 return str;
 927                         }
 928                         if (*str == 0)
 929                                 return str;
 930                 }
 931                 str = g_utf8_next_char (str);
 932         }
 933 }
 934
 935 /* Skip blanks at the lexer position (they cannot be operators here); return RES.  */
 936 static inline int
 937 eat_space (ParserState *state, int res)
 938 {
 939         char const *cursor = state->ptr;
 940         while (' ' == *cursor)
 941                 cursor++;
 942         state->ptr = cursor;
 943         return res;
 944 }
 945
 946 /*
 947  * Is white space directly in front of character C insignificant?
 948  */
 949 static gboolean
 950 ignore_space_before (gunichar c)
 951 {
 952         gboolean ignorable = FALSE;
 953         switch (c) {
 954         case 0: case ')': case '#':
 955         case '"': case '\'':  /* These stand for an opening quote only.  */
 956         case '=': case '<': case '>':
 957         case '+': case '-': case '*': case '/': case '%': case '^': case '&':
 958         case UNICODE_MINUS_SIGN_C: case UNICODE_DIVISION_SLASH_C:
 959         case UNICODE_LOGICAL_NOT_C:
 960         case UNICODE_LOGICAL_AND_C:
 961         case UNICODE_LOGICAL_OR_C:
 962         case UNICODE_NOT_EQUAL_TO_C:
 963         case UNICODE_LESS_THAN_OR_EQUAL_TO_C:
 964         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C:
 965                 ignorable = TRUE;
 966                 break;
 967         default:
 968                 break;
 969         }
 970         return ignorable;
 971 }
 972
 973 /*
 974  * Is white space directly behind character C insignificant?
 975  */
 976 static gboolean
 977 ignore_space_after (gunichar c)
 978 {
 979         gboolean ignorable = FALSE;
 980         switch (c) {
 981         case 0: case '(':
 982         case '"': case '\'':  /* A closing quote only [not actually hit].  */
 983         case '=': case '<': case '>':
 984         case '+': case '-': case '*': case '/': case '%': case '^': case '&':
 985         case UNICODE_MINUS_SIGN_C: case UNICODE_DIVISION_SLASH_C:
 986         case UNICODE_LOGICAL_NOT_C:
 987         case UNICODE_LOGICAL_AND_C: case UNICODE_LOGICAL_OR_C:
 988         case UNICODE_NOT_EQUAL_TO_C:
 989         case UNICODE_LESS_THAN_OR_EQUAL_TO_C:
 990         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C:
 991                 ignorable = TRUE;
 992                 break;
 993         default:
 994                 break;
 995         }
 996         return ignorable;
 997 }
 998
 999 /* TRUE when the first character at P that is not white space is '('.  */
1000 static gboolean
1001 open_paren (const char *p)
1002 {
1003         for (; g_unichar_isspace (g_utf8_get_char (p)); p = g_utf8_next_char (p)) { }
1004         return '(' == *p;
1005 }
1006
     /*
      * yylex: the lexer called by the generated parser.
      *
      * Scans one token starting at state->ptr, advances state->ptr past it and
      * returns its code: 0 at the end of the input, INVALID_TOKEN after an
      * error has been reported with report_err, one of the grammar's tokens,
      * or the character itself.  Tokens that carry a value store it in yylval;
      * expression values are registered with register_expr_allocation.
      */
1007 static int
1008 yylex (void)
1009 {
1010         gunichar c, tmp;
1011         char const *start, *end;
1012         GnmRangeRef ref;
1013         gboolean is_number = FALSE;
1014         gboolean is_space = FALSE;
1015         gboolean error_token = FALSE;
1016
1017         /*
1018          * Some special logic to handle space as intersection char.
1019          * Any number of white space characters are treated as one
1020          * intersection.
1021          *
1022          * Also, if we are not using space for that, drop spaces.
1023          */
1024         while (g_unichar_isspace (g_utf8_get_char (state->ptr))) {
1025                 state->ptr = g_utf8_next_char (state->ptr);
1026                 is_space = TRUE;
1027         }
1028         if (is_space && state->convs->intersection_char == ' ' &&
1029             !ignore_space_before (g_utf8_get_char (state->ptr)))
1030                 return RANGE_INTERSECT;
1031
1032         start = state->ptr;
1033         c = g_utf8_get_char (start);
1034         if (c == 0)
1035                 return 0;
1036         state->ptr = g_utf8_next_char (state->ptr);
1037
1038         if (c == state->convs->intersection_char)
1039                 return RANGE_INTERSECT;
1040
             /* XML-style entities, only when the conventions ask for decoding.  */
1041         if (c == '&' && state->convs->decode_ampersands) {
1042                 if (!strncmp (state->ptr, "amp;", 4)) {
1043                         state->ptr += 4;
1044                         return '&';
1045                 }
1046
1047                 if (!strncmp (state->ptr, "lt;", 3)) {
1048                         state->ptr += 3;
1049                         if (*state->ptr == '='){
1050                                 state->ptr++;
1051                                 return tok_LTE;
1052                         }
1053                         if (!strncmp (state->ptr, "&gt;", 4)) {
1054                                 state->ptr += 4;
1055                                 return tok_NE;
1056                         }
1057                         return '<';
1058                 }
1059                 if (!strncmp (state->ptr, "gt;", 3)) {
1060                         state->ptr += 3;
1061                         if (*state->ptr == '='){
1062                                 state->ptr++;
1063                                 return tok_GTE;
1064                         }
1065                         return '>';
1066                 }
1067                 if (!strncmp (state->ptr, "apos;", 5) ||
1068                     !strncmp (state->ptr, "quot;", 5)) {
1069                         char const *quotes_end;
1070                         char const *p;
1071                         char *string, *s;
1072                         GnmValue *v;
1073
1074                         if (*state->ptr == 'q') {
1075                                 quotes_end = "&quot;";
1076                                 c = '\"';
1077                         } else {
1078                                 quotes_end = "&apos;";
1079                                 c = '\'';
1080                         }
1081
1082                         state->ptr += 5;
1083                         p = state->ptr;
1084                         double_quote_loop :
1085                                 end = strstr (state->ptr, quotes_end);
                             /* strstr returns NULL, not a pointer to the NUL,
                              * when there is no closing entity.  Search into a
                              * local so that state->ptr stays a valid pointer
                              * in case the lexer is called again after the
                              * error.  */
1086                         if (end == NULL) {
1087                                 report_err (state, g_error_new (1, PERR_MISSING_CLOSING_QUOTE,
1088                                                                 _("Could not find matching closing quote")),
1089                                             p, 1);
1090                                 return INVALID_TOKEN;
1091                         }
                             state->ptr = end;
1092                         if (!strncmp (state->ptr + 6, quotes_end, 6)) {
1093                                 state->ptr += 2 * 6;
1094                                 goto double_quote_loop;
1095                         }
1096
                             /* Decode the text between the quotes; the result is
                              * never longer than the encoded form.  */
1097                         s = string = (char *) g_alloca (1 + state->ptr - p);
1098                         while (p != state->ptr) {
1099                                 if (*p == '&') {
1100                                         if (!strncmp (p, "&amp;", 5)) {
1101                                                 p += 5;
1102                                                 *s++ = '&';
1103                                                 continue;
1104                                         } else if (!strncmp (p, "&lt;", 4)) {
1105                                                 p += 4;
1106                                                 *s++ = '<';
1107                                                 continue;
1108                                         } else if (!strncmp (p, "&gt;", 4)) {
1109                                                 p += 4;
1110                                                 *s++ = '>';
1111                                                 continue;
1112                                         } else if (!strncmp (p, quotes_end, 6)) {
1113                                                 p += 12; /* two in a row is the escape mechanism */
1114                                                 *s++ = c;
1115                                                 continue;
1116                                         } else if (!strncmp (p, "&quot;", 6)) {
1117                                                 p += 6;
1118                                                 *s++ = '\"';
1119                                                 continue;
1120                                         } else if (!strncmp (p, "&apos;", 6)) {
1121                                                 p += 6;
1122                                                 *s++ = '\'';
1123                                                 continue;
1124                                         }
1125                                 }
1126                                 *s++ = *p++;
1127                         }
1128
1129                         *s = 0;
1130                         state->ptr += 6;
1131
1132                         v = value_new_string (string);
1133                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1134                         return QUOTED_STRING;
1135                 }
1136         }
1137
1138         if (c == ':' && state->convs->range_sep_colon)
1139                 return eat_space (state, RANGE_SEP);
1140
1141         if (c == state->convs->sheet_name_sep)
1142                 return eat_space (state, SHEET_SEP);
1143
1144         if (c == '.' && *state->ptr == '.' && state->convs->range_sep_dotdot) {
1145                 state->ptr++;
1146                 return RANGE_SEP;
1147         }
1148
1149         if (c == '#' && state->convs->accept_hash_logicals) {
1150                 if (!strncmp (state->ptr, "NOT#", 4)) {
1151                         state->ptr += 4;
1152                         return eat_space (state, tok_NOT);
1153                 }
1154                 if (!strncmp (state->ptr, "AND#", 4)) {
1155                         state->ptr += 4;
1156                         return eat_space (state, tok_AND);
1157                 }
1158                 if (!strncmp (state->ptr, "OR#", 3)) {
1159                         state->ptr += 3;
1160                         return eat_space (state, tok_OR);
1161                 }
1162         }
1163
             /* Inside an array the argument separator takes the meaning chosen
              * by setup_state (in_array_sep_is).  */
1164         if (c == state->arg_sep)
1165                 return eat_space (state, state->in_array ? state->in_array_sep_is : ARG_SEP);
1166         if (c == state->array_col_sep)
1167                 return eat_space (state, ARRAY_COL_SEP);
1168         if (c == state->array_row_sep)
1169                 return eat_space (state, ARRAY_ROW_SEP);
1170
1171         end = state->convs->input.range_ref (&ref, start,
1172                                              state->pos, state->convs);
1173         /*
1174          * In order to parse "LOG10(1024)" in sheets with more than ~8500
1175          * columns we do not consider anything a rangeref if it is followed
1176          * by an opening parenthesis.
1177          */
1178         if (start != end && !open_paren (end)) {
1179                 state->ptr = end;
1180                 if (invalid_sheet == ref.a.sheet) {
1181                         yylval.expr = register_expr_allocation
1182                                 (gnm_expr_new_constant
1183                                  (value_new_error_REF (NULL)));
1184                         return CONSTANT;
1185                 }
1186                 if (state->flags & GNM_EXPR_PARSE_FORCE_ABSOLUTE_REFERENCES) {
1187                         if (ref.a.col_relative) {
1188                                 ref.a.col += state->pos->eval.col;
1189                                 ref.a.col_relative = FALSE;
1190                         }
1191                         if (ref.b.col_relative) {
1192                                 ref.b.col += state->pos->eval.col;
1193                                 ref.b.col_relative = FALSE;
1194                         }
1195                         if (ref.a.row_relative) {
1196                                 ref.a.row += state->pos->eval.row;
1197                                 ref.a.row_relative = FALSE;
1198                         }
1199                         if (ref.b.row_relative) {
1200                                 ref.b.row += state->pos->eval.row;
1201                                 ref.b.row_relative = FALSE;
1202                         }
1203                 } else if (state->flags & GNM_EXPR_PARSE_FORCE_RELATIVE_REFERENCES) {
1204                         if (!ref.a.col_relative) {
1205                                 ref.a.col -= state->pos->eval.col;
1206                                 ref.a.col_relative = TRUE;
1207                         }
1208                         if (!ref.b.col_relative) {
1209                                 ref.b.col -= state->pos->eval.col;
1210                                 ref.b.col_relative = TRUE;
1211                         }
1212                         if (!ref.a.row_relative) {
1213                                 ref.a.row -= state->pos->eval.row;
1214                                 ref.a.row_relative = TRUE;
1215                         }
1216                         if (!ref.b.row_relative) {
1217                                 ref.b.row -= state->pos->eval.row;
1218                                 ref.b.row_relative = TRUE;
1219                         }
1220                 }
1221
1222                 if (ref.a.sheet == NULL && (state->flags & GNM_EXPR_PARSE_FORCE_EXPLICIT_SHEET_REFERENCES)) {
1223                         ref.a.sheet = state->pos->sheet;
1224                         if (ref.a.sheet == NULL) {
1225                                 report_err (state, g_error_new (1, PERR_SHEET_IS_REQUIRED,
1226                                         _("Sheet name is required")),
1227                                         state->ptr, 0);
1228                                 return INVALID_TOKEN;
1229                         }
1230                 }
1231
                     /* Both corners identical: produce a cell reference rather
                      * than a range.  */
1232                 if ((ref.b.sheet == NULL || ref.b.sheet == ref.a.sheet) &&
1233                     ref.a.col           == ref.b.col &&
1234                     ref.a.col_relative  == ref.b.col_relative &&
1235                     ref.a.row           == ref.b.row &&
1236                     ref.a.row_relative  == ref.b.row_relative) {
1237                         yylval.expr = register_expr_allocation (gnm_expr_new_cellref (&ref.a));
1238                         return RANGEREF;
1239                 }
1240                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (
1241                          value_new_cellrange_unsafe (&ref.a, &ref.b)));
1242                 return RANGEREF;
1243         }
1244
1245         /* Do NOT handle negative numbers here.  That has to be done in the
1246          * parser otherwise we mishandle A1-1 when it looks like
1247          * rangeref CONSTANT  */
1248         if (c == state->decimal_point) {
1249                 /* Could be a number or a stand alone  */
1250                 if (!g_unichar_isdigit (g_utf8_get_char (state->ptr)))
1251                         return c;
1252                 is_number = TRUE;
1253         }  else if (g_unichar_isdigit (c)) {
1254                 /* find the end of the first portion of the number */
1255                 do {
1256                         c = g_utf8_get_char (state->ptr);
1257                         state->ptr = g_utf8_next_char (state->ptr);
1258                 } while (g_unichar_isdigit (c));
1259                 is_number = TRUE;
1260         }
1261
1262         if (is_number) {
1263                 GnmValue *v = NULL;
1264
1265                 if (c == state->decimal_point || c == 'e' || c == 'E') {
1266                         /* This is a floating point number */
1267                         char *end;
1268                         gnm_float d;
1269
1270                         errno = 0;
1271                         d = gnm_utf8_strto (start, &end);
1272                         if (start == end) {
1273                                 g_warning ("%s is not a double, but was expected to be one", start);
1274                         }  else if (errno != ERANGE) {
1275                                 v = value_new_float (d);
1276                                 state->ptr = end;
1277                         } else if (c != 'e' && c != 'E') {
1278                                 report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1279                                         _("The number is out of range")),
1280                                         state->ptr, end - start);
1281                                 return INVALID_TOKEN;
1282                         } else {
1283                                 /* For an exponent it's hard to highlight the
1284                                  * right region w/o it turning into an ugly
1285                                  * hack, for now the cursor is put at the end.
1286                                  */
1287                                 report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1288                                         _("The number is out of range")),
1289                                         state->ptr, 0);
1290                                 return INVALID_TOKEN;
1291                         }
1292                 } else {
1293                         char *end;
1294                         long l;
1295
                             /* Clear errno so that the ERANGE test below only
                              * sees the result of this conversion.  */
                             errno = 0;
1296                         l = gnm_utf8_strtol (start, &end);
1297                         if (start == end) {
1298                                 g_warning ("%s is not an integer, but was expected to be one", start);
1299                         } else if (errno != ERANGE && l >= INT_MIN && l <= INT_MAX) {
1300                                 v = value_new_int (l);
1301                                 state->ptr = end;
1302                         } else {
                                     /* Too large for an int: retry as a float.  */
1303                                 gnm_float d;
1304
1305                                 errno = 0;
1306                                 d = gnm_utf8_strto (start, &end);
1307                                 if (errno != ERANGE) {
1308                                         v = value_new_float (d);
1309                                         state->ptr = end;
1310                                 } else {
1311                                         report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1312                                                 _("The number is out of range")),
1313                                                 state->ptr, end - start);
1314                                         return INVALID_TOKEN;
1315                                 }
1316                         }
1317                 }
1318
1319                 /* Very odd string,  Could be a bound problem.  Trigger an error */
1320                 if (v == NULL)
1321                         return c;
1322
1323                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1324                 return CONSTANT;
1325         }
1326
             /* Error constants, quoted strings and external workbook references.  */
1327         switch (c) {
1328         case '#':
1329                 if (state->ptr[0] != '"') {
                             /* An unquoted error name ends at '!' or '?', or is
                              * exactly "#N/A".  */
1330                         while ((tmp = g_utf8_get_char (state->ptr)) != 0 &&
1331                                !g_unichar_isspace (tmp)) {
1332                                 state->ptr = g_utf8_next_char (state->ptr);
1333                                 if (tmp == '!' || tmp == '?' ||
1334                                 ((state->ptr - start) == 4 && 0 == strncmp (start, "#N/A", 4))) {
1335                                         GOString *name = go_string_new_nocopy (g_strndup (start, state->ptr - start));
1336                                         yylval.expr = register_expr_allocation
1337                                                 (gnm_expr_new_constant (
1338                                                         value_new_error_str (NULL, name)));
1339                                         go_string_unref (name);
1340                                         return CONSTANT;
1341                                 }
1342                         }
1343
1344                         report_err (state, g_error_new
1345                                     (1, PERR_UNEXPECTED_TOKEN,
1346                                      _("Improperly formatted error token")),
1347                                     state->ptr, state->ptr - start);
1348
1349                         return INVALID_TOKEN;
1350                 }
                     /* '#' followed by a quoted string: an error constant whose
                      * text is the string.  */
1351                 error_token = TRUE;
1352                 start++;
1353                 /* Fall through */
1354         case '\'':
1355         case '"': {
1356                 GString *s = g_string_new (NULL);
1357                 char const *end = state->convs->input.string (start, s, state->convs);
1358
1359                 if (end == NULL) {
1360                         size_t len = strlen (start);
1361                         g_string_free (s, TRUE);
1362                         report_err (state,
1363                                     g_error_new (1, PERR_MISSING_CLOSING_QUOTE,
1364                                                  _("Could not find matching closing quote")),
1365                                     start + len, len);
1366                         return INVALID_TOKEN;
1367                 }
1368
1369                 state->ptr = (char *)end;
1370
1371                 if (error_token) {
1372                         GnmValue *v = value_new_error (NULL, s->str);
1373                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1374                         g_string_free (s, TRUE);
1375                         return eat_space (state, CONSTANT);
1376                 } else {
1377                         GnmValue *v = value_new_string_nocopy (g_string_free (s, FALSE));
1378                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1379                         return eat_space (state, QUOTED_STRING);
1380                 }
1381         }
1382
1383         case '[': {
1384                 const char *p = state->ptr;
1385                 GString *s = g_string_new (NULL);
1386                 Workbook *ref_wb = state->pos
1387                         ? (state->pos->wb
1388                            ? state->pos->wb
1389                            : (state->pos->sheet
1390                               ? state->pos->sheet->workbook
1391                               : NULL))
1392                         : NULL;
1393
1394                 while (g_unichar_isspace (g_utf8_get_char (p)))
1395                         p = g_utf8_next_char (p);
1396
1397                 if (p[0] == '"' || p[0] == '\'') {
1398                         p = go_strunescape (s, p);
1399                 } else {
1400                         gunichar uc;
1401                         while (1) {
1402                                 uc = g_utf8_get_char (p);
1403                                 if (!uc || uc == ']' || g_unichar_isspace (uc))
1404                                         break;
1405                                 p = g_utf8_next_char (p);
1406                                 g_string_append_unichar (s, uc);
1407                         }
1408                 }
1409
                     /* p is NULL here when go_strunescape failed.  */
1410                 while (p && g_unichar_isspace (g_utf8_get_char (p)))
1411                         p = g_utf8_next_char (p);
1412
                     /* Not of the form "[name]": leave state->ptr alone and let
                      * the code after the switch deal with the '['.  */
1413                 if (s->len == 0 || !p || p[0] != ']') {
1414                         g_string_free (s, TRUE);
1415                         break;
1416                 }
1417
1418                 yylval.wb = state->convs->input.external_wb (state->convs,
1419                                                              ref_wb,
1420                                                              s->str);
1421                 g_string_free (s, TRUE);
1422                 if (!yylval.wb)
1423                         break;
1424
1425                 state->ptr = p + 1;
1426                 return tok_WORKBOOKREF;
1427         }
1428         }
1429
             /* Anything the conventions accept as a name becomes a STRING token.  */
1430         if ((end = state->convs->input.name (start, state->convs))) {
1431                 state->ptr = end;
1432                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (
1433                         value_new_string_nocopy (g_strndup (start, state->ptr - start))));
1434                 return STRING;
1435         }
1436
             /* Operators and punctuation.  */
1437         switch (c) {
1438         case '<':
1439                 if (*state->ptr == '='){
1440                         state->ptr++;
1441                         return eat_space (state, tok_LTE);
1442                 }
1443                 if (*state->ptr == '>'){
1444                         state->ptr++;
1445                         return eat_space (state, tok_NE);
1446                 }
1447                 return eat_space (state, c);
1448
1449         case '>':
1450                 if (*state->ptr == '='){
1451                         state->ptr++;
1452                         return eat_space (state, tok_GTE);
1453                 }
1454                 return eat_space (state, c);
1455
1456         case '\n': return 0;
1457
             /* Track array nesting for the separator choice made above.  */
1458         case '{' :
1459                 state->in_array++;
1460                 return c;
1461         case '}' :
1462                 state->in_array--;
1463                 return c;
1464
1465         case '^':
1466                 return state->convs->exp_is_left_associative
1467                         ? tok_LEFT_EXP
1468                         : tok_RIGHT_EXP;
1469
1470         case UNICODE_LOGICAL_NOT_C: return tok_NOT;
1471         case UNICODE_MINUS_SIGN_C: return '-';
1472         case UNICODE_DIVISION_SLASH_C: return '/';
1473         case UNICODE_LOGICAL_AND_C: return tok_AND;
1474         case UNICODE_LOGICAL_OR_C: return tok_OR;
1475         case UNICODE_NOT_EQUAL_TO_C: return eat_space (state, tok_NE);
1476         case UNICODE_LESS_THAN_OR_EQUAL_TO_C: return eat_space (state, tok_LTE);
1477         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C: return eat_space (state, tok_GTE);
1478         }
1479
1480         if (ignore_space_after (c))
1481                 return eat_space (state, c);
1482         else
1483                 return c;
1484 }
1485
     /*
      * yyerror: error callback for the generated parser.  The message S is
      * ignored (the trace below is compiled out) and 0 is always returned.
      */
1486 int
1487 yyerror (char const *s)
1488 {
1489 #if 0
1490         g_printerr ("Error: %s\n", s);
1491 #endif
1492         return 0;
1493 }
1494
1495 /* Initialise PSTATE for parsing STR at position PP and make it the current
1496  * parser state.  A NULL CONVS selects the conventions of PP's sheet, or
1497  * gnm_conventions_default when PP has no sheet.  */
1498 static void
1499 setup_state (ParserState *pstate, const char *str,
1500              GnmParsePos const *pp,
1501              GnmExprParseFlags flags,
1502              GnmConventions const *convs,
1503              GnmParseError *error)
1504 {
1505         GnmConventions const *use;
1506
1507         if (NULL != convs)
1508                 use = convs;
1509         else if (NULL != pp->sheet)
1510                 use = pp->sheet->convs;
1511         else
1512                 use = gnm_conventions_default;
1513
1514         pstate->start = pstate->ptr = str;
1515         pstate->pos      = pp;
1516         pstate->flags    = flags;
1517         pstate->convs    = use;
1518         pstate->in_array = 0;
1519         pstate->result   = NULL;
1520         pstate->error    = error;
1521
1522         /* FIXME: only a one-character decimal separator is handled.  */
1523         pstate->decimal_point = '.';
1524         if (!use->decimal_sep_dot)
1525                 pstate->decimal_point = g_utf8_get_char (go_locale_get_decimal ()->str);
1526
1527         pstate->arg_sep = use->arg_sep;         /* 0 means: ask the locale */
1528         if (use->arg_sep == 0)
1529                 pstate->arg_sep = go_locale_get_arg_sep ();
1530         pstate->array_col_sep = use->array_col_sep;
1531         if (use->array_col_sep == 0)
1532                 pstate->array_col_sep = go_locale_get_col_sep ();
1533         pstate->array_row_sep = use->array_row_sep;
1534         if (use->array_row_sep == 0)
1535                 pstate->array_row_sep = go_locale_get_row_sep ();
1536
1537         /* ARG_SEP can coincide with an array separator: ARRAY_ROW_SEP as in
1538          * {1\2\3;4\5\6} (XL style, ',' as decimal), or ARRAY_COL_SEP as in
1539          * {1,2,3;4,5,6} (XL style, '.' as decimal) or {1;2;3|4;5;6} (OOo).
1540          * Record what the shared character means inside an array; the lexer
1541          * tracks in_array to dis-ambiguate.  */
1542         pstate->in_array_sep_is = (pstate->arg_sep == pstate->array_col_sep)
1543                 ? ARRAY_COL_SEP
1544                 : ((pstate->arg_sep == pstate->array_row_sep) ? ARRAY_ROW_SEP : ARG_SEP);
1545         state = pstate;
1546 }
1547
1548 /**
1549  * gnm_expr_parse_str:
1550  *
1551  * @str   : The string to parse.
1552  * @pp    : #GnmParsePos
1553  * @flags : See parse-utils for descriptions
1554  * @convs : optionally NULL #GnmConventions
1555  * @error : optionally NULL ptr to store details of error.
1556  *
1557  * Parse a string. if @error is non-null it will be assumed that the
1558  * caller has passed a pointer to a GnmParseError struct AND that it will
1559  * take responsibility for freeing that struct and its contents.
1560  * with parse_error_free.
1561  * If @convs is NULL use the conventions from @pp.
1562  **/
1563 GnmExprTop const *
1564 gnm_expr_parse_str (char const *str, GnmParsePos const *pp,
1565                     GnmExprParseFlags flags,
1566                     GnmConventions const *convs,
1567                     GnmParseError *error)
1568 {
1569         GnmExpr const *expr;
1570         ParserState pstate;
1571
1572         g_return_val_if_fail (str != NULL, NULL);
1573         g_return_val_if_fail (pp != NULL, NULL);
1574         g_return_val_if_fail (state == NULL, NULL);
1575
1576         if (deallocate_stack == NULL)
1577                 deallocate_init ();
1578
1579         setup_state (&pstate, str, pp, flags, convs, error);
1580         yyparse ();
1581         state = NULL;
1582
1583         if (pstate.result != NULL) {
1584                 deallocate_assert_empty ();
1585
1586 #if 0
1587                 /* If this happens, something is very wrong */
1588                 if (pstate.error != NULL && pstate.error->message != NULL) {
1589                         g_warning ("An error occurred and the GnmExpr is non-null! This should not happen");
1590                         g_warning ("Error message is %s (%d, %d)", pstate.error->message, pstate.error->begin_char,
1591                                         pstate.error->end_char);
1592                 }
1593 #endif
1594
1595                 /* Do we have multiple expressions */
1596                 if (pstate.result->next != NULL) {
1597                         if (flags & GNM_EXPR_PARSE_PERMIT_MULTIPLE_EXPRESSIONS)
1598                                 expr = gnm_expr_new_set (g_slist_reverse (pstate.result));
1599                         else {
1600                                 gnm_expr_list_unref (pstate.result);
1601                                 report_err (&pstate, g_error_new (1, PERR_MULTIPLE_EXPRESSIONS,
1602                                         _("Multiple expressions are not supported in this context")),
1603                                         pstate.start,
1604                                         (pstate.ptr - pstate.start));
1605                                 expr = NULL;
1606                         }
1607                 } else {
1608                         /* Free the list, do not unref the content */
1609                         expr = pstate.result->data;
1610                         gnm_expr_list_free (pstate.result);
1611                 }
1612         } else {
1613                 /* If there is no error message, attempt to be more detailed */
1614                 if (pstate.error != NULL &&
1615                     (pstate.error->err == NULL || pstate.error->err->message == NULL)) {
1616                         char const *last_token = pstate.ptr;
1617
1618                         if (*last_token == '\0') {
1619                                 char const *str = pstate.start;
1620                                 char const *res = NULL;
1621                                 char const *last = find_matching_close (str, &res);
1622
1623                                 if (*last)
1624                                         report_err (&pstate, g_error_new (1, PERR_MISSING_PAREN_OPEN,
1625                                                 _("Could not find matching opening parenthesis")),
1626                                                 last, 1);
1627                                 else if (res != NULL)
1628                                         report_err (&pstate, g_error_new (1, PERR_MISSING_PAREN_CLOSE,
1629                                                 _("Could not find matching closing parenthesis")),
1630                                                 res, 1);
1631                                 else
1632                                         report_err (&pstate, g_error_new (1, PERR_INVALID_EXPRESSION,
1633                                                 _("Invalid expression")),
1634                                                 pstate.ptr, pstate.ptr - pstate.start);
1635                         } else
1636                                 report_err (&pstate, g_error_new (1, PERR_UNEXPECTED_TOKEN,
1637                                         _("Unexpected token %c"), *last_token),
1638                                         last_token, 1);
1639                 }
1640
1641                 deallocate_all ();
1642
1643                 expr = NULL;
1644         }
1645
1646         deallocate_uninit ();
1647
1648         return gnm_expr_top_new (expr);
1649 }
1650
1651 GnmLexerItem *
1652 gnm_expr_lex_all (char const *str, GnmParsePos const *pp,
1653                   GnmExprParseFlags flags,
1654                   GnmConventions const *convs)
1655 {
1656         GnmLexerItem *res = NULL;
1657         int n = 0, alloc = 0;
1658         ParserState pstate;
1659         GnmParseError *error = NULL;
1660
1661         g_return_val_if_fail (str != NULL, NULL);
1662         g_return_val_if_fail (pp != NULL, NULL);
1663
1664         if (deallocate_stack == NULL)
1665                 deallocate_init ();
1666
1667         setup_state (&pstate, str, pp, flags, convs, error);
1668
1669         while (1) {
1670                 int len;
1671
1672                 if (alloc <= n) {
1673                         alloc = alloc * 2 + 20;
1674                         res = g_renew (GnmLexerItem, res, alloc);
1675                 }
1676
1677                 res[n].start = pstate.ptr - pstate.start;
1678                 res[n].token = yylex ();
1679                 res[n].end = pstate.ptr - pstate.start;
1680
1681                 if (res[n].token == 0)
1682                         break;
1683
1684                 len = res[n].end - res[n].start;
1685                 /* Kill spaces that got eaten, but not a space operator */
1686                 while (len > 1 && str[res[n].start] == ' ') {
1687                         res[n].start++;
1688                         len--;
1689                 }
1690                 while (len > 1 && str[res[n].end - 1] == ' ') {
1691                         res[n].end--;
1692                         len--;
1693                 }
1694
1695                 n++;
1696         }
1697
1698         deallocate_all ();
1699
1700         state = NULL;
1701
1702         return res;
1703 }