src/parser.y

   1 %{
   2 /*
   3  * Gnumeric Parser
   4  *
   5  * (C) 1998-2002 GNOME Foundation
   6  * Copyright (C) 2002-2009 Morten Welinder
   7  *
   8  * Authors:
   9  *    Miguel de Icaza (miguel@gnu.org)
  10  *    Jody Goldberg (jody@gnome.org)
  11  *    Morten Welinder (terra@diku.dk)
  12  *    Almer S. Tigelaar (almer@gnome.org)
  13  */
  14 #include <gnumeric-config.h>
  15 #include <glib/gi18n-lib.h>
  16 #include <gnumeric.h>
  17 #include <number-match.h>
  18 #include <expr.h>
  19 #include <expr-impl.h>
  20 #include <expr-name.h>
  21 #include <func.h>
  22 #include <workbook.h>
  23 #include <sheet.h>
  24 #include <gnm-format.h>
  25 #include <application.h>
  26 #include <parse-util.h>
  27 #include <gutils.h>
  28 #include <style.h>
  29 #include <value.h>
  30 #include <goffice/goffice.h>
  31
  32 #include <string.h>
  33 #include <errno.h>
  34 #include <stdlib.h>
  35
/* Non-zero YYDEBUG asks the generated parser to include its tracing code.  */
#define YYDEBUG 1
  37
  38 /* ------------------------------------------------------------------------- */
  39 /* Allocation with disposal-on-error */
  40
  41 /*
  42  * If some dork enters "=1+2+2*(1+" we have already allocated space for
  43  * "1+2", "2", and "1" before the parser sees the syntax error and warps
  44  * us to the error production in the "line" non-terminal.
  45  *
  46  * To make sure we can clean up, we register every allocation.  On success,
  47  * nothing should be left (except the final expression which is unregistered),
  48  * but on failure we must free everything allocated.
  49  *
  50  * Note: there is some room left for optimisation here.  Talk to terra@diku.dk
  51  * before you set out to do it.
  52  */
  53
  54 static void
  55 free_expr_list_list (GSList *list)
  56 {
  57         GSList *l;
  58         for (l = list; l; l = l->next)
  59                 gnm_expr_list_unref (l->data);
  60         g_slist_free (list);
  61 }
  62
/* Signature of the cleanup callbacks stored next to each registered pointer.  */
typedef void (*ParseDeallocator) (void *);
/*
 * Flat array of (pointer, deallocator) pairs: slot 2k holds a registered
 * allocation and slot 2k+1 the function that releases it.
 */
static GPtrArray *deallocate_stack;
  65
  66 static void
  67 deallocate_init (void)
  68 {
  69         deallocate_stack = g_ptr_array_new ();
  70 }
  71
  72 static void
  73 deallocate_uninit (void)
  74 {
  75         g_ptr_array_free (deallocate_stack, TRUE);
  76         deallocate_stack = NULL;
  77 }
  78
  79 static void
  80 deallocate_all (void)
  81 {
  82         int i;
  83
  84         for (i = 0; i < (int)deallocate_stack->len; i += 2) {
  85                 ParseDeallocator freer = g_ptr_array_index (deallocate_stack, i + 1);
  86                 freer (g_ptr_array_index (deallocate_stack, i));
  87         }
  88
  89         g_ptr_array_set_size (deallocate_stack, 0);
  90 }
  91
  92 static void
  93 deallocate_assert_empty (void)
  94 {
  95         if (deallocate_stack->len == 0)
  96                 return;
  97
  98         g_warning ("deallocate_stack not empty as expected.");
  99         deallocate_all ();
 100 }
 101
 102 static void *
 103 register_allocation (gpointer data, ParseDeallocator freer)
 104 {
 105         /* It's handy to be able to register and unregister NULLs.  */
 106         if (data) {
 107                 int len;
 108                 /*
 109                  * There are really only a few different freers, so we
 110                  * could encode the freer in the lower bits of the data
 111                  * pointer.  Unfortunately, no-one can predict how high
 112                  * Miguel would jump when he found out.
 113                  */
 114                 len = deallocate_stack->len;
 115                 g_ptr_array_set_size (deallocate_stack, len + 2);
 116                 g_ptr_array_index (deallocate_stack, len) = data;
 117                 g_ptr_array_index (deallocate_stack, len + 1) = freer;
 118         }
 119
 120         /* Returning the pointer here improved readability of the caller.  */
 121         return data;
 122 }
 123
/* Register a single expression; it is released with gnm_expr_free.  */
#define register_expr_allocation(expr) \
  register_allocation ((gpointer)(expr), (ParseDeallocator)&gnm_expr_free)

/* Register an expression list; it is released with gnm_expr_list_unref.  */
#define register_expr_list_allocation(list) \
  register_allocation ((list), (ParseDeallocator)&gnm_expr_list_unref)

/* Register a list of expression lists; released with free_expr_list_list.  */
#define register_expr_list_list_allocation(list) \
  register_allocation ((list), (ParseDeallocator)&free_expr_list_list)
 132
 133 static void
 134 unregister_allocation (void const *data)
 135 {
 136         int i, pos;
 137
 138         /* It's handy to be able to register and unregister NULLs.  */
 139         if (!data)
 140                 return;
 141
 142         pos = deallocate_stack->len - 2;
 143         if (pos >= 0 && data == g_ptr_array_index (deallocate_stack, pos)) {
 144                 g_ptr_array_set_size (deallocate_stack, pos);
 145                 return;
 146         }
 147
 148         /*
 149          * Bummer.  In certain error cases, it is possible that the parser
 150          * will reduce after it has discovered a token that will lead to an
 151          * error.  "2/16/1800 00:00" (without the quotes) is an example.
 152          * The first "00" is registered before the second division is
 153          * reduced.
 154          *
 155          * Another example is 564077 where we deallocate out of order.
 156          *
 157          * This isn't a big deal -- we will just look at the entries below
 158          * the top.
 159          */
 160         for (i = pos - 2; i >= 0; i -= 2) {
 161                 if (data == g_ptr_array_index (deallocate_stack, i)) {
 162                         g_ptr_array_remove_index (deallocate_stack, i);
 163                         g_ptr_array_remove_index (deallocate_stack, i);
 164                         return;
 165                 }
 166         }
 167
 168         g_warning ("Unbalanced allocation registration");
 169 }
 170
 171 /* ------------------------------------------------------------------------- */
 172
/*
 * Bison/Yacc internals: forward declarations of the lexer and the error
 * callback that the generated parser calls.
 */
static int yylex (void);
static int yyerror (char const *s);
 176
/* Everything the lexer and the grammar actions need for one parse.  */
typedef struct {
        char const *ptr;        /* current position of the lexer */
        char const *start;      /* start of the expression */

        /* Location where the parsing is taking place */
        GnmParsePos const *pos;

        /* loaded from convs with locale specific mappings */
        gunichar decimal_point;
        gunichar arg_sep;
        gunichar union_char;
        gunichar array_col_sep;
        gunichar array_row_sep;
        /* if arg_sep conflicts with array_col_sep or array_row_sep */
        int in_array_sep_is;    /* token id */

        GnmExprParseFlags     flags;
        GnmConventions const *convs;

        /* dynamic state */
        int in_array; /* toggled in the lexer for '{' and '}' */
        GnmExprList *result;    /* set by the "line" production */

        /* Where errors are reported; may be NULL, see report_err */
        GnmParseError *error;
} ParserState;
 202
/*
 * State of the parse in progress.  The helpers and grammar actions in
 * this file read it directly through this file-level pointer.
 */
static ParserState *state;
 205
 206 static void
 207 report_err (ParserState *state, GError *err,
 208             char const *last, int guesstimate_of_length)
 209 {
 210         if (state->error != NULL) {
 211                 state->error->err        = err;
 212                 state->error->end_char   = last - state->start;
 213                 state->error->begin_char = state->error->end_char - guesstimate_of_length;
 214                 if (state->error->begin_char < 0)
 215                         state->error->begin_char = 0;
 216         } else
 217                 g_error_free (err);
 218 }
 219
 220 static gboolean
 221 is_signed (const GnmExpr *expr)
 222 {
 223         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_UNARY_NEG)
 224                 return TRUE;
 225
 226         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_UNARY_PLUS)
 227                 return TRUE;
 228
 229         if (GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 230                 GnmValue const *v = expr->constant.value;
 231                 return VALUE_IS_FLOAT (v) && value_get_as_float (v) < 0;
 232         }
 233
 234         return FALSE;
 235 }
 236
 237 /* Handle -cst for use in arrays.  Don't handle other types here.  */
 238 static GnmExpr *
 239 fold_negative_constant (GnmExpr *expr)
 240 {
 241         if (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 242                 GnmValue *v = (GnmValue *)expr->constant.value;
 243
 244                 if (VALUE_IS_FLOAT (v)) {
 245                         gnm_float f = value_get_as_float (v);
 246                         expr->constant.value = value_new_float (0 - f);
 247                         value_release (v);
 248                         return expr;
 249                 }
 250         }
 251
 252         return NULL;
 253 }
 254
 255 /* Handle +cst for use in arrays.  Don't handle other types here.  */
 256 static GnmExpr *
 257 fold_positive_constant (GnmExpr *expr)
 258 {
 259         if (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT) {
 260                 const GnmValue *v = expr->constant.value;
 261                 if (VALUE_IS_FLOAT (v))
 262                         return expr;
 263         }
 264
 265         return NULL;
 266 }
 267
 268 static GnmExpr *
 269 build_unary_op (GnmExprOp op, GnmExpr *expr)
 270 {
 271         if (!expr) return NULL;
 272
 273         unregister_allocation (expr);
 274         return register_expr_allocation (gnm_expr_new_unary (op, expr));
 275 }
 276
 277 static GnmExpr *
 278 build_binop (GnmExpr *l, GnmExprOp op, GnmExpr *r)
 279 {
 280         if (!l || !r) return NULL;
 281
 282         unregister_allocation (r);
 283         unregister_allocation (l);
 284         return register_expr_allocation (gnm_expr_new_binary (l, op, r));
 285 }
 286
 287 static GnmExpr *
 288 build_logical (GnmExpr *l, gboolean is_and, GnmExpr *r)
 289 {
 290         static GnmFunc *and_func = NULL, *or_func = NULL;
 291
 292         if (!l || !r) return NULL;
 293
 294         if (and_func == NULL)
 295                 and_func = gnm_func_lookup ("AND", NULL);
 296         if (or_func == NULL)
 297                 or_func = gnm_func_lookup ("OR", NULL);
 298
 299         unregister_allocation (r);
 300         unregister_allocation (l);
 301         return register_expr_allocation
 302                 (gnm_expr_new_funcall2 (is_and ? and_func : or_func, l, r));
 303 }
 304
 305 static GnmExpr *
 306 build_not (GnmExpr *expr)
 307 {
 308         static GnmFunc *not_func = NULL;
 309
 310         if (!expr) return NULL;
 311
 312         if (not_func == NULL)
 313                 not_func = gnm_func_lookup ("NOT", NULL);
 314         unregister_allocation (expr);
 315         return register_expr_allocation
 316                 (gnm_expr_new_funcall1 (not_func, expr));
 317 }
 318
 319 static GnmExpr *
 320 build_exp (GnmExpr *l, GnmExpr *r)
 321 {
 322         if (is_signed (l)) {
 323                 /* See bug 115941 */
 324                 l = build_unary_op (GNM_EXPR_OP_PAREN, l);
 325         }
 326
 327         if (GNM_EXPR_GET_OPER (l) == GNM_EXPR_OP_EXP) {
 328                 /* Add ()s to x^y^z */
 329                 l = build_unary_op (GNM_EXPR_OP_PAREN, l);
 330         }
 331
 332         if (GNM_EXPR_GET_OPER (r) == GNM_EXPR_OP_EXP) {
 333                 /* Add ()s to x^y^z */
 334                 r = build_unary_op (GNM_EXPR_OP_PAREN, r);
 335         }
 336
 337         return build_binop (l, GNM_EXPR_OP_EXP, r);
 338 }
 339
 340 /*
 341  * Build an array expression.
 342  *
 343  * Returns NULL on failure.  Caller must YYERROR in that case.
 344  */
 345 static GnmExpr *
 346 build_array (GSList *cols)
 347 {
 348         GnmValue *array;
 349         int mx, y;
 350
 351         if (!cols) {
 352                 report_err (state, g_error_new (1, PERR_INVALID_EMPTY,
 353                         _("An array must have at least 1 element")),
 354                         state->ptr, 0);
 355                 return NULL;
 356         }
 357
 358         mx = g_list_length (cols->data);
 359         array = value_new_array_empty (mx, g_slist_length (cols));
 360
 361         y = 0;
 362         while (cols) {
 363                 GSList *row = cols->data;
 364                 int x = 0;
 365                 while (row && x < mx) {
 366                         GnmExpr const *expr = row->data;
 367                         GnmValue const *v = expr->constant.value;
 368
 369                         g_assert (expr && GNM_EXPR_GET_OPER (expr) == GNM_EXPR_OP_CONSTANT);
 370
 371                         value_array_set (array, x, y, value_dup (v));
 372
 373                         x++;
 374                         row = row->next;
 375                 }
 376                 if (x < mx || row) {
 377                         /* parser_error = PARSE_ERR_SYNTAX; */
 378                         report_err (state, g_error_new (1, PERR_ASYMETRIC_ARRAY,
 379                                 _("Arrays must be rectangular")),
 380                                 state->ptr, 0);
 381                         value_release (array);
 382                         return NULL;
 383                 }
 384                 y++;
 385                 cols = cols->next;
 386         }
 387
 388         return register_expr_allocation (gnm_expr_new_constant (array));
 389 }
 390
 391 /*
 392  * Build a range constructor.
 393  *
 394  * Returns NULL on failure.  Caller must YYERROR in that case.
 395  */
 396 static GnmExpr *
 397 build_range_ctor (GnmExpr *l, GnmExpr *r, GnmExpr *validate)
 398 {
 399         if (!l || !r) return NULL;
 400
 401         if (validate != NULL) {
 402                 if (GNM_EXPR_GET_OPER (validate) != GNM_EXPR_OP_CELLREF ||
 403                     validate->cellref.ref.sheet != NULL) {
 404                         report_err (state, g_error_new (1, PERR_UNEXPECTED_TOKEN,
 405                                 _("Constructed ranges use simple references")),
 406                                 state->ptr, 0);
 407                         return NULL;
 408                     }
 409         }
 410
 411         unregister_allocation (r);
 412         unregister_allocation (l);
 413         return register_expr_allocation (gnm_expr_new_range_ctor (l, r));
 414 }
 415
 416 /*
 417  * Build an intersection expression.
 418  *
 419  * Returns NULL on failure.  Caller must YYERROR in that case.
 420  */
 421 static GnmExpr *
 422 build_intersect (GnmExpr *l, GnmExpr *r)
 423 {
 424         if (!l || !r) return NULL;
 425
 426         if (gnm_expr_is_rangeref (l) && gnm_expr_is_rangeref (r))
 427                 return build_binop (l, GNM_EXPR_OP_INTERSECT, r);
 428         report_err (state, g_error_new (1, PERR_SET_CONTENT_MUST_BE_RANGE,
 429                 _("All entries in the set must be references")),
 430                 state->ptr, 0);
 431         return NULL;
 432 }
 433
 434 /*
 435  * Build a set expression.
 436  *
 437  * Returns NULL on failure.  Caller must YYERROR in that case.
 438  */
 439 static GnmExpr *
 440 build_set (GnmExprList *list)
 441 {
 442         /* verify that every thing is a ref */
 443         GnmExprList *ptr;
 444         for (ptr = list; ptr != NULL ; ptr = ptr->next) {
 445                 GnmExpr const *expr = ptr->data;
 446                 if (!expr || !gnm_expr_is_rangeref (expr)) {
 447                         report_err (state, g_error_new (1, PERR_SET_CONTENT_MUST_BE_RANGE,
 448                                 _("All entries in the set must be references")),
 449                                 state->ptr, 0);
 450                         return NULL;
 451                 }
 452         }
 453
 454         unregister_allocation (list);
 455         return register_expr_allocation (gnm_expr_new_set (list));
 456 }
 457
 458 /**
 459  * parse_string_as_value:
 460  *
 461  * Try to parse the entered text as a basic value (empty, bool, int,
 462  * gnm_float, err) if this succeeds, we store this as a GnmValue otherwise, we
 463  * return a string.
 464  */
 465 static GnmExpr *
 466 parse_string_as_value (GnmExpr *str)
 467 {
 468         GnmValue *v = format_match_simple (value_peek_string (str->constant.value));
 469
 470         if (v != NULL) {
 471                 unregister_allocation (str);
 472                 gnm_expr_free (str);
 473                 return register_expr_allocation (gnm_expr_new_constant (v));
 474         }
 475         return str;
 476 }
 477
 478 static const GnmExpr *
 479 parser_simple_name (const char *str, Sheet *sheet)
 480 {
 481         GnmExpr const *res;
 482         GnmNamedExpr *nexpr;
 483
 484         if (sheet) {
 485                 GnmParsePos pp;
 486                 parse_pos_init_sheet (&pp, sheet);
 487                 nexpr = expr_name_lookup (&pp, str);
 488         } else
 489                 nexpr = expr_name_lookup (state->pos, str);
 490
 491         if (nexpr == NULL) {
 492                 if (state->flags & GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID) {
 493                         GError *e;
 494                         e = sheet
 495                                 ? g_error_new (1, PERR_UNKNOWN_NAME,
 496                                                _("Name '%s' does not exist in sheet '%s'"),
 497                                                str, sheet->name_quoted)
 498                                 : g_error_new (1, PERR_UNKNOWN_NAME,
 499                                                _("Name '%s' does not exist"),
 500                                                str);
 501                         report_err (state, e, state->ptr, 0);
 502                         res = NULL;
 503                 } else if (!sheet && state->flags & GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_STRINGS) {
 504                         res = gnm_expr_new_constant (value_new_string (str));
 505                 } else if (state->convs->input.name_validate (str)) {
 506                         GnmParsePos pp = *state->pos;
 507                         pp.sheet = sheet;
 508                         /* Create a place holder */
 509                         nexpr = expr_name_add (&pp, str, NULL, NULL, TRUE, NULL);
 510                         res = gnm_expr_new_name (nexpr, sheet, NULL);
 511                 } else {
 512                         report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
 513                                                         _("'%s' cannot be used as a name"),
 514                                                         str),
 515                                     state->ptr, 0);
 516                         res = NULL;
 517                 }
 518         } else
 519                 res = gnm_expr_new_name (nexpr, sheet, NULL);
 520
 521         return res;
 522 }
 523
 524 /**
 525  * parser_simple_val_or_name:
 526  * @str : An expression with oper constant, whose value is a string.
 527  *
 528  * Check to see if a string is a simple value or failing that a named
 529  * expression, if it is not create a placeholder name for it.
 530  */
 531 static GnmExpr *
 532 parser_simple_val_or_name (GnmExpr *str_expr)
 533 {
 534         GnmExpr const *res;
 535         char const *str = value_peek_string (str_expr->constant.value);
 536         GnmValue *v = format_match_simple (str);
 537
 538         /* if it is not a simple value see if it is a name */
 539         if (v == NULL) {
 540                 res = parser_simple_name (str, NULL);
 541         } else
 542                 res = gnm_expr_new_constant (v);
 543
 544         unregister_allocation (str_expr);
 545         gnm_expr_free (str_expr);
 546         return register_expr_allocation (res);
 547 }
 548
 549 static Sheet *
 550 parser_sheet_by_name (Workbook *wb, GnmExpr *name_expr)
 551 {
 552         char const *name = value_peek_string (name_expr->constant.value);
 553         Sheet *sheet = NULL;
 554
 555         if (wb == NULL)
 556                 return NULL;
 557
 558         sheet = workbook_sheet_by_name (wb, name);
 559
 560         /* Applix has absolute and relative sheet references */
 561         if (sheet == NULL && *name == '$' &&
 562             state->convs->allow_absolute_sheet_references)
 563                 sheet = workbook_sheet_by_name (wb, name + 1);
 564
 565         if (sheet == NULL)
 566                 /* TODO : length is broken in the context of quoted names or
 567                  * names with escaped character */
 568                 /* -1 is a kludge.  We know that this routine is only called
 569                  * when the last token was SHEET_SEP */
 570                 report_err (state, g_error_new (1, PERR_UNKNOWN_SHEET,
 571                         _("Unknown sheet '%s'"), name),
 572                         state->ptr-1, strlen (name));
 573
 574         return sheet;
 575 }
 576
/* Make byacc happier: declare the generated parser entry point up front.  */
static int yyparse (void);
 579
 580 %}
 581
/* Semantic value types available to tokens and non-terminals.  */
%union {
        GnmExpr         *expr;
        GnmValue        *value;
        GnmCellRef      *cell;
        GnmExprList     *list;
        Sheet           *sheet;
        Workbook        *wb;
}
/* Semantic types of the non-terminals and tokens.  */
%type  <list>   opt_exp arg_list array_row array_rows
%type  <expr>   exp array_exp function string_opt_quote cellref
%token <expr>   STRING QUOTED_STRING CONSTANT RANGEREF tok_GTE tok_LTE tok_NE tok_AND tok_OR tok_NOT INTERSECT
%token          ARG_SEP ARRAY_COL_SEP ARRAY_ROW_SEP SHEET_SEP INVALID_TOKEN
%type  <sheet>  sheetref
%type  <wb>     workbookref
%token <wb>     tok_WORKBOOKREF

/*
 * Operator precedence and associativity.  In yacc, declarations that
 * come later bind tighter than the ones above them.
 */
%left '<' '>' '=' tok_GTE tok_LTE tok_NE
%left '&'
%left '-' '+'
%left '*' '/'
%right tok_RIGHT_EXP
%left  tok_LEFT_EXP
%nonassoc '%'
%nonassoc tok_NEG tok_PLUS tok_NOT
%left tok_AND tok_OR
%left ','
%left RANGE_INTERSECT
%left RANGE_SEP
 609 %left RANGE_SEP
 610
 611 %%
/*
 * Start symbol.  On success the expression list is stored in
 * state->result; on a syntax error any stored result is dropped.
 */
line:   opt_exp exp {
                /* The result takes over both pieces, so stop tracking them.  */
                unregister_allocation ($2);
                unregister_allocation ($1);
                state->result = gnm_expr_list_prepend ($1, $2);
        }

        | error         {
                if (state->result != NULL) {
                        gnm_expr_list_unref (state->result);
                        state->result = NULL;
                }
        }
        ;
 625
/* Zero or more leading "exp ARG_SEP" groups, collected by prepending.  */
opt_exp : opt_exp exp  ARG_SEP {
               unregister_allocation ($2);
               unregister_allocation ($1);
               $$ = gnm_expr_list_prepend ($1, $2);
               register_expr_list_allocation ($$);
        }
        /* Empty case: registering NULL is a no-op in register_allocation.  */
        | { $$ = NULL; register_expr_list_allocation ($$); }
        ;
 634
/*
 * A single expression.  The builder helpers keep the deallocation stack
 * up to date; alternatives whose helper can report an error check for a
 * NULL result and abort with YYERROR.
 */
exp:      CONSTANT      { $$ = $1; }
        | QUOTED_STRING { $$ = $1; }
        | STRING        {
                /* Unquoted text: a simple value or a name.  */
                $$ = parser_simple_val_or_name ($1);
                if ($$ == NULL) { YYERROR; }
        }
        | cellref       { $$ = $1; }
        | exp '+' exp   { $$ = build_binop ($1, GNM_EXPR_OP_ADD,        $3); }
        | exp '-' exp   { $$ = build_binop ($1, GNM_EXPR_OP_SUB,        $3); }
        | exp '*' exp   { $$ = build_binop ($1, GNM_EXPR_OP_MULT,       $3); }
        | exp '/' exp   { $$ = build_binop ($1, GNM_EXPR_OP_DIV,        $3); }
        | exp tok_RIGHT_EXP exp { $$ = build_exp ($1, $3); }
        | exp tok_LEFT_EXP  exp { $$ = build_exp ($1, $3); }
        | exp '&' exp   { $$ = build_binop ($1, GNM_EXPR_OP_CAT,        $3); }
        | exp '=' exp   { $$ = build_binop ($1, GNM_EXPR_OP_EQUAL,      $3); }
        | exp '<' exp   { $$ = build_binop ($1, GNM_EXPR_OP_LT,         $3); }
        | exp '>' exp   { $$ = build_binop ($1, GNM_EXPR_OP_GT,         $3); }
        | exp tok_GTE exp       { $$ = build_binop ($1, GNM_EXPR_OP_GTE,        $3); }
        | exp tok_NE  exp       { $$ = build_binop ($1, GNM_EXPR_OP_NOT_EQUAL,  $3); }
        | exp tok_LTE exp       { $$ = build_binop ($1, GNM_EXPR_OP_LTE,        $3); }
        | exp tok_AND exp       { $$ = build_logical ($1, TRUE, $3); }
        | exp tok_OR  exp       { $$ = build_logical ($1, FALSE, $3); }
        | exp RANGE_INTERSECT exp {
                $$ = build_intersect ($1, $3);
                if ($$ == NULL) { YYERROR; }
        }

        | '-' exp %prec tok_NEG {
                /* Negate float constants in place; wrap anything else.  */
                GnmExpr *tmp = fold_negative_constant ($2);
                $$ = tmp ? tmp : build_unary_op (GNM_EXPR_OP_UNARY_NEG, $2);
        }
        | '+' exp %prec tok_PLUS {
                /* Don't fold here.  */
                $$ = build_unary_op (GNM_EXPR_OP_UNARY_PLUS, $2);
        }
        | tok_NOT exp { $$ = build_not ($2); }
        | exp '%' { $$ = build_unary_op (GNM_EXPR_OP_PERCENTAGE, $1); }

        | '(' arg_list ')' {
                /* Empty is an error, one entry is a paren, more is a set.  */
                if ($2 == NULL) {
                        report_err (state, g_error_new (1, PERR_INVALID_EMPTY,
                                _("() is an invalid expression")),
                                state->ptr-2, 2);
                        YYERROR;
                } else {
                        if ($2->next == NULL) {
                                unregister_allocation ($2);
                                $$ = register_expr_allocation (gnm_expr_new_unary (GNM_EXPR_OP_PAREN, $2->data));
                                /* NOTE : free list not content */
                                gnm_expr_list_free ($2);
                        } else {
                                $$ = build_set ($2);
                                if ($$ == NULL) { YYERROR; }
                        }
                }
        }
        | '{' array_rows '}' {
                /* build_array copies the values, so the rows are always freed.  */
                unregister_allocation ($2);
                $$ = build_array ($2);
                free_expr_list_list ($2);
                if ($$ == NULL) { YYERROR; }
        }

        | function
        | sheetref STRING {
                /* Name qualified by a sheet.  */
                char const *name = value_peek_string ($2->constant.value);
                GnmExpr const *ename = parser_simple_name (name, $1);

                if (ename) {
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = register_expr_allocation (ename);
                } else {
                        YYERROR;
                }
        }
        | workbookref STRING {
                /* Name qualified by a workbook only; it must already exist.  */
                GnmNamedExpr *nexpr = NULL;
                char const *name = value_peek_string ($2->constant.value);
                GnmParsePos pos = *state->pos;

                pos.sheet = NULL;
                pos.wb = $1;
                nexpr = expr_name_lookup (&pos, name);
                if (nexpr != NULL) {
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = register_expr_allocation (gnm_expr_new_name (nexpr, NULL, $1));
                } else {
                        report_err (state, g_error_new (1, PERR_UNKNOWN_NAME,
                                _("Name '%s' does not exist in workbook"),
                                                        name),
                                state->ptr, strlen (name));
                        YYERROR;
                }
        }
        ;
 730
/*
 * Function call.  The input.func hook of the conventions builds the
 * call expression from the name and the argument list; a NULL result
 * aborts the parse.
 */
function : STRING '(' arg_list ')' {
                char const *name = value_peek_string ($1->constant.value);
                GnmExpr const *f_call = (*state->convs->input.func) (
                        state->convs, state->pos->wb, name, $3);

                $$ = NULL;
                if (f_call) {
                        /* We're done with the function name.  */
                        unregister_allocation ($1); gnm_expr_free ($1);
                        /* NOTE(review): the list is not freed here, so the
                         * hook presumably took ownership of it -- confirm.  */
                        unregister_allocation ($3);
                        $$ = register_expr_allocation (f_call);
                } else {
                        YYERROR;
                }
        }
        ;
 747
/* A string token, quoted or not.  */
string_opt_quote : STRING
                 | QUOTED_STRING
                 ;

/* An optional SHEET_SEP token.  */
opt_sheet_sep    : SHEET_SEP
                 | ;
 754
/*
 * only used for names
 *
 * Produces a workbook from a tok_WORKBOOKREF token, from "[name]"
 * (resolved through the external_wb hook of the conventions), or from
 * "[]" (the workbook of the current parse position).
 */
workbookref : tok_WORKBOOKREF opt_sheet_sep
            | '[' string_opt_quote ']'  {
                char const *wb_name = value_peek_string ($2->constant.value);
                /* Reference workbook: pos->wb, else the workbook of pos->sheet.  */
                Workbook *ref_wb = state->pos
                        ? (state->pos->wb
                           ? state->pos->wb
                           : (state->pos->sheet
                              ? state->pos->sheet->workbook
                              : NULL))
                        : NULL;
                Workbook *wb =
                        state->convs->input.external_wb (state->convs,
                                                         ref_wb,
                                                         wb_name);

                if (wb != NULL) {
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = wb;
                } else {
                        /* kludge to produce better error messages
                         * we know that the last token read will be the ']'
                         * so subtract 1.
                         */
                        report_err (state, g_error_new (1, PERR_UNKNOWN_WORKBOOK,
                                _("Unknown workbook '%s'"), wb_name),
                                state->ptr - 1, strlen (wb_name));
                        YYERROR;
                }
        }
        | '[' ']' {
                /* Special syntax for global names shadowed by sheet names.  */
                Workbook *wb = state->pos
                        ? (state->pos->wb
                           ? state->pos->wb
                           : (state->pos->sheet
                              ? state->pos->sheet->workbook
                              : NULL))
                        : NULL;
                $$ = wb;
                if (wb == NULL) {
                        report_err (state, g_error_new (1, PERR_UNKNOWN_WORKBOOK,
                                _("Unknown workbook")),
                                state->ptr - 1, 1);
                        YYERROR;
                }
        }
        ;
 803
/* does not need to handle 3d case.  this is only used for names.
 * 3d cell references are handled in the lexer
 *
 * The sheet is looked up in the workbook of the parse position, or in
 * the explicitly given workbook.  parser_sheet_by_name reports the
 * error for an unknown sheet; the name expression is freed only on
 * success.
 */
sheetref: string_opt_quote SHEET_SEP {
                Sheet *sheet = parser_sheet_by_name (state->pos->wb, $1);
                if (sheet != NULL) {
                        unregister_allocation ($1); gnm_expr_free ($1);
                        $$ = sheet;
                } else {
                        YYERROR;
                }
        }
        | workbookref string_opt_quote SHEET_SEP {
                Workbook *wb = $1;
                Sheet *sheet = parser_sheet_by_name (wb, $2);
                if (sheet != NULL) {
                        unregister_allocation ($2); gnm_expr_free ($2);
                        $$ = sheet;
                } else {
                        YYERROR;
                }
        }
        ;
 827
/*
 * A reference: a plain RANGEREF token, or a range built from two end
 * points joined by RANGE_SEP.  When exactly one end point is a RANGEREF
 * it is passed as the third argument so that build_range_ctor validates
 * it.
 */
cellref:  RANGEREF { $$ = $1; }
        | function RANGE_SEP function {
                $$ = build_range_ctor ($1, $3, NULL);
                if ($$ == NULL) { YYERROR; }
        }
        | RANGEREF RANGE_SEP function {
                $$ = build_range_ctor ($1, $3, $1);
                if ($$ == NULL) { YYERROR; }
        }
        | function RANGE_SEP RANGEREF {
                $$ = build_range_ctor ($1, $3, $3);
                if ($$ == NULL) { YYERROR; }
        }
        | RANGEREF RANGE_SEP RANGEREF {
                $$ = build_range_ctor ($1, $3, NULL);
                if ($$ == NULL) { YYERROR; }
        }
        ;
 846
     /*
      * arg_list: a possibly empty list of expressions separated by ARG_SEP.
      * The rule is right-recursive and each step prepends its expression to
      * the list built for the remainder, so the finished list is in source
      * order.  The empty alternative yields NULL.
      *
      * An omitted argument is represented by a constant holding an empty
      * value: one is inserted for a separator with nothing before it, and
      * one for a separator with nothing after it (tail == NULL).
      *
      * Every expression that moves into the list is unregistered first and
      * the resulting list is registered in its place, so exactly one
      * registry entry covers the data at any time.
      */
 847 arg_list: exp {
 848                 unregister_allocation ($1);
 849                 $$ = gnm_expr_list_prepend (NULL, $1);
 850                 register_expr_list_allocation ($$);
 851         }
 852         | exp ARG_SEP arg_list {
 853                 GSList *tmp = $3;
 854                 unregister_allocation ($3);
 855                 unregister_allocation ($1);
 856
                     /* Nothing after the separator: supply an empty argument.  */
 857                 if (tmp == NULL)
 858                         tmp = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_empty ()));
 859
 860                 $$ = gnm_expr_list_prepend (tmp, $1);
 861                 register_expr_list_allocation ($$);
 862         }
 863         | ARG_SEP arg_list {
 864                 GSList *tmp = $2;
 865                 unregister_allocation ($2);
 866
                     /* Nothing after the separator: supply an empty argument.  */
 867                 if (tmp == NULL)
 868                         tmp = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_empty ()));
 869
                     /* Nothing before the separator either: another empty argument.  */
 870                 $$ = gnm_expr_list_prepend (tmp, gnm_expr_new_constant (value_new_empty ()));
 871                 register_expr_list_allocation ($$);
 872         }
 873         | { $$ = NULL; }
 874         ;
 875
     /*
      * array_exp: one element of an array literal.  Accepted forms are a
      * CONSTANT token, a CONSTANT preceded by a '-' or '+' sign, and a
      * string.  Signed constants go through fold_negative_constant() /
      * fold_positive_constant(); a NULL result from either is turned into a
      * parse error.  Strings are converted with parse_string_as_value().
      */
 876 array_exp:     CONSTANT         { $$ = $1; }
 877          | '-' CONSTANT         {
 878                 GnmExpr *tmp = fold_negative_constant ($2);
 879                 if (!tmp) { YYERROR; }
 880                 $$ = tmp;
 881          }
 882          | '+' CONSTANT         {
 883                 GnmExpr *tmp = fold_positive_constant ($2);
 884                 if (!tmp) { YYERROR; }
 885                 $$ = tmp;
 886          }
 887          | string_opt_quote     { $$ = parse_string_as_value ($1); }
 888          ;
 889
 890
     /*
      * array_row: the elements of one row of an array literal, separated by
      * ARRAY_COL_SEP.  An empty row yields NULL.  Elements are prepended
      * while the right recursion unwinds, which leaves the list in source
      * order.  Each element is unregistered as it moves into the list and
      * the list itself is registered instead.
      */
 891 array_row : { $$ = NULL; }
 892         | array_exp {
 893                 unregister_allocation ($1);
 894                 $$ = g_slist_prepend (NULL, $1);
 895                 register_expr_list_allocation ($$);
 896         }
 897         | array_exp ARRAY_COL_SEP array_row {
 898                 unregister_allocation ($3);
 899                 unregister_allocation ($1);
 900                 $$ = g_slist_prepend ($3, $1);
 901                 register_expr_list_allocation ($$);
 902         }
 903         ;
 904
     /*
      * array_rows: one or more array_row lists separated by ARRAY_ROW_SEP,
      * collected into a list of row lists in source order.  Each row list is
      * unregistered as it moves into the outer list, and the outer list is
      * registered through register_expr_list_list_allocation().
      */
 905 array_rows: array_row {
 906                 unregister_allocation ($1);
 907                 $$ = g_slist_prepend (NULL, $1);
 908                 register_expr_list_list_allocation ($$);
 909         }
 910         | array_row ARRAY_ROW_SEP array_rows {
 911                 unregister_allocation ($3);
 912                 unregister_allocation ($1);
 913                 $$ = g_slist_prepend ($3, $1);
 914                 register_expr_list_list_allocation ($$);
 915         }
 916         ;
 917
 918 %%
 919
 920 static char const *
 921 find_matching_close (char const *str, char const **res)
 922 {
 923         while (*str) {
 924                 if (*str == '(') {
 925                         char const *tmp = str;
 926                         str = find_matching_close (str + 1, res);
 927                         if (*str != ')' && *res == NULL) {
 928                                 *res = tmp;
 929                                 return str;
 930                         }
 931                         if (*str == 0)
 932                                 return str;
 933                 } else if (*str == ')')
 934                         return str;
 935                 else if (*str == '\'' || *str == '\"') {
 936                         GString *dummy = g_string_new (NULL);
 937                         char const *end = go_strunescape (dummy, str);
 938                         g_string_free (dummy, TRUE);
 939                         if (end == NULL)
 940                                 return str + strlen (str);
 941                         str = end;
 942                         continue; /* skip incrementing str */
 943                 }
 944                 str = g_utf8_next_char (str);
 945         }
 946
 947         return str;
 948 }
 949
 950 static inline int
 951 eat_space (ParserState *state, int res)
 952 {
 953         /* help the user by ignoring pointless spaces after an
 954          * arg_sep.  We know they are going to be errors and
 955          * the spaces can not be operators in this context */
 956         while (*state->ptr == ' ')
 957                 state->ptr++;
 958         return res;
 959 }
 960
 961 /*
 962  * Do we want to ignore space before a given character?
 963  */
 964 static gboolean
 965 ignore_space_before (gunichar c)
 966 {
 967         switch (c) {
 968         case '*': case '/': case '+': case '-': case '%': case '^': case '&':
 969         case '>': case '<': case '=':
 970         case ')':
 971         case '#':
 972         case '"': case '\'':  /* Refers to opening quote only.  */
 973         case UNICODE_LOGICAL_NOT_C:
 974         case UNICODE_LOGICAL_AND_C:
 975         case UNICODE_LOGICAL_OR_C:
 976         case UNICODE_MINUS_SIGN_C:
 977         case UNICODE_DIVISION_SLASH_C:
 978         case UNICODE_NOT_EQUAL_TO_C:
 979         case UNICODE_LESS_THAN_OR_EQUAL_TO_C:
 980         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C:
 981         case 0:
 982                 return TRUE;
 983         default:
 984                 return FALSE;
 985         }
 986 }
 987
 988 /*
 989  * Do we want to ignore space after a given character?
 990  */
 991 static gboolean
 992 ignore_space_after (gunichar c)
 993 {
 994         switch (c) {
 995         case '*': case '/': case '+': case '-': case '%': case '^': case '&':
 996         case '>': case '<': case '=':
 997         case '(':
 998         case '"': case '\'':  /* Refers to closing quote only [not actually hit].  */
 999         case UNICODE_LOGICAL_NOT_C:
1000         case UNICODE_LOGICAL_AND_C:
1001         case UNICODE_LOGICAL_OR_C:
1002         case UNICODE_MINUS_SIGN_C:
1003         case UNICODE_DIVISION_SLASH_C:
1004         case UNICODE_NOT_EQUAL_TO_C:
1005         case UNICODE_LESS_THAN_OR_EQUAL_TO_C:
1006         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C:
1007         case 0:
1008                 return TRUE;
1009         default:
1010                 return FALSE;
1011         }
1012 }
1013
1014 static gboolean
1015 open_paren (const char *p)
1016 {
1017         while (g_unichar_isspace (g_utf8_get_char (p)))
1018                 p = g_utf8_next_char (p);
1019         return *p == '(';
1020 }
1021
1022 static int
1023 yylex (void)
1024 {
1025         gunichar c, tmp;
1026         char const *start, *end;
1027         GnmRangeRef ref;
1028         gboolean is_number = FALSE;
1029         gboolean is_space = FALSE;
1030         gboolean error_token = FALSE;
1031
1032         /*
1033          * Some special logic to handle space as intersection char.
1034          * Any number of white space characters are treated as one
1035          * intersecton.
1036          *
1037          * Also, if we are not using space for that, drop spaces.
1038          */
1039         while (g_unichar_isspace (g_utf8_get_char (state->ptr))) {
1040                 state->ptr = g_utf8_next_char (state->ptr);
1041                 is_space = TRUE;
1042         }
1043         if (is_space && state->convs->intersection_char == ' ' &&
1044             !ignore_space_before (g_utf8_get_char (state->ptr)))
1045                 return RANGE_INTERSECT;
1046
1047         start = state->ptr;
1048         c = g_utf8_get_char (start);
1049         if (c == 0)
1050                 return 0;
1051         state->ptr = g_utf8_next_char (state->ptr);
1052
1053         if (c == state->convs->intersection_char)
1054                 return RANGE_INTERSECT;
1055
1056         if (c == '&' && state->convs->decode_ampersands) {
1057                 if (!strncmp (state->ptr, "amp;", 4)) {
1058                         state->ptr += 4;
1059                         return '&';
1060                 }
1061
1062                 if (!strncmp (state->ptr, "lt;", 3)) {
1063                         state->ptr += 3;
1064                         if (*state->ptr == '='){
1065                                 state->ptr++;
1066                                 return tok_LTE;
1067                         }
1068                         if (!strncmp (state->ptr, "&gt;", 4)) {
1069                                 state->ptr += 4;
1070                                 return tok_NE;
1071                         }
1072                         return '<';
1073                 }
1074                 if (!strncmp (state->ptr, "gt;", 3)) {
1075                         state->ptr += 3;
1076                         if (*state->ptr == '='){
1077                                 state->ptr++;
1078                                 return tok_GTE;
1079                         }
1080                         return '>';
1081                 }
1082                 if (!strncmp (state->ptr, "apos;", 5) ||
1083                     !strncmp (state->ptr, "quot;", 5)) {
1084                         char const *quotes_end;
1085                         char const *p;
1086                         char *string, *s;
1087                         GnmValue *v;
1088
1089                         if (*state->ptr == 'q') {
1090                                 quotes_end = "&quot;";
1091                                 c = '\"';
1092                         } else {
1093                                 quotes_end = "&apos;";
1094                                 c = '\'';
1095                         }
1096
1097                         state->ptr += 5;
1098                         p = state->ptr;
1099                         double_quote_loop:
1100                                 state->ptr = strstr (state->ptr, quotes_end);
1101                         if (!*state->ptr) {
1102                                 report_err (state, g_error_new (1, PERR_MISSING_CLOSING_QUOTE,
1103                                                                 _("Could not find matching closing quote")),
1104                                             p, 1);
1105                                 return INVALID_TOKEN;
1106                         }
1107                         if (!strncmp (state->ptr + 6, quotes_end, 6)) {
1108                                 state->ptr += 2 * 6;
1109                                 goto double_quote_loop;
1110                         }
1111
1112                         s = string = g_malloc (1 + state->ptr - p);
1113                         while (p != state->ptr) {
1114                                 if (*p == '&') {
1115                                         if (!strncmp (p, "&amp;", 5)) {
1116                                                 p += 5;
1117                                                 *s++ = '&';
1118                                                 continue;
1119                                         } else if (!strncmp (p, "&lt;", 4)) {
1120                                                 p += 4;
1121                                                 *s++ = '<';
1122                                                 continue;
1123                                         } else if (!strncmp (p, "&gt;", 4)) {
1124                                                 p += 4;
1125                                                 *s++ = '>';
1126                                                 continue;
1127                                         } else if (!strncmp (p, quotes_end, 6)) {
1128                                                 p += 12; /* two in a row is the escape mechanism */
1129                                                 *s++ = c;
1130                                                 continue;
1131                                         } else if (!strncmp (p, "&quot;", 6)) {
1132                                                 p += 6;
1133                                                 *s++ = '\"';
1134                                                 continue;
1135                                         } else if (!strncmp (p, "&apos;", 6)) {
1136                                                 p += 6;
1137                                                 *s++ = '\'';
1138                                                 continue;
1139                                         }
1140                                 }
1141                                 *s++ = *p++;
1142                         }
1143
1144                         *s = 0;
1145                         state->ptr += 6;
1146
1147                         v = value_new_string_nocopy (string);
1148                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1149                         return QUOTED_STRING;
1150                 }
1151         }
1152
1153         if (c == ':' && state->convs->range_sep_colon)
1154                 return eat_space (state, RANGE_SEP);
1155
1156         if (c == state->convs->sheet_name_sep)
1157                 return eat_space (state, SHEET_SEP);
1158
1159         if (c == '.' && *state->ptr == '.' && state->convs->range_sep_dotdot) {
1160                 state->ptr++;
1161                 return RANGE_SEP;
1162         }
1163
1164         if (c == '#' && state->convs->accept_hash_logicals) {
1165                 if (!strncmp (state->ptr, "NOT#", 4)) {
1166                         state->ptr += 4;
1167                         return eat_space (state, tok_NOT);
1168                 }
1169                 if (!strncmp (state->ptr, "AND#", 4)) {
1170                         state->ptr += 4;
1171                         return eat_space (state, tok_AND);
1172                 }
1173                 if (!strncmp (state->ptr, "OR#", 3)) {
1174                         state->ptr += 3;
1175                         return eat_space (state, tok_OR);
1176                 }
1177         }
1178
1179         if (c == state->arg_sep)
1180                 return eat_space (state, state->in_array ? state->in_array_sep_is : ARG_SEP);
1181         if ((c == state->union_char) && (state->union_char != 0))
1182                 return eat_space (state, ARG_SEP);
1183         if (c == state->array_col_sep)
1184                 return eat_space (state, ARRAY_COL_SEP);
1185         if (c == state->array_row_sep)
1186                 return eat_space (state, ARRAY_ROW_SEP);
1187
1188         end = state->convs->input.range_ref (&ref, start,
1189                                              state->pos, state->convs);
1190         /*
1191          * In order to parse "LOG10(1024)" in sheets with more than ~8500
1192          * columns we do not consider anything a rangeref if it is followed
1193          * by an opening parenthesis.
1194          */
1195         if (start != end && !open_paren (end)) {
1196                 state->ptr = end;
1197                 if (invalid_sheet == ref.a.sheet) {
1198                         yylval.expr = register_expr_allocation
1199                                 (gnm_expr_new_constant
1200                                  (value_new_error_REF (NULL)));
1201                         return CONSTANT;
1202                 }
1203                 if (state->flags & GNM_EXPR_PARSE_FORCE_ABSOLUTE_REFERENCES) {
1204                         if (ref.a.col_relative) {
1205                                 ref.a.col += state->pos->eval.col;
1206                                 ref.a.col_relative = FALSE;
1207                         }
1208                         if (ref.b.col_relative) {
1209                                 ref.b.col += state->pos->eval.col;
1210                                 ref.b.col_relative = FALSE;
1211                         }
1212                         if (ref.a.row_relative) {
1213                                 ref.a.row += state->pos->eval.row;
1214                                 ref.a.row_relative = FALSE;
1215                         }
1216                         if (ref.b.row_relative) {
1217                                 ref.b.row += state->pos->eval.row;
1218                                 ref.b.row_relative = FALSE;
1219                         }
1220                 } else if (state->flags & GNM_EXPR_PARSE_FORCE_RELATIVE_REFERENCES) {
1221                         if (!ref.a.col_relative) {
1222                                 ref.a.col -= state->pos->eval.col;
1223                                 ref.a.col_relative = TRUE;
1224                         }
1225                         if (!ref.b.col_relative) {
1226                                 ref.b.col -= state->pos->eval.col;
1227                                 ref.b.col_relative = TRUE;
1228                         }
1229                         if (!ref.a.row_relative) {
1230                                 ref.a.row -= state->pos->eval.row;
1231                                 ref.a.row_relative = TRUE;
1232                         }
1233                         if (!ref.b.row_relative) {
1234                                 ref.b.row -= state->pos->eval.row;
1235                                 ref.b.row_relative = TRUE;
1236                         }
1237                 }
1238
1239                 if (ref.a.sheet == NULL && (state->flags & GNM_EXPR_PARSE_FORCE_EXPLICIT_SHEET_REFERENCES)) {
1240                         ref.a.sheet = state->pos->sheet;
1241                         if (ref.a.sheet == NULL) {
1242                                 report_err (state, g_error_new (1, PERR_SHEET_IS_REQUIRED,
1243                                         _("Sheet name is required")),
1244                                         state->ptr, 0);
1245                                 return INVALID_TOKEN;
1246                         }
1247                 }
1248
1249                 if ((ref.b.sheet == NULL || ref.b.sheet == ref.a.sheet) &&
1250                     ref.a.col           == ref.b.col &&
1251                     ref.a.col_relative  == ref.b.col_relative &&
1252                     ref.a.row           == ref.b.row &&
1253                     ref.a.row_relative  == ref.b.row_relative) {
1254                         yylval.expr = register_expr_allocation (gnm_expr_new_cellref (&ref.a));
1255                         return RANGEREF;
1256                 }
1257                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (
1258                          value_new_cellrange_unsafe (&ref.a, &ref.b)));
1259                 return RANGEREF;
1260         }
1261
1262         /* Do NOT handle negative numbers here.  That has to be done in the
1263          * parser otherwise we mishandle A1-1 when it looks like
1264          * rangeref CONSTANT  */
1265         if (c == state->decimal_point) {
1266                 /* Could be a number or a stand alone  */
1267                 if (!g_unichar_isdigit (g_utf8_get_char (state->ptr)))
1268                         return c;
1269                 is_number = TRUE;
1270         }  else if (g_unichar_isdigit (c)) {
1271                 /* find the end of the first portion of the number */
1272                 do {
1273                         c = g_utf8_get_char (state->ptr);
1274                         state->ptr = g_utf8_next_char (state->ptr);
1275                 } while (g_unichar_isdigit (c));
1276                 is_number = TRUE;
1277                 if (c == 0)
1278                         state->ptr--;
1279         }
1280
1281         if (is_number) {
1282                 GnmValue *v = NULL;
1283
1284                 if (c == state->decimal_point || c == 'e' || c == 'E') {
1285                         /* This is a floating point number */
1286                         char *end;
1287                         gnm_float d;
1288
1289                         errno = 0;
1290                         d = gnm_utf8_strto (start, &end);
1291                         if (start == end) {
1292                                 g_warning ("%s is not a double, but was expected to be one", start);
1293                         }  else if (errno != ERANGE) {
1294                                 v = value_new_float (d);
1295                                 state->ptr = end;
1296                         } else if (c != 'e' && c != 'E') {
1297                                 report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1298                                         _("The number is out of range")),
1299                                         state->ptr, end - start);
1300                                 return INVALID_TOKEN;
1301                         } else {
1302                                 /* For an exponent it's hard to highlight the
1303                                  * right region w/o it turning into an ugly
1304                                  * hack, for now the cursor is put at the end.
1305                                  */
1306                                 report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1307                                         _("The number is out of range")),
1308                                         state->ptr, 0);
1309                                 return INVALID_TOKEN;
1310                         }
1311                 } else {
1312                         char *end;
1313                         long l;
1314
1315                         l = gnm_utf8_strtol (start, &end);
1316                         if (start == end) {
1317                                 g_warning ("%s is not an integer, but was expected to be one", start);
1318                         } else if (errno != ERANGE && l >= INT_MIN && l <= INT_MAX) {
1319                                 v = value_new_int (l);
1320                                 state->ptr = end;
1321                         } else {
1322                                 gnm_float d;
1323
1324                                 errno = 0;
1325                                 d = gnm_utf8_strto (start, &end);
1326                                 if (errno != ERANGE) {
1327                                         v = value_new_float (d);
1328                                         state->ptr = end;
1329                                 } else {
1330                                         report_err (state, g_error_new (1, PERR_OUT_OF_RANGE,
1331                                                 _("The number is out of range")),
1332                                                 state->ptr, end - start);
1333                                         return INVALID_TOKEN;
1334                                 }
1335                         }
1336                 }
1337
1338                 /* Very odd string,  Could be a bound problem.  Trigger an error */
1339                 if (v == NULL)
1340                         return c;
1341
1342                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1343                 return CONSTANT;
1344         }
1345
1346         switch (c) {
1347         case '#':
1348                 if (state->ptr[0] != '"') {
1349                         while ((tmp = g_utf8_get_char (state->ptr)) != 0 &&
1350                                !g_unichar_isspace (tmp)) {
1351                                 state->ptr = g_utf8_next_char (state->ptr);
1352                                 if (tmp == '!' || tmp == '?' ||
1353                                 ((state->ptr - start) == 4 && 0 == strncmp (start, "#N/A", 4))) {
1354                                         GOString *name = go_string_new_nocopy (g_strndup (start, state->ptr - start));
1355                                         yylval.expr = register_expr_allocation
1356                                                 (gnm_expr_new_constant (
1357                                                         value_new_error_str (NULL, name)));
1358                                         go_string_unref (name);
1359                                         return CONSTANT;
1360                                 }
1361                         }
1362
1363                         report_err (state, g_error_new
1364                                     (1, PERR_UNEXPECTED_TOKEN,
1365                                      _("Improperly formatted error token")),
1366                                     state->ptr, state->ptr - start);
1367
1368                         return INVALID_TOKEN;
1369                 }
1370                 error_token = TRUE;
1371                 start++;
1372                 /* Fall through */
1373         case '\'':
1374         case '"': {
1375                 GString *s = g_string_new (NULL);
1376                 char const *end = state->convs->input.string (start, s, state->convs);
1377
1378                 if (end == NULL) {
1379                         size_t len = strlen (start);
1380                         g_string_free (s, TRUE);
1381                         report_err (state,
1382                                     g_error_new (1, PERR_MISSING_CLOSING_QUOTE,
1383                                                  _("Could not find matching closing quote")),
1384                                     start + len, len);
1385                         return INVALID_TOKEN;
1386                 }
1387
1388                 state->ptr = (char *)end;
1389
1390                 if (error_token) {
1391                         GnmValue *v = value_new_error (NULL, s->str);
1392                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1393                         g_string_free (s, TRUE);
1394                         return eat_space (state, CONSTANT);
1395                 } else {
1396                         GnmValue *v = value_new_string_nocopy (g_string_free (s, FALSE));
1397                         yylval.expr = register_expr_allocation (gnm_expr_new_constant (v));
1398                         return eat_space (state, QUOTED_STRING);
1399                 }
1400         }
1401
1402         case '[': {
1403                 const char *p = state->ptr;
1404                 GString *s = g_string_new (NULL);
1405                 Workbook *ref_wb = state->pos
1406                         ? (state->pos->wb
1407                            ? state->pos->wb
1408                            : (state->pos->sheet
1409                               ? state->pos->sheet->workbook
1410                               : NULL))
1411                         : NULL;
1412
1413                 while (g_unichar_isspace (g_utf8_get_char (p)))
1414                         p = g_utf8_next_char (p);
1415
1416                 if (p[0] == '"' || p[0] == '\'') {
1417                         p = go_strunescape (s, p);
1418                 } else {
1419                         gunichar uc;
1420                         while (1) {
1421                                 uc = g_utf8_get_char (p);
1422                                 if (!uc || uc == ']' || g_unichar_isspace (uc))
1423                                         break;
1424                                 p = g_utf8_next_char (p);
1425                                 g_string_append_unichar (s, uc);
1426                         }
1427                 }
1428
1429                 while (p && g_unichar_isspace (g_utf8_get_char (p)))
1430                         p = g_utf8_next_char (p);
1431
1432                 if (s->len == 0 || !p || p[0] != ']') {
1433                         g_string_free (s, TRUE);
1434                         break;
1435                 }
1436
1437                 yylval.wb = state->convs->input.external_wb (state->convs,
1438                                                              ref_wb,
1439                                                              s->str);
1440                 g_string_free (s, TRUE);
1441                 if (!yylval.wb)
1442                         break;
1443
1444                 state->ptr = p + 1;
1445                 return tok_WORKBOOKREF;
1446         }
1447         }
1448
1449         if ((end = state->convs->input.name (start, state->convs))) {
1450                 state->ptr = end;
1451                 yylval.expr = register_expr_allocation (gnm_expr_new_constant (
1452                         value_new_string_nocopy (g_strndup (start, state->ptr - start))));
1453                 return STRING;
1454         }
1455
1456         switch (c) {
1457         case '<':
1458                 if (*state->ptr == '='){
1459                         state->ptr++;
1460                         return eat_space (state, tok_LTE);
1461                 }
1462                 if (*state->ptr == '>'){
1463                         state->ptr++;
1464                         return eat_space (state, tok_NE);
1465                 }
1466                 return eat_space (state, c);
1467
1468         case '>':
1469                 if (*state->ptr == '='){
1470                         state->ptr++;
1471                         return eat_space (state, tok_GTE);
1472                 }
1473                 return eat_space (state, c);
1474
1475         case '\n': return 0;
1476
1477         case '{':
1478                 state->in_array++;
1479                 return c;
1480         case '}':
1481                 state->in_array--;
1482                 return c;
1483
1484         case '^':
1485                 return state->convs->exp_is_left_associative
1486                         ? tok_LEFT_EXP
1487                         : tok_RIGHT_EXP;
1488
1489         case UNICODE_LOGICAL_NOT_C: return tok_NOT;
1490         case UNICODE_MINUS_SIGN_C: return '-';
1491         case UNICODE_DIVISION_SLASH_C: return '/';
1492         case UNICODE_LOGICAL_AND_C: return tok_AND;
1493         case UNICODE_LOGICAL_OR_C: return tok_OR;
1494         case UNICODE_NOT_EQUAL_TO_C: return eat_space (state, tok_NE);
1495         case UNICODE_LESS_THAN_OR_EQUAL_TO_C: return eat_space (state, tok_LTE);
1496         case UNICODE_GREATER_THAN_OR_EQUAL_TO_C: return eat_space (state, tok_GTE);
1497         }
1498
1499         if (ignore_space_after (c))
1500                 return eat_space (state, c);
1501         else
1502                 return c;
1503 }
1504
/*
 * yyerror:
 * @s: message text supplied by the generated parser.
 *
 * Error hook required by the generated parser.  The message is discarded
 * and 0 is always returned.  Enable the disabled block below to print the
 * raw message while debugging the grammar.
 */
int
yyerror (char const *s)
{
        (void) s;
#if 0
        g_printerr ("Error: %s\n", s);
#endif
        return 0;
}
1513
1514 static void
1515 setup_state (ParserState *pstate, const char *str,
1516              GnmParsePos const *pp,
1517              GnmExprParseFlags flags,
1518              GnmConventions const *convs,
1519              GnmParseError *error)
1520 {
1521         pstate->start = pstate->ptr = str;
1522         pstate->pos   = pp;
1523
1524         pstate->flags           = flags;
1525         pstate->convs                                    =
1526                 (NULL != convs) ? convs : ((NULL != pp->sheet) ? pp->sheet->convs : gnm_conventions_default);
1527
1528
1529         pstate->decimal_point = pstate->convs->decimal_sep_dot
1530                 ? '.'
1531                 : g_utf8_get_char (go_locale_get_decimal ()->str); /* FIXME: one char handled.  */
1532
1533         if (pstate->convs->arg_sep != 0)
1534                 pstate->arg_sep = pstate->convs->arg_sep;
1535         else
1536                 pstate->arg_sep = go_locale_get_arg_sep ();
1537         pstate->union_char = pstate->convs->union_char;
1538         if (pstate->convs->array_col_sep != 0)
1539                 pstate->array_col_sep = pstate->convs->array_col_sep;
1540         else
1541                 pstate->array_col_sep = go_locale_get_col_sep ();
1542         if (pstate->convs->array_row_sep != 0)
1543                 pstate->array_row_sep = pstate->convs->array_row_sep;
1544         else
1545                 pstate->array_row_sep = go_locale_get_row_sep ();
1546
1547         /* Some locales/conventions have ARG_SEP == ARRAY_ROW_SEP
1548          *      eg {1\2\3;4\5\6} for XL style with ',' as a decimal
1549          * some have ARG_SEP == ARRAY_COL_SEPARATOR
1550          *      eg {1,2,3;4,5,6} for XL style with '.' as a decimal
1551          *      or {1;2;3|4;5;6} for OOo/
1552          * keep track of whether we are in an array to allow the lexer to
1553          * dis-ambiguate. */
1554         if (pstate->arg_sep == pstate->array_col_sep)
1555                 pstate->in_array_sep_is = ARRAY_COL_SEP;
1556         else if (pstate->arg_sep == pstate->array_row_sep)
1557                 pstate->in_array_sep_is = ARRAY_ROW_SEP;
1558         else
1559                 pstate->in_array_sep_is = ARG_SEP;
1560         pstate->in_array = 0;
1561
1562         pstate->result = NULL;
1563         pstate->error = error;
1564
1565         state = pstate;
1566 }
1567
1568 /**
1569  * gnm_expr_parse_str:
1570  *
1571  * @str   : The string to parse.
1572  * @pp    : #GnmParsePos
1573  * @flags : See parse-utils for descriptions
1574  * @convs : optionally NULL #GnmConventions
1575  * @error : optionally NULL ptr to store details of error.
1576  *
1577  * Parse a string. if @error is non-null it will be assumed that the
1578  * caller has passed a pointer to a GnmParseError struct AND that it will
1579  * take responsibility for freeing that struct and its contents.
1580  * with parse_error_free.
1581  * If @convs is NULL use the conventions from @pp.
1582  **/
1583 GnmExprTop const *
1584 gnm_expr_parse_str (char const *str, GnmParsePos const *pp,
1585                     GnmExprParseFlags flags,
1586                     GnmConventions const *convs,
1587                     GnmParseError *error)
1588 {
1589         GnmExpr const *expr;
1590         ParserState pstate;
1591
1592         g_return_val_if_fail (str != NULL, NULL);
1593         g_return_val_if_fail (pp != NULL, NULL);
1594         g_return_val_if_fail (state == NULL, NULL);
1595
1596         if (deallocate_stack == NULL)
1597                 deallocate_init ();
1598
1599         setup_state (&pstate, str, pp, flags, convs, error);
1600         yyparse ();
1601         state = NULL;
1602
1603         if (pstate.result != NULL) {
1604                 deallocate_assert_empty ();
1605
1606 #if 0
1607                 /* If this happens, something is very wrong */
1608                 if (pstate.error != NULL && pstate.error->message != NULL) {
1609                         g_warning ("An error occurred and the GnmExpr is non-null! This should not happen");
1610                         g_warning ("Error message is %s (%d, %d)", pstate.error->message, pstate.error->begin_char,
1611                                         pstate.error->end_char);
1612                 }
1613 #endif
1614
1615                 /* Do we have multiple expressions */
1616                 if (pstate.result->next != NULL) {
1617                         if (flags & GNM_EXPR_PARSE_PERMIT_MULTIPLE_EXPRESSIONS)
1618                                 expr = gnm_expr_new_set (g_slist_reverse (pstate.result));
1619                         else {
1620                                 gnm_expr_list_unref (pstate.result);
1621                                 report_err (&pstate, g_error_new (1, PERR_MULTIPLE_EXPRESSIONS,
1622                                         _("Multiple expressions are not supported in this context")),
1623                                         pstate.start,
1624                                         (pstate.ptr - pstate.start));
1625                                 expr = NULL;
1626                         }
1627                 } else {
1628                         /* Free the list, do not unref the content */
1629                         expr = pstate.result->data;
1630                         gnm_expr_list_free (pstate.result);
1631                 }
1632         } else {
1633                 /* If there is no error message, attempt to be more detailed */
1634                 if (pstate.error != NULL &&
1635                     (pstate.error->err == NULL || pstate.error->err->message == NULL)) {
1636                         char const *last_token = pstate.ptr;
1637
1638                         if (*last_token == '\0') {
1639                                 char const *str = pstate.start;
1640                                 char const *res = NULL;
1641                                 char const *last = find_matching_close (str, &res);
1642
1643                                 if (*last)
1644                                         report_err (&pstate, g_error_new (1, PERR_MISSING_PAREN_OPEN,
1645                                                 _("Could not find matching opening parenthesis")),
1646                                                 last, 1);
1647                                 else if (res != NULL)
1648                                         report_err (&pstate, g_error_new (1, PERR_MISSING_PAREN_CLOSE,
1649                                                 _("Could not find matching closing parenthesis")),
1650                                                 res, 1);
1651                                 else
1652                                         report_err (&pstate, g_error_new (1, PERR_INVALID_EXPRESSION,
1653                                                 _("Invalid expression")),
1654                                                 pstate.ptr, pstate.ptr - pstate.start);
1655                         } else
1656                                 report_err (&pstate, g_error_new (1, PERR_UNEXPECTED_TOKEN,
1657                                         _("Unexpected token %c"), *last_token),
1658                                         last_token, 1);
1659                 }
1660
1661                 deallocate_all ();
1662
1663                 expr = NULL;
1664         }
1665
1666         deallocate_uninit ();
1667
1668         return gnm_expr_top_new (expr);
1669 }
1670
1671 GnmLexerItem *
1672 gnm_expr_lex_all (char const *str, GnmParsePos const *pp,
1673                   GnmExprParseFlags flags,
1674                   GnmConventions const *convs)
1675 {
1676         GnmLexerItem *res = NULL;
1677         int n = 0, alloc = 0;
1678         ParserState pstate;
1679         GnmParseError *error = NULL;
1680
1681         g_return_val_if_fail (str != NULL, NULL);
1682         g_return_val_if_fail (pp != NULL, NULL);
1683
1684         if (deallocate_stack == NULL)
1685                 deallocate_init ();
1686
1687         setup_state (&pstate, str, pp, flags, convs, error);
1688
1689         while (1) {
1690                 int len;
1691
1692                 if (alloc <= n) {
1693                         alloc = alloc * 2 + 20;
1694                         res = g_renew (GnmLexerItem, res, alloc);
1695                 }
1696
1697                 res[n].start = pstate.ptr - pstate.start;
1698                 res[n].token = yylex ();
1699                 res[n].end = pstate.ptr - pstate.start;
1700
1701                 if (res[n].token == 0)
1702                         break;
1703
1704                 len = res[n].end - res[n].start;
1705                 /* Kill spaces that got eaten, but not a space operator */
1706                 while (len > 1 && str[res[n].start] == ' ') {
1707                         res[n].start++;
1708                         len--;
1709                 }
1710                 while (len > 1 && str[res[n].end - 1] == ' ') {
1711                         res[n].end--;
1712                         len--;
1713                 }
1714
1715                 n++;
1716         }
1717
1718         deallocate_all ();
1719
1720         state = NULL;
1721
1722         return res;
1723 }