libanjuta/anjuta-token-stream.c

   1 /* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
   2 /*
   3  * anjuta-token-stream.c
   4  * Copyright (C) Sébastien Granjoux 2009 <seb.sfo@free.fr>
   5  *
   6  * This program is free software: you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License as published by the
   8  * Free Software Foundation, either version 3 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14  * See the GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along
  17  * with this program.  If not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "anjuta-token-stream.h"
  21
  22 #include "anjuta-debug.h"
  23
  24 #include <glib-object.h>
  25
  26 #include <stdio.h>
  27 #include <string.h>
  28
  29 /**
  30  * SECTION:anjuta-token-stream
  31  * @title: Anjuta token stream
  32  * @short_description: Anjuta token stream
  33  * @see_also:
  34  * @stability: Unstable
  35  * @include: libanjuta/anjuta-token-stream.h
  36  *
 * A #AnjutaTokenStream object reads and writes a list of tokens. It uses two
 * lists. The first list is assigned when the object is created. Each token is
 * read as characters, discarding the separation between tokens. The second list
 * is written using the data of the first list, so no new memory is allocated,
 * in order to recreate a new list of tokens.
  42  *
 * This is used when the lexer needs several passes. At the beginning the file
 * is read as a single token containing the whole file content. The first pass
 * splits this content into tokens. Additional passes are done on some parts of
 * the token list to get a more precise splitting.
  47  *
  48  * It is important to not allocate new memory and keep the same character
  49  * pointers in the additional passes because the token list does not own the
  50  * memory. The address of each character is used to find the position of the
  51  * changed data in the file.
  52  *
 * Several objects can be linked together to create a stack. It is used for
 * included files or variable expansion.
  55  */
  56
/* Type declarations
  58  *---------------------------------------------------------------------------*/
  59
/* State of a token stream: the input token list, the read and write cursors
 * inside it, the output root token and the link to the parent stream. */
struct _AnjutaTokenStream
{
        /* Input stream: first token of the list given at creation and the
         * token returned by anjuta_token_last() for it (NULL if there is no
         * input list) */
        AnjutaToken *first;
        AnjutaToken *last;

        /* Read position in input stream: current token and character offset
         * inside this token, advanced by anjuta_token_stream_read() */
        AnjutaToken *token;
        gsize pos;

        /* Write position in input stream: token and character offset where
         * the next call to anjuta_token_stream_tokenize() starts cutting */
        AnjutaToken *start;
        gsize begin;

        /* Output stream: new tokens are appended as children of this token */
        AnjutaToken *root;

        /* Parent stream, returned by anjuta_token_stream_pop(); can be NULL */
        AnjutaTokenStream *parent;

        /* Current directory: parent directory of the current file, or the
         * directory inherited from the parent stream; can be NULL */
        GFile *current_directory;

        /* Current file, NULL if the stream was created without a file */
        GFile *current_file;
};
  86
/* Helper functions
  88  *---------------------------------------------------------------------------*/
  89
  90 /* Private functions
  91  *---------------------------------------------------------------------------*/
  92
  93 /* Public functions
  94  *---------------------------------------------------------------------------*/
  95
  96 /**
  97  * anjuta_token_stream_append_token:
  98  * @stream: a #AnjutaTokenStream object.
  99  * @token: a #AnjutaToken object.
 100  *
 101  * Append an already existing token in the output stream.
 102  */
 103 void
 104 anjuta_token_stream_append_token (AnjutaTokenStream *stream, AnjutaToken *token)
 105 {
 106         anjuta_token_append_child (stream->root, token);
 107 }
 108
 109 /**
 110  * anjuta_token_stream_tokenize:
 111  * @stream: a #AnjutaTokenStream object.
 112  * @type: a token type.
 113  * @length: the token length in character.
 114  *
 115  * Create a token of type from the last length characters previously read and
 116  * append it in the output stream. The characters are not copied in the output
 117  * stream, the new token uses the same characters.
 118  *
 119  * Return value: The created token.
 120  */
 121 AnjutaToken*
 122 anjuta_token_stream_tokenize (AnjutaTokenStream *stream, gint type, gsize length)
 123 {
 124     AnjutaToken *frag;
 125     AnjutaToken *end;
 126
 127     frag = anjuta_token_new_static (type, NULL);
 128
 129     for (end = stream->start; end != NULL;)
 130     {
 131         if ((anjuta_token_get_type (end) < ANJUTA_TOKEN_PARSED) || (anjuta_token_get_length (end) == 0))
 132         {
 133             gint toklen = anjuta_token_get_length (end);
 134             AnjutaToken *copy = anjuta_token_cut (end, stream->begin, length);
 135
 136             if (toklen >= (length + stream->begin))
 137             {
 138
 139                 if (end == stream->start)
 140                 {
 141                     /* Get whole token */
 142                     anjuta_token_free (frag);
 143                     anjuta_token_set_type (copy, type);
 144                     frag = copy;
 145                 }
 146                 else
 147                 {
 148                     /* Get several token */
 149                     anjuta_token_insert_after (frag, copy);
 150                     anjuta_token_merge (frag, copy);
 151                 }
 152
 153                 if (toklen == (length + stream->begin))
 154                 {
 155                     stream->start = anjuta_token_next (end);
 156                     stream->begin = 0;
 157                 }
 158                 else
 159                 {
 160                     stream->start = end;
 161                     stream->begin += length;
 162                 }
 163                 break;
 164             }
 165             else
 166             {
 167                 anjuta_token_insert_after (frag, copy);
 168                 anjuta_token_merge (frag, copy);
 169                 length -= toklen - stream->begin;
 170                 end = anjuta_token_next (end);
 171                 stream->begin = 0;
 172             }
 173         }
 174         else
 175         {
 176             end = anjuta_token_next (end);
 177             stream->begin = 0;
 178         }
 179     }
 180
 181     anjuta_token_stream_append_token (stream, frag);
 182
 183     return frag;
 184 }
 185
 186 /**
 187  * anjuta_token_stream_read:
 188  * @stream: a #AnjutaTokenStream object.
 189  * @buffer: a character buffer to fill with token data.
 190  * @max_size: the size of the buffer.
 191  *
 192  * Read token from the input stream and write the content as a C string in the
 193  * buffer passed as argument.
 194  *
 195  * Return value: The number of characters written in the buffer.
 196  */
 197 gint
 198 anjuta_token_stream_read (AnjutaTokenStream *stream, gchar *buffer, gsize max_size)
 199 {
 200     gint result = 0;
 201
 202     if (stream->token != NULL)
 203     {
 204         gsize length = anjuta_token_get_length (stream->token);
 205
 206         if ((anjuta_token_get_type (stream->token) >= ANJUTA_TOKEN_PARSED) || (stream->pos >= length))
 207         {
 208             for (;;)
 209             {
 210                 /* Last token */
 211                 if (stream->token== stream->last) return 0;
 212
 213                 if (anjuta_token_get_type (stream->token) >= ANJUTA_TOKEN_PARSED)
 214                 {
 215                     stream->token = anjuta_token_next (stream->token);
 216                 }
 217                 else
 218                 {
 219                     stream->token = anjuta_token_next (stream->token);
 220                 }
 221
 222                 if ((stream->token == NULL) || (anjuta_token_get_type (stream->token) == ANJUTA_TOKEN_EOV))
 223                 {
 224                     /* Last token */
 225                     return 0;
 226                 }
 227                 else if ((anjuta_token_get_length (stream->token) != 0) && (anjuta_token_get_type (stream->token) < ANJUTA_TOKEN_PARSED))
 228                 {
 229                     /* Find some data */
 230                     stream->pos = 0;
 231                     length = anjuta_token_get_length (stream->token);
 232                     break;
 233                 }
 234             }
 235         }
 236
 237         if (stream->pos < length)
 238         {
 239             const gchar *start = anjuta_token_get_string (stream->token);
 240
 241             length -= stream->pos;
 242
 243             if (length > max_size) length = max_size;
 244             memcpy (buffer, start + stream->pos, length);
 245             stream->pos += length;
 246             result = length;
 247         }
 248     }
 249
 250     return result;
 251 }
 252
 253 /**
 254  * anjuta_token_stream_get_root:
 255  * @stream: a #AnjutaTokenStream object.
 256  *
 257  * Return the root token for the output stream.
 258  *
 259  * Return value: The output root token.
 260  */
 261 AnjutaToken*
 262 anjuta_token_stream_get_root (AnjutaTokenStream *stream)
 263 {
 264         g_return_val_if_fail (stream != NULL, NULL);
 265
 266         return stream->root;
 267 }
 268
 269 /**
 270  * anjuta_token_stream_get_current_directory:
 271  * @stream: a #AnjutaTokenStream object.
 272  *
 273  * Return the current directory.
 274  *
 275  * Return value: The current directory.
 276  */
 277 GFile*
 278 anjuta_token_stream_get_current_directory (AnjutaTokenStream *stream)
 279 {
 280         g_return_val_if_fail (stream != NULL, NULL);
 281
 282         return stream->current_directory;
 283 }
 284
 285
 286 /**
 287  * anjuta_token_stream_get_current_file:
 288  * @stream: a #AnjutaTokenStream object.
 289  *
 290  * Return the current file.
 291  *
 292  * Return value: The current file.
 293  */
 294 GFile*
 295 anjuta_token_stream_get_current_file (AnjutaTokenStream *stream)
 296 {
 297         g_return_val_if_fail (stream != NULL, NULL);
 298
 299         return stream->current_file;
 300 }
 301
 302 /* Constructor & Destructor
 303  *---------------------------------------------------------------------------*/
 304
 305 /**
 306  * anjuta_token_stream_push:
 307  * @parent: a parent #AnjutaTokenStream object or NULL.
 308  * @root: a token or NULL
 309  * @content: a token list.
 310  *
 311  * Create a new stream from a list of tokens. If a parent stream is passed,
 312  * the new stream keep a link on it, so we can return it when the new stream
 313  * will be destroyed.
 314  *
 315  * Return value: The newly created stream.
 316  */
 317 AnjutaTokenStream *
 318 anjuta_token_stream_push (AnjutaTokenStream *parent, AnjutaToken *root, AnjutaToken *content, GFile *file)
 319 {
 320         AnjutaTokenStream *child;
 321
 322         child = g_new (AnjutaTokenStream, 1);
 323         child->first = content;
 324         child->pos = 0;
 325         child->begin = 0;
 326         child->parent = parent;
 327         child->token = content;
 328         child->start = child->token;
 329         child->last = content == NULL ? NULL : anjuta_token_last (content);
 330
 331         child->root = root == NULL ? anjuta_token_new_static (ANJUTA_TOKEN_FILE, NULL) : root;
 332         if (file == NULL)
 333         {
 334                 child->current_directory = parent == NULL ? NULL : (parent->current_directory == NULL ? NULL : g_object_ref (parent->current_directory));
 335                 child->current_file = NULL;
 336         }
 337         else
 338         {
 339                 child->current_directory = g_file_get_parent (file);
 340                 child->current_file = g_object_ref (file);
 341         }
 342
 343         return child;
 344 }
 345
 346 /**
 347  * anjuta_token_stream_pop:
 348  * @stream: a #AnjutaTokenStream object.
 349  *
 350  * Destroy the stream object and return the parent stream if it exists.
 351  *
 352  * Return value: The parent stream or NULL if there is no parent.
 353  */
 354 AnjutaTokenStream *
 355 anjuta_token_stream_pop (AnjutaTokenStream *stream)
 356 {
 357         AnjutaTokenStream *parent;
 358
 359         g_return_val_if_fail (stream != NULL, NULL);
 360
 361         if (stream->current_directory) g_object_unref (stream->current_directory);
 362         if (stream->current_file) g_object_unref (stream->current_file);
 363         parent = stream->parent;
 364         g_free (stream);
 365
 366         return parent;
 367 }
 368
 369 /**
 370  * anjuta_token_stream_get_parent:
 371  * @stream: a #AnjutaTokenStream object.
 372  *
 373  * Return the parent stream
 374  *
 375  * Return value: The parent stream or NULL if there is no parent.
 376  */
 377 AnjutaTokenStream *
 378 anjuta_token_stream_get_parent (AnjutaTokenStream *stream)
 379 {
 380         g_return_val_if_fail (stream != NULL, NULL);
 381
 382         return stream->parent;
 383 }