Merge pull request #3035 from techee/different_parsers
[geany-mirror.git] / src / tagmanager / tm_source_file.c
blob488acdfdeb19ba99d525bb6199e0a41f4291bebf
1 /*
3 * Copyright (c) 2001-2002, Biswapesh Chattopadhyay
4 * Copyright 2005 The Geany contributors
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 */
11 /**
12 * @file tm_source_file.h
13 The TMSourceFile structure and associated functions are used to maintain
14 tags for individual files.
18 #include <stdio.h>
19 #include <limits.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <glib/gstdio.h>
26 #ifdef G_OS_WIN32
27 # define VC_EXTRALEAN
28 # define WIN32_LEAN_AND_MEAN
29 # include <windows.h> /* for GetFullPathName */
30 #endif
32 #include "tm_source_file.h"
33 #include "tm_tag.h"
34 #include "tm_parser.h"
35 #include "tm_ctags.h"
37 typedef struct
39 TMSourceFile public;
40 guint refcount;
41 } TMSourceFilePriv;
/* Formats of tags files that tm_source_file_read_tags_file() can read. */
typedef enum
{
	TM_FILE_FORMAT_TAGMANAGER,	/* native format, fields introduced by TA_* marker bytes */
	TM_FILE_FORMAT_PIPE,		/* tagname|return value|arglist|description */
	TM_FILE_FORMAT_CTAGS		/* tab-separated ctags format */
} TMFileFormat;
/* Marker bytes that introduce the individual attributes of a tag in a
 * tagmanager-format tags file.
 *
 * Note: To preserve binary compatibility, it is very important
 * that you only *append* to this list ! The numeric values are spelled out
 * so that an accidental insertion cannot silently renumber the markers. */
enum
{
	TA_NAME     = 200,
	TA_LINE     = 201,
	TA_LOCAL    = 202,
	TA_POS      = 203, /* Obsolete */
	TA_TYPE     = 204,
	TA_ARGLIST  = 205,
	TA_SCOPE    = 206,
	TA_VARTYPE  = 207,
	TA_INHERITS = 208,
	TA_TIME     = 209,
	TA_ACCESS   = 210,
	TA_IMPL     = 211,
	TA_LANG     = 212,
	TA_INACTIVE = 213, /* Obsolete */
	TA_FLAGS    = 214
};
/* Allocates a TMSourceFilePriv from the slice allocator and stores it in S.
 * g_slice_new() does not zero the memory, so every member must be initialized
 * by the caller. */
#define SOURCE_FILE_NEW(S) ((S) = g_slice_new(TMSourceFilePriv))
/* Returns a TMSourceFilePriv obtained with SOURCE_FILE_NEW() to the slice
 * allocator. The argument is parenthesized so that the cast applies to the
 * whole expression passed in. */
#define SOURCE_FILE_FREE(S) g_slice_free(TMSourceFilePriv, (TMSourceFilePriv *) (S))
/* Returns the maximum path length to use for buffers: the compile-time
 * PATH_MAX when the platform defines it, otherwise the value pathconf()
 * reports for path, falling back to 4096 when pathconf() gives no usable
 * (positive) answer. */
static int get_path_max(const char *path)
{
#ifdef PATH_MAX
	return PATH_MAX;
#else
	const int reported = pathconf(path, _PC_PATH_MAX);

	return (reported > 0) ? reported : 4096;
#endif
}
#if defined(G_OS_WIN32) && !defined(HAVE_REALPATH)
/* realpath implementation for Windows found at http://bugzilla.gnome.org/show_bug.cgi?id=342926
 * this one is better than e.g. liberty's lrealpath because this one uses Win32 API and works
 * with special chars within the filename
 *
 * pathname:      the path to resolve.
 * resolved_path: buffer of at least get_path_max(pathname) bytes that receives
 *                the result, or NULL to have a buffer allocated with g_new0().
 * Returns the buffer holding the full path, or NULL when GetFullPathNameA()
 * fails or the supplied buffer is too small. */
static char *realpath (const char *pathname, char *resolved_path)
{
	int size;

	if (resolved_path != NULL)
	{
		int path_max = get_path_max(pathname);

		size = GetFullPathNameA (pathname, path_max, resolved_path, NULL);
		/* 0 means the call failed, a value above the buffer size means the
		 * buffer was too small and nothing useful was written */
		if (size == 0 || size > path_max)
			return NULL;
		return resolved_path;
	}
	else
	{
		/* first call only asks for the required buffer size (including NUL) */
		size = GetFullPathNameA (pathname, 0, NULL, NULL);
		if (size == 0)
			return NULL;
		resolved_path = g_new0 (char, size);
		if (GetFullPathNameA (pathname, size, resolved_path, NULL) == 0)
		{
			g_free (resolved_path);
			return NULL;
		}
		return resolved_path;
	}
}
#endif
116 Given a file name, returns a newly allocated string containing the realpath()
117 of the file.
118 @param file_name The original file_name
119 @return A newly allocated string containing the real path to the file. NULL if none is available.
120 @deprecated since 1.32 (ABI 235)
121 @see utils_get_real_path()
123 GEANY_API_SYMBOL
124 gchar *tm_get_real_path(const gchar *file_name)
126 if (file_name)
128 gsize len = get_path_max(file_name) + 1;
129 gchar *path = g_malloc0(len);
131 if (realpath(file_name, path))
132 return path;
133 else
134 g_free(path);
136 return NULL;
139 gchar tm_source_file_get_tag_impl(const gchar *impl)
141 if ((0 == strcmp("virtual", impl))
142 || (0 == strcmp("pure virtual", impl)))
143 return TAG_IMPL_VIRTUAL;
145 #ifdef TM_DEBUG
146 g_warning("Unknown implementation %s", impl);
147 #endif
148 return TAG_IMPL_UNKNOWN;
151 gchar tm_source_file_get_tag_access(const gchar *access)
153 if (0 == strcmp("public", access))
154 return TAG_ACCESS_PUBLIC;
155 else if (0 == strcmp("protected", access))
156 return TAG_ACCESS_PROTECTED;
157 else if (0 == strcmp("private", access))
158 return TAG_ACCESS_PRIVATE;
159 else if (0 == strcmp("friend", access))
160 return TAG_ACCESS_FRIEND;
161 else if (0 == strcmp("default", access))
162 return TAG_ACCESS_DEFAULT;
164 #ifdef TM_DEBUG
165 g_warning("Unknown access type %s", access);
166 #endif
167 return TAG_ACCESS_UNKNOWN;
171 Initializes an already malloc()ed TMTag structure by reading a tag entry
172 line from a file. The structure should be allocated beforehand.
173 @param tag The TMTag structure to populate
174 @param file The TMSourceFile struct (assigned to the file member)
175 @param fp FILE pointer from where the tag line is read
176 @return TRUE on success, FALSE on FAILURE
178 static gboolean init_tag_from_file(TMTag *tag, TMSourceFile *file, FILE *fp, TMParserType lang)
180 guchar buf[BUFSIZ];
181 guchar *start, *end;
182 gboolean status;
183 guchar changed_char = TA_NAME;
185 tag->refcount = 1;
186 if ((NULL == fgets((gchar*)buf, BUFSIZ, fp)) || ('\0' == *buf))
187 return FALSE;
188 for (start = end = buf, status = TRUE; (TRUE == status); start = end, ++ end)
190 while ((*end < TA_NAME) && (*end != '\0') && (*end != '\n'))
191 ++ end;
192 if (('\0' == *end) || ('\n' == *end))
193 status = FALSE;
194 changed_char = *end;
195 *end = '\0';
196 if (NULL == tag->name)
198 if (!isprint(*start))
199 return FALSE;
200 else
202 tag->name = g_strdup((gchar*)start);
203 if (tm_parser_is_anon_name(lang, tag->name))
204 tag->flags |= tm_tag_flag_anon_t;
207 else
209 switch (*start)
211 case TA_LINE:
212 tag->line = atol((gchar*)start + 1);
213 break;
214 case TA_LOCAL:
215 tag->local = atoi((gchar*)start + 1);
216 break;
217 case TA_TYPE:
218 tag->type = (TMTagType) atoi((gchar*)start + 1);
219 break;
220 case TA_ARGLIST:
221 tag->arglist = g_strdup((gchar*)start + 1);
222 break;
223 case TA_SCOPE:
224 tag->scope = g_strdup((gchar*)start + 1);
225 break;
226 case TA_FLAGS:
227 tag->flags |= atoi((gchar*)start + 1);
228 break;
229 case TA_VARTYPE:
230 tag->var_type = g_strdup((gchar*)start + 1);
231 break;
232 case TA_INHERITS:
233 tag->inheritance = g_strdup((gchar*)start + 1);
234 break;
235 case TA_TIME: /* Obsolete */
236 break;
237 case TA_LANG: /* Obsolete */
238 break;
239 case TA_INACTIVE: /* Obsolete */
240 break;
241 case TA_ACCESS:
242 tag->access = (char) *(start + 1);
243 break;
244 case TA_IMPL:
245 tag->impl = (char) *(start + 1);
246 break;
247 default:
248 #ifdef GEANY_DEBUG
249 g_warning("Unknown attribute %s", start + 1);
250 #endif
251 break;
254 *end = changed_char;
256 if (NULL == tag->name)
257 return FALSE;
258 tag->file = file;
259 return TRUE;
262 /* alternative parser for Pascal and LaTeX global tags files with the following format
263 * tagname|return value|arglist|description\n */
264 static gboolean init_tag_from_file_alt(TMTag *tag, TMSourceFile *file, FILE *fp)
266 guchar buf[BUFSIZ];
267 guchar *start, *end;
268 gboolean status;
269 /*guchar changed_char = TA_NAME;*/
271 tag->refcount = 1;
272 if ((NULL == fgets((gchar*)buf, BUFSIZ, fp)) || ('\0' == *buf))
273 return FALSE;
275 gchar **fields;
276 guint field_len;
277 for (start = end = buf, status = TRUE; (TRUE == status); start = end, ++ end)
279 while ((*end < TA_NAME) && (*end != '\0') && (*end != '\n'))
280 ++ end;
281 if (('\0' == *end) || ('\n' == *end))
282 status = FALSE;
283 /*changed_char = *end;*/
284 *end = '\0';
285 if (NULL == tag->name && !isprint(*start))
286 return FALSE;
288 fields = g_strsplit((gchar*)start, "|", -1);
289 field_len = g_strv_length(fields);
291 if (field_len >= 1) tag->name = g_strdup(fields[0]);
292 else tag->name = NULL;
293 if (field_len >= 2 && fields[1] != NULL) tag->var_type = g_strdup(fields[1]);
294 if (field_len >= 3 && fields[2] != NULL) tag->arglist = g_strdup(fields[2]);
295 tag->type = tm_tag_prototype_t;
296 g_strfreev(fields);
300 if (NULL == tag->name)
301 return FALSE;
302 tag->file = file;
303 return TRUE;
307 CTags tag file format (http://ctags.sourceforge.net/FORMAT)
309 static gboolean init_tag_from_file_ctags(TMTag *tag, TMSourceFile *file, FILE *fp, TMParserType lang)
311 gchar buf[BUFSIZ];
312 gchar *p, *tab;
314 tag->refcount = 1;
315 tag->type = tm_tag_function_t; /* default type is function if no kind is specified */
318 if ((NULL == fgets(buf, BUFSIZ, fp)) || ('\0' == *buf))
319 return FALSE;
321 while (strncmp(buf, "!_TAG_", 6) == 0); /* skip !_TAG_ lines */
323 p = buf;
325 /* tag name */
326 if (! (tab = strchr(p, '\t')) || p == tab)
327 return FALSE;
328 tag->name = g_strndup(p, (gsize)(tab - p));
329 p = tab + 1;
331 if (tm_parser_is_anon_name(lang, tag->name))
332 tag->flags |= tm_tag_flag_anon_t;
334 /* tagfile, unused */
335 if (! (tab = strchr(p, '\t')))
337 g_free(tag->name);
338 tag->name = NULL;
339 return FALSE;
341 p = tab + 1;
342 /* Ex command, unused */
343 if (*p == '/' || *p == '?')
345 gchar c = *p;
346 for (++p; *p && *p != c; p++)
348 if (*p == '\\' && p[1])
349 p++;
352 else /* assume a line */
353 tag->line = atol(p);
354 tab = strstr(p, ";\"");
355 /* read extension fields */
356 if (tab)
358 p = tab + 2;
359 while (*p && *p != '\n' && *p != '\r')
361 gchar *end;
362 const gchar *key, *value = NULL;
364 /* skip leading tabulations */
365 while (*p && *p == '\t') p++;
366 /* find the separator (:) and end (\t) */
367 key = end = p;
368 while (*end && *end != '\t' && *end != '\n' && *end != '\r')
370 if (*end == ':' && ! value)
372 *end = 0; /* terminate the key */
373 value = end + 1;
375 end++;
377 /* move p paste the so we won't stop parsing by setting *end=0 below */
378 p = *end ? end + 1 : end;
379 *end = 0; /* terminate the value (or key if no value) */
381 if (! value || 0 == strcmp(key, "kind")) /* tag kind */
383 const gchar *kind = value ? value : key;
385 if (kind[0] && kind[1])
386 tag->kind_letter = tm_ctags_get_kind_from_name(kind, lang);
387 else
388 tag->kind_letter = *kind;
389 tag->type = tm_parser_get_tag_type(tag->kind_letter, lang);
391 else if (0 == strcmp(key, "inherits")) /* comma-separated list of classes this class inherits from */
393 g_free(tag->inheritance);
394 tag->inheritance = g_strdup(value);
396 else if (0 == strcmp(key, "implementation")) /* implementation limit */
397 tag->impl = tm_source_file_get_tag_impl(value);
398 else if (0 == strcmp(key, "line")) /* line */
399 tag->line = atol(value);
400 else if (0 == strcmp(key, "access")) /* access */
401 tag->access = tm_source_file_get_tag_access(value);
402 else if (0 == strcmp(key, "class") ||
403 0 == strcmp(key, "enum") ||
404 0 == strcmp(key, "function") ||
405 0 == strcmp(key, "struct") ||
406 0 == strcmp(key, "union")) /* Name of the class/enum/function/struct/union in which this tag is a member */
408 g_free(tag->scope);
409 tag->scope = g_strdup(value);
411 else if (0 == strcmp(key, "file")) /* static (local) tag */
412 tag->local = TRUE;
413 else if (0 == strcmp(key, "signature")) /* arglist */
415 g_free(tag->arglist);
416 tag->arglist = g_strdup(value);
421 tag->file = file;
422 return TRUE;
425 static TMTag *new_tag_from_tags_file(TMSourceFile *file, FILE *fp, TMParserType mode, TMFileFormat format)
427 TMTag *tag = tm_tag_new();
428 gboolean result = FALSE;
430 switch (format)
432 case TM_FILE_FORMAT_TAGMANAGER:
433 result = init_tag_from_file(tag, file, fp, mode);
434 break;
435 case TM_FILE_FORMAT_PIPE:
436 result = init_tag_from_file_alt(tag, file, fp);
437 break;
438 case TM_FILE_FORMAT_CTAGS:
439 result = init_tag_from_file_ctags(tag, file, fp, mode);
440 break;
443 if (! result)
445 tm_tag_unref(tag);
446 return NULL;
448 tag->lang = mode;
449 return tag;
453 Writes tag information to the given FILE *.
454 @param tag The tag information to write.
455 @param file FILE pointer to which the tag information is written.
456 @param attrs Attributes to be written (bitmask).
457 @return TRUE on success, FALSE on failure.
459 static gboolean write_tag(TMTag *tag, FILE *fp, TMTagAttrType attrs)
461 fprintf(fp, "%s", tag->name);
462 if (attrs & tm_tag_attr_type_t)
463 fprintf(fp, "%c%d", TA_TYPE, tag->type);
464 if ((attrs & tm_tag_attr_arglist_t) && (NULL != tag->arglist))
465 fprintf(fp, "%c%s", TA_ARGLIST, tag->arglist);
466 if (attrs & tm_tag_attr_line_t)
467 fprintf(fp, "%c%ld", TA_LINE, tag->line);
468 if (attrs & tm_tag_attr_local_t)
469 fprintf(fp, "%c%d", TA_LOCAL, tag->local);
470 if ((attrs & tm_tag_attr_scope_t) && (NULL != tag->scope))
471 fprintf(fp, "%c%s", TA_SCOPE, tag->scope);
472 if ((attrs & tm_tag_attr_inheritance_t) && (NULL != tag->inheritance))
473 fprintf(fp, "%c%s", TA_INHERITS, tag->inheritance);
474 if (attrs & tm_tag_attr_flags_t)
475 fprintf(fp, "%c%d", TA_FLAGS, tag->flags);
476 if ((attrs & tm_tag_attr_vartype_t) && (NULL != tag->var_type))
477 fprintf(fp, "%c%s", TA_VARTYPE, tag->var_type);
478 if ((attrs & tm_tag_attr_access_t) && (TAG_ACCESS_UNKNOWN != tag->access))
479 fprintf(fp, "%c%c", TA_ACCESS, tag->access);
480 if ((attrs & tm_tag_attr_impl_t) && (TAG_IMPL_UNKNOWN != tag->impl))
481 fprintf(fp, "%c%c", TA_IMPL, tag->impl);
483 if (fprintf(fp, "\n"))
484 return TRUE;
485 else
486 return FALSE;
489 GPtrArray *tm_source_file_read_tags_file(const gchar *tags_file, TMParserType mode)
491 guchar buf[BUFSIZ];
492 FILE *fp;
493 GPtrArray *file_tags;
494 TMTag *tag;
495 TMFileFormat format = TM_FILE_FORMAT_TAGMANAGER;
497 if (NULL == (fp = g_fopen(tags_file, "r")))
498 return NULL;
499 if ((NULL == fgets((gchar*) buf, BUFSIZ, fp)) || ('\0' == *buf))
501 fclose(fp);
502 return NULL; /* early out on error */
504 else
505 { /* We read (and discard) the first line for the format specification. */
506 if (buf[0] == '#' && strstr((gchar*) buf, "format=pipe") != NULL)
507 format = TM_FILE_FORMAT_PIPE;
508 else if (buf[0] == '#' && strstr((gchar*) buf, "format=tagmanager") != NULL)
509 format = TM_FILE_FORMAT_TAGMANAGER;
510 else if (buf[0] == '#' && strstr((gchar*) buf, "format=ctags") != NULL)
511 format = TM_FILE_FORMAT_CTAGS;
512 else if (strncmp((gchar*) buf, "!_TAG_", 6) == 0)
513 format = TM_FILE_FORMAT_CTAGS;
514 else
515 { /* We didn't find a valid format specification, so we try to auto-detect the format
516 * by counting the pipe characters on the first line and asumme pipe format when
517 * we find more than one pipe on the line. */
518 guint i, pipe_cnt = 0, tab_cnt = 0;
519 for (i = 0; i < BUFSIZ && buf[i] != '\0' && pipe_cnt < 2; i++)
521 if (buf[i] == '|')
522 pipe_cnt++;
523 else if (buf[i] == '\t')
524 tab_cnt++;
526 if (pipe_cnt > 1)
527 format = TM_FILE_FORMAT_PIPE;
528 else if (tab_cnt > 1)
529 format = TM_FILE_FORMAT_CTAGS;
530 /* reset the file pointer, to start reading again from the beginning */
531 rewind(fp);
535 file_tags = g_ptr_array_new();
536 while (NULL != (tag = new_tag_from_tags_file(NULL, fp, mode, format)))
537 g_ptr_array_add(file_tags, tag);
538 fclose(fp);
540 return file_tags;
543 gboolean tm_source_file_write_tags_file(const gchar *tags_file, GPtrArray *tags_array)
545 guint i;
546 FILE *fp;
547 gboolean ret = TRUE;
549 g_return_val_if_fail(tags_array && tags_file, FALSE);
551 fp = g_fopen(tags_file, "w");
552 if (!fp)
553 return FALSE;
555 fprintf(fp, "# format=tagmanager\n");
556 for (i = 0; i < tags_array->len; i++)
558 TMTag *tag = TM_TAG(tags_array->pdata[i]);
560 ret = write_tag(tag, fp, tm_tag_attr_type_t
561 | tm_tag_attr_scope_t | tm_tag_attr_arglist_t | tm_tag_attr_vartype_t
562 | tm_tag_attr_flags_t);
564 if (!ret)
565 break;
567 fclose(fp);
569 return ret;
573 /* Initializes a TMSourceFile structure from a file name. */
574 static gboolean tm_source_file_init(TMSourceFile *source_file, const char *file_name,
575 const char* name)
577 GStatBuf s;
578 int status;
580 #ifdef TM_DEBUG
581 g_message("Source File init: %s", file_name);
582 #endif
584 if (file_name != NULL)
586 status = g_stat(file_name, &s);
587 if (0 != status)
589 /* g_warning("Unable to stat %s", file_name);*/
590 return FALSE;
592 if (!S_ISREG(s.st_mode))
594 g_warning("%s: Not a regular file", file_name);
595 return FALSE;
597 source_file->file_name = tm_get_real_path(file_name);
598 source_file->short_name = strrchr(source_file->file_name, '/');
599 if (source_file->short_name)
600 ++ source_file->short_name;
601 else
602 source_file->short_name = source_file->file_name;
605 source_file->tags_array = g_ptr_array_new();
607 if (name == NULL)
608 source_file->lang = TM_PARSER_NONE;
609 else
610 source_file->lang = tm_ctags_get_named_lang(name);
612 return TRUE;
615 /** Initializes a TMSourceFile structure and returns a pointer to it. The
616 * TMSourceFile has to be added to TMWorkspace to start its parsing.
617 * @param file_name The file name.
618 * @param name Name of the used programming language, NULL to disable parsing.
619 * @return The created unparsed TMSourceFile object.
620 * */
621 GEANY_API_SYMBOL
622 TMSourceFile *tm_source_file_new(const char *file_name, const char *name)
624 TMSourceFilePriv *priv;
626 SOURCE_FILE_NEW(priv);
627 if (TRUE != tm_source_file_init(&priv->public, file_name, name))
629 SOURCE_FILE_FREE(priv);
630 return NULL;
632 priv->refcount = 1;
633 return &priv->public;
637 static TMSourceFile *tm_source_file_dup(TMSourceFile *source_file)
639 TMSourceFilePriv *priv = (TMSourceFilePriv *) source_file;
641 g_return_val_if_fail(NULL != source_file, NULL);
643 g_atomic_int_inc(&priv->refcount);
644 return source_file;
647 /* Destroys the contents of the source file. Note that the tags are owned by the
648 source file and are also destroyed when the source file is destroyed. If pointers
649 to these tags are used elsewhere, then those tag arrays should be rebuilt.
651 static void tm_source_file_destroy(TMSourceFile *source_file)
653 #ifdef TM_DEBUG
654 g_message("Destroying source file: %s", source_file->file_name);
655 #endif
657 g_free(source_file->file_name);
658 tm_tags_array_free(source_file->tags_array, TRUE);
659 source_file->tags_array = NULL;
662 /** Decrements the reference count of @a source_file
664 * If the reference count drops to 0, then @a source_file is freed, including all contents.
665 * Make sure the @a source_file is already removed from any TMWorkSpace before the
666 * this happens.
667 * @param source_file The source file to free.
668 * @see tm_workspace_remove_source_file()
670 GEANY_API_SYMBOL
671 void tm_source_file_free(TMSourceFile *source_file)
673 TMSourceFilePriv *priv = (TMSourceFilePriv *) source_file;
675 if (NULL != priv && g_atomic_int_dec_and_test(&priv->refcount))
677 tm_source_file_destroy(source_file);
678 SOURCE_FILE_FREE(priv);
682 /** Gets the GBoxed-derived GType for TMSourceFile
684 * @return TMSourceFile type . */
685 GEANY_API_SYMBOL
686 GType tm_source_file_get_type(void);
688 G_DEFINE_BOXED_TYPE(TMSourceFile, tm_source_file, tm_source_file_dup, tm_source_file_free);
690 /* Parses the text-buffer or source file and regenarates the tags.
691 @param source_file The source file to parse
692 @param text_buf The text buffer to parse
693 @param buf_size The size of text_buf.
694 @param use_buffer Set FALSE to ignore the buffer and parse the file directly or
695 TRUE to parse the buffer and ignore the file content.
696 @return TRUE on success, FALSE on failure
698 gboolean tm_source_file_parse(TMSourceFile *source_file, guchar* text_buf, gsize buf_size,
699 gboolean use_buffer)
701 const char *file_name;
702 gboolean retry = TRUE;
704 if ((NULL == source_file) || (NULL == source_file->file_name))
706 g_warning("Attempt to parse NULL file");
707 return FALSE;
710 if (source_file->lang == TM_PARSER_NONE)
712 tm_tags_array_free(source_file->tags_array, FALSE);
713 return FALSE;
716 file_name = source_file->file_name;
718 if (use_buffer && (NULL == text_buf || 0 == buf_size))
720 /* Empty buffer, "parse" by setting empty tag array */
721 tm_tags_array_free(source_file->tags_array, FALSE);
722 return TRUE;
725 tm_tags_array_free(source_file->tags_array, FALSE);
727 tm_ctags_parse(use_buffer ? text_buf : NULL, buf_size, file_name,
728 source_file->lang, source_file);
730 return !retry;
733 /* Gets the name associated with the language index.
734 @param lang The language index.
735 @return The language name, or NULL.
737 const gchar *tm_source_file_get_lang_name(TMParserType lang)
739 return tm_ctags_get_lang_name(lang);
742 /* Gets the language index for \a name.
743 @param name The language name.
744 @return The language index, or TM_PARSER_NONE.
746 TMParserType tm_source_file_get_named_lang(const gchar *name)
748 return tm_ctags_get_named_lang(name);