Parser/tokenizer.h

   1 #ifndef Py_TOKENIZER_H
   2 #define Py_TOKENIZER_H
   3 #ifdef __cplusplus
   4 extern "C" {
   5 #endif
   6
   7 #include "object.h"
   8
   9 /* Tokenizer interface */
  10
  11 #include "token.h"      /* For token types */
  12
  13 #define MAXINDENT 100   /* Max indentation level; also the element count of indstack[] and altindstack[] in struct tok_state */
  14
  /* Decoding state recorded in struct tok_state (see its PEP 0263 fields).
     The enumerators keep their implicit values 0, 1 and 2.
     The last enumerator must not be followed by a comma: a trailing comma in
     an enumerator list is only valid from C99 on and is rejected by strict
     C89 compilers. */
  enum decoding_state {
          STATE_INIT,
          STATE_RAW,
          STATE_NORMAL    /* have a codec associated with input */
  };
  20
  21 /* Tokenizer state */
  22 struct tok_state {
  23         /* Input state; buf <= cur <= inp <= end */
  24         /* NB an entire line is held in the buffer */
  25         char *buf;      /* Input buffer, or NULL; malloc'ed if fp != NULL */
  26         char *cur;      /* Next character in buffer */
  27         char *inp;      /* End of data in buffer */
  28         char *end;      /* End of input buffer if buf != NULL */
  29         char *start;    /* Start of current token if not NULL */
  30         int done;       /* E_OK normally, E_EOF at EOF, otherwise error code */
  31         /* NB If done != E_OK, cur must be == inp!!! */
  32         FILE *fp;       /* Rest of input; NULL if tokenizing a string */
  33         int tabsize;    /* Tab spacing */
  34         int indent;     /* Current indentation index */
  35         int indstack[MAXINDENT];        /* Stack of indents */
  36         int atbol;      /* Nonzero if at begin of new line */
  37         int pendin;     /* Pending indents (if > 0) or dedents (if < 0) */
  38         char *prompt, *nextprompt;      /* For interactive prompting */
  39         int lineno;     /* Current line number */
  40         int level;      /* () [] {} Parentheses nesting level */
  41                         /* Used to allow free continuations inside them */
  42         /* Stuff for checking on different tab sizes */
  43         const char *filename;   /* For error messages */
  44         int altwarning; /* Issue warning if alternate tabs don't match */
  45         int alterror;   /* Issue error if alternate tabs don't match */
  46         int alttabsize; /* Alternate tab spacing */
  47         int altindstack[MAXINDENT];     /* Stack of alternate indents */
  48         /* Stuff for PEP 0263 */
  49         enum decoding_state decoding_state;
  50         int decoding_erred;     /* whether erred in decoding  */
  51         int read_coding_spec;   /* whether 'coding:...' has been read  */
  52         char *encoding;         /* Source encoding. */
  53         int cont_line;          /* whether we are in a continuation line. */
  54         const char* line_start; /* pointer to start of current line */
  55 #ifndef PGEN
  56         PyObject *decoding_readline; /* codecs.open(...).readline */
  57         PyObject *decoding_buffer;
  58 #endif
  59         const char* enc;        /* Encoding for the current str. */
  60         const char* str;
  61 };
  62
  /* Tokenizer functions.  Only declarations are given here; the definitions
     live outside this header. */

  /* Each of these returns a pointer to a struct tok_state. */
  extern struct tok_state *PyTokenizer_FromString(const char *);
  extern struct tok_state *PyTokenizer_FromUTF8(const char *);
  extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *, char *);

  /* Each of these operates on an existing struct tok_state. */
  extern void PyTokenizer_Free(struct tok_state *);
  extern int PyTokenizer_Get(struct tok_state *, char **, char **);
  extern char *PyTokenizer_RestoreEncoding(struct tok_state *tok, int len,
                                           int *offset);

  /* Takes a single int; NOTE(review): presumably a file descriptor -- confirm
     against the definition. */
  extern char *PyTokenizer_FindEncoding(int);
  72
  73 #ifdef __cplusplus
  74 }
  75 #endif
  76 #endif /* !Py_TOKENIZER_H */