Moving non-unicode apertium to branches
[apertium.git] / lttoolbox-unicode / lttoolbox / pattern_list.h
blobf34901b86530a1e2e0a23eeebb650f2bf655e16f
/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
19 #ifndef _PATTERNLIST_
20 #define _PATTERNLIST_
22 #include <lttoolbox/alphabet.h>
23 #include <lttoolbox/match_exe.h>
24 #include <lttoolbox/transducer.h>
26 #include <list>
27 #include <map>
28 #include <string>
29 #include <vector>
// Retained for backward compatibility: code that includes this header may
// rely on unqualified standard library names being visible.
using namespace std;

/**
 * Pattern storage: a multimap from an int key to a vector<int> value, so a
 * single key may be associated with several vectors.
 */
typedef std::multimap<int, std::vector<int> > PatternStore;

/**
 * A pair of PatternStore iterators (first, second); this is the type
 * returned by PatternStore::equal_range().
 */
typedef std::pair<PatternStore::iterator, PatternStore::iterator> PatternRange;
36 class PatternList
38 private:
39 Alphabet alphabet;
40 PatternStore patterns;
41 bool sequence;
42 list<vector<int> > sequence_data;
43 Transducer transducer;
44 map<int, int> final_type;
45 int sequence_id;
47 void copy(PatternList const &o);
48 void destroy();
49 void insertOutOfSequence(wstring const &lemma, wstring const &tags,
50 vector<int> &result);
51 void insertIntoSequence(int const id, wstring const &lemma,
52 wstring const &tags);
54 static int tagCount(wstring const &tags);
55 static wstring tagAt(wstring const &tags, int const index);
57 public:
58 /**
59 * This symbol stands for any char
61 static wstring const ANY_CHAR;
63 /**
64 * This symbol stands for any tag
66 static wstring const ANY_TAG;
68 /**
69 * This symbol marks a word queue
71 static wstring const QUEUE;
73 /**
74 * Constructor
76 PatternList();
78 /**
79 * Destructor
81 ~PatternList();
83 /**
84 * Copy constructor
86 PatternList(PatternList const &o);
88 /**
89 * Assignment operator
90 * @param o the object to be assigned
92 PatternList & operator =(PatternList const &o);
94 /**
95 * Marks the start of a multiple word sequence
97 void beginSequence();
99 /**
100 * Ends the multiple word sequence
102 void endSequence();
105 * Insertion method
106 * @param id
107 * @param lemma
108 * @param tags
110 void insert(int const id, wstring const &lemma, wstring const &tags);
113 * Insertion method
114 * @param id
115 * @param otherid
117 void insert(int const id, int const otherid);
120 * Get the PatternStore
121 * @returns a PatternStore object
123 PatternStore const & getPatterns();
126 * Build the FSM
128 void buildTransducer();
131 * Read PatternList from a file
132 * @param input the input stream
134 void read(FILE *input);
137 * Write PatternList to a file
138 * @param output the output stream
140 void write(FILE *output);
143 * Create a new MatchExe from PatternList, must be freed with 'delete'
144 * @return the new MatchExe object
146 MatchExe * newMatchExe();
149 * Get the alphabet of this PatternList object
150 * @return the alphabet
152 Alphabet & getAlphabet();
155 #endif