HowManyAreAnalyzed(): use status_user_agent to report progress
[linguistica.git] / FSA.h
blobe61bc9f9db29bbd05001e36a59760128340bf9dd
1 // Finite-state automaton representation of morphology
2 // Copyright © 2009 The University of Chicago
3 #ifndef FSA_H
4 #define FSA_H
6 #include <Q3ListView>
7 #include <Q3TextEdit>
8 #include <QTableView>
9 #include <QStandardItemModel>
10 #include "MiniLexicon.h"
11 #include <QString>
12 #include <set>
13 #include <list>
15 class FSA;
16 class FSAedge;
17 class FSAstate;
18 class SigAlignment;
20 class FSAMorpheme {
21 QString str;
22 int m_corpusCount;
24 public:
25 FSAMorpheme(QString qs,int cc):str(qs),m_corpusCount(cc){};
27 double GetDL(int characterCount) const;
28 const QString & toStr() const {return str;};
30 bool operator==(const FSAMorpheme & other) const;
32 friend class FSA;
33 friend class FSAedge;
34 friend class FSAstate;
37 typedef std::list<FSAMorpheme*> FSAMorphemeList;
38 typedef std::list<FSAstate*> FSAStateList;
39 typedef std::list<FSAedge*> FSAedgeList;
41 extern QString join(const FSAMorphemeList& v, const QString& delim);
43 class FSA {
44 FSAStateList* m_States;
45 FSAedgeList* m_Edges;
46 FSAStateList* m_StartStates;
47 std::list<FSAedgeList*> m_FSAPathList;
48 bool m_searched;
49 int m_MaxPathLength;
50 CMiniLexicon* m_lexicon;
51 int m_charCount;
52 // the same morpheme may occur at multiple edges
53 QMap<QString, FSAMorpheme*> m_morphemes;
54 int m_nextStartStateId;
55 int m_nextStateId;
56 public:
57 // construction/destruction.
58 FSA();
59 explicit FSA(CMiniLexicon* pMiniLex);
61 ~FSA();
63 private:
64 // disable copy.
65 FSA(const FSA& x);
66 FSA& operator=(const FSA& x);
67 void AddEdge(FSAedge* edge);
68 public:
70 // insert.
72 void AddSignature(CSignature* pSig);
73 void AddState(FSAstate* state);
75 // remove.
77 void RemoveEdge(FSAedge* edge);
79 /// description length
80 double ComputeDL();
82 // output to GUI.
84 void FSAListDisplay(Q3ListView* widget,
85 QMap<QString, QString>* filter,
86 bool unused);
88 // display helpers.
90 /// list paths to leaves in m_FSAPathList
91 void DoSearch();
92 void ResetDisplay();
94 /// state list
95 FSAStateList* GetStates() { return m_States;}
97 // linguistic analysis.
99 /// sample function that does some manipulation of FSA
100 void FSATestFunc();
102 static int GetRobustness(FSAedgeList * pPath);
104 void OutputFSAXfst ( const QString& filename );
106 //returns pointer to new edge
107 FSAedge* DoParallelSplit (FSAedge* pEdge, FSAMorphemeList& morphsToMove);
108 //returns pointer to "first" new edge, ie, edge from start state
109 FSAedge* DoSeriesSplit(FSAedge* pEdge, unsigned int len, bool suffix=true);
111 //first,last are iterators to list of lists,
112 // union of lists must be equal to set of morphemes on pEdge
113 void DoMultParallelSplit( FSAedge* pEdge,
114 std::list<FSAMorphemeList>::iterator fisrt,
115 std::list<FSAMorphemeList>::iterator last );
117 friend class FSAedge;
118 friend class FSAstate;
121 class FSAedge {
122 FSAMorphemeList m_Morphemes;
123 FSAstate* m_FromState;
124 FSAstate* m_ToState;
125 FSA* m_FSA;
126 public:
127 // construction/destruction.
129 explicit FSAedge(FSA* pFSA, FSAstate* start = 0, FSAstate* end = 0);
130 // FSAedge(FSA* pFSA, FSAstate* start, FSAstate* end, class CParse* pLabels);
131 // suppress implied default constructor
132 private:
133 FSAedge();
134 public:
135 // destructor implicitly defined.
137 // disable copy.
138 private:
139 FSAedge(const FSAedge& x);
140 FSAedge& operator=(const FSAedge& x);
142 public:
143 void AddMorpheme(const QString& morpheme_text, int count);
144 void RemoveMorpheme(FSAMorphemeList::iterator iter);
145 FSAMorphemeList* GetMorphemes() { return &m_Morphemes; }
147 // source and target states.
149 FSAstate* GetFromState() const { return m_FromState; }
150 FSAstate* GetToState() const { return m_ToState; }
153 class FSAstate
155 FSAedgeList m_EdgesOut;
156 FSAedgeList m_EdgesIn;
157 int m_MaxDepth;
158 QString m_stateLabel;
160 // XXX. only used for breadth-first search
161 std::list<FSAedgeList*> m_PathList; //list of paths to this node
162 unsigned int m_DiscoverCount;
163 int m_stateId;
165 public:
166 std::list<FSAedgeList*>* GetPathList(){ return &m_PathList; }
167 void addPath(FSAedgeList* pPath) { m_PathList.push_back(pPath);}
168 void setMaxDepth(int d){this->m_MaxDepth=d;}
169 int getMaxDepth(){return this->m_MaxDepth;}
171 FSAstate(FSA* pFSA);
173 FSAedgeList* GetEdgesOut() { return &m_EdgesOut; }
174 void AddEdgeOut(FSAedge* pEdge);
176 FSAedgeList* GetEdgesIn() { return &m_EdgesIn; }
177 void AddEdgeIn(FSAedge* pEdge);
179 //xfst output
180 void OutputXfst(QTextStream& outf);
181 void SearchEdgeXfst (QTextStream& outf, std::set<FSAstate*>& discovered);
182 QString getStateName(){ return QString("S%1").arg(this->m_stateId) ;}
184 friend class FSA;
185 friend class FSAListViewItem;
188 class SigAlignment
190 class CSignature* m_Sig1; // the standard of comparison
191 class CSignature* m_Sig2; // the signature being reanalyzed
193 int m_Length; // number of affixes in longer signature
197 /// Qt3-style row object for a table displaying the FSA
198 /// Each row corresponds to a complete edge path
199 class FSAListViewItem : public Q3ListViewItem {
200 QPixmap* m_pImage;
201 /// points to path to some final state
202 FSAedgeList* m_pPath;
203 public:
204 // construction/destruction.
206 FSAListViewItem(Q3ListView* pView, FSAedgeList* path)
207 : Q3ListViewItem(pView), m_pImage(), m_pPath(path) { }
208 FSAListViewItem(FSAListViewItem* pParent, FSAedgeList* path)
209 : Q3ListViewItem(pParent), m_pImage(), m_pPath(path) { }
210 // disable default-construction.
211 private:
212 FSAListViewItem();
213 public:
214 ~FSAListViewItem();
216 // copy.
218 FSAListViewItem(const FSAListViewItem& x)
219 : Q3ListViewItem(x),
220 // drop cached pixmap to avoid deleting it twice
221 m_pImage(),
222 m_pPath(x.m_pPath) { }
223 FSAListViewItem& operator=(const FSAListViewItem& x)
225 Q3ListViewItem::operator=(x);
226 m_pImage = 0;
227 m_pPath = x.m_pPath;
228 return *this;
231 // output to GUI.
233 /// graphical display
234 /// repeated requests are fast since created image is cached
235 QPixmap* GetQPixmap();
236 /// write information about path to “command line” pane
237 void DisplayEdgePath(Q3TextEdit* m_commandLine);
239 /// start state
240 FSAstate* GetLVStartState()
241 { return m_pPath->front()->GetFromState(); }
242 private:
243 /// helper for graphical display
244 void build_pixmap();
246 friend class FSA;
249 #ifndef USE_GRAPHVIZ
250 inline void FSAListViewItem::build_pixmap()
252 // not using graphviz, empty image
253 m_pImage = new QPixmap;
255 #endif
257 #endif // FSA_H