CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / FSA.h
blobd3d3ee03111d03477639b118da64722f3dd725b8
1 // Finite-state automaton representation of morphology
2 // Copyright © 2009 The University of Chicago
3 #ifndef FSA_H
4 #define FSA_H
6 #include <Q3ListView>
7 #include <Q3TextEdit>
8 #include <QTableView>
9 #include <QStandardItemModel>
10 #include "MiniLexicon.h"
11 #include <QString>
12 #include <set>
13 #include <list>
15 class FSA;
16 class FSAedge;
17 class FSAstate;
18 class FSAMorpheme;
19 class SigAlignment;
22 typedef std::list<FSAMorpheme*> FSAMorphemeList;
23 typedef std::list<FSAstate*> FSAStateList;
24 typedef std::list<FSAedge*> FSAedgeList;
25 typedef std::list<FSAedgeList*> FSApathList;
27 class FSAMorpheme {
28 QString str;
29 int m_corpusCount;
31 public:
32 FSAMorpheme(QString qs,int cc):str(qs),m_corpusCount(cc){};
34 double GetDL(int characterCount) const;
35 const QString & toStr() const {return str;};
37 bool operator==(const FSAMorpheme & other) const;
39 friend class FSA;
40 friend class FSAedge;
41 friend class FSAstate;
44 extern QString join(const FSAMorphemeList& v, const QString& delim);
46 class FSA {
47 FSAStateList* m_States;
48 FSAedgeList* m_Edges;
49 FSAStateList* m_StartStates;
50 std::list<FSAedgeList*> m_FSAPathList;
51 bool m_searched;
52 int m_MaxPathLength;
53 CMiniLexicon* m_lexicon;
54 int m_charCount;
55 // the same morpheme may occur at multiple edges
56 QMap<QString, FSAMorpheme*> m_morphemes;
57 //QMultiMap<QString, FSAedge*> m_morphemeEdges;
58 int m_nextStartStateId;
59 int m_nextStateId;
60 public:
61 // construction/destruction.
62 FSA();
63 explicit FSA(CMiniLexicon* pMiniLex);
65 ~FSA();
67 private:
68 // disable copy.
69 FSA(const FSA& x);
70 FSA& operator=(const FSA& x);
71 void AddEdge(FSAedge* edge);
72 public:
74 // insert.
76 void AddSignature(CSignature* pSig);
77 //void AddPrefixSignature(CSignature* pSig);
78 void AddState(FSAstate* state);
80 // remove.
82 void RemoveEdge(FSAedge* edge);
84 /// description length
85 double ComputeDL();
87 void AddPrefixes(CMiniLexicon* pMiniLex);
88 // output to GUI.
90 void FSAListDisplay(Q3ListView* widget,
91 QMap<QString, QString>* filter,
92 bool unused);
94 // display helpers.
96 /// list paths to leaves in m_FSAPathList
97 void DoSearch();
98 void ResetDisplay();
100 /// state list
101 FSAStateList* GetStates() { return m_States;}
103 // linguistic analysis.
105 /// sample function that does some manipulation of FSA
106 void FSATestFunc();
108 static int GetRobustness(FSAedgeList * pPath);
110 void OutputFSAXfst ( const QString& filename );
112 //returns pointer to new edge
113 FSAedge* DoParallelSplit (FSAedge* pEdge, FSAMorphemeList& morphsToMove);
114 //returns pointer to "first" new edge, ie, edge from start state
115 FSAedge* DoSeriesSplit(FSAedge* pEdge, unsigned int len, bool suffix=true);
117 //first,last are iterators to list of lists,
118 // union of lists must be equal to set of morphemes on pEdge
119 void DoMultParallelSplit( FSAedge* pEdge,
120 std::list<FSAMorphemeList>::iterator fisrt,
121 std::list<FSAMorphemeList>::iterator last );
123 friend class FSAedge;
124 friend class FSAstate;
127 class FSAedge {
128 FSAMorphemeList m_Morphemes;
129 FSAstate* m_FromState;
130 FSAstate* m_ToState;
131 FSA* m_FSA;
132 public:
133 // construction/destruction.
135 explicit FSAedge(FSA* pFSA, FSAstate* start = 0, FSAstate* end = 0);
136 // FSAedge(FSA* pFSA, FSAstate* start, FSAstate* end, class CParse* pLabels);
137 // suppress implied default constructor
138 private:
139 FSAedge();
140 public:
141 // destructor implicitly defined.
143 // disable copy.
144 private:
145 FSAedge(const FSAedge& x);
146 FSAedge& operator=(const FSAedge& x);
148 public:
149 void AddMorpheme(const QString& morpheme_text, int count);
150 void RemoveMorpheme(FSAMorphemeList::iterator iter);
151 FSAMorphemeList* GetMorphemes() { return &m_Morphemes; }
153 // source and target states.
155 FSAstate* GetFromState() const { return m_FromState; }
156 FSAstate* GetToState() const { return m_ToState; }
159 class FSAstate
161 FSAedgeList m_EdgesOut;
162 FSAedgeList m_EdgesIn;
163 int m_MaxDepth;
164 QString m_stateLabel;
166 // XXX. only used for breadth-first search
167 std::list<FSAedgeList*> m_PathList; //list of paths to this node
168 unsigned int m_DiscoverCount;
169 int m_stateId;
171 public:
172 std::list<FSAedgeList*>* GetPathList(){ return &m_PathList; }
173 void addPath(FSAedgeList* pPath) { m_PathList.push_back(pPath);}
174 void setMaxDepth(int d){this->m_MaxDepth=d;}
175 int getMaxDepth(){return this->m_MaxDepth;}
177 FSAstate(FSA* pFSA);
179 FSAedgeList* GetEdgesOut() { return &m_EdgesOut; }
180 void AddEdgeOut(FSAedge* pEdge);
182 FSAedgeList* GetEdgesIn() { return &m_EdgesIn; }
183 void AddEdgeIn(FSAedge* pEdge);
185 //xfst output
186 void OutputXfst(QTextStream& outf);
187 void SearchEdgeXfst (QTextStream& outf, std::set<FSAstate*>& discovered);
188 QString getStateName(){ return QString("S%1").arg(this->m_stateId) ;}
190 friend class FSA;
191 friend class FSAListViewItem;
194 class SigAlignment
196 class CSignature* m_Sig1; // the standard of comparison
197 class CSignature* m_Sig2; // the signature being reanalyzed
199 int m_Length; // number of affixes in longer signature
203 /// Qt3-style row object for a table displaying the FSA
204 /// Each row corresponds to a complete edge path
205 class FSAListViewItem : public Q3ListViewItem {
206 QPixmap* m_pImage;
207 /// points to path to some final state
208 FSAedgeList* m_pPath;
209 public:
210 // construction/destruction.
212 FSAListViewItem(Q3ListView* pView, FSAedgeList* path)
213 : Q3ListViewItem(pView), m_pImage(), m_pPath(path) { }
214 FSAListViewItem(FSAListViewItem* pParent, FSAedgeList* path)
215 : Q3ListViewItem(pParent), m_pImage(), m_pPath(path) { }
216 // disable default-construction.
217 private:
218 FSAListViewItem();
219 public:
220 ~FSAListViewItem();
222 // copy.
224 FSAListViewItem(const FSAListViewItem& x)
225 : Q3ListViewItem(x),
226 // drop cached pixmap to avoid deleting it twice
227 m_pImage(),
228 m_pPath(x.m_pPath) { }
229 FSAListViewItem& operator=(const FSAListViewItem& x)
231 Q3ListViewItem::operator=(x);
232 m_pImage = 0;
233 m_pPath = x.m_pPath;
234 return *this;
237 // output to GUI.
239 /// graphical display
240 /// repeated requests are fast since created image is cached
241 QPixmap* GetQPixmap();
242 /// write information about path to “command line” pane
243 void DisplayEdgePath(Q3TextEdit* m_commandLine);
245 /// start state
246 FSAstate* GetLVStartState()
247 { return m_pPath->front()->GetFromState(); }
248 private:
249 /// helper for graphical display
250 void build_pixmap();
252 friend class FSA;
255 #ifndef USE_GRAPHVIZ
256 inline void FSAListViewItem::build_pixmap()
258 // not using graphviz, empty image
259 m_pImage = new QPixmap;
261 #endif
263 #endif // FSA_H