1 // Finite-state automaton representation of morphology
2 // Copyright © 2009 The University of Chicago
9 #include <QStandardItemModel>
10 #include "MiniLexicon.h"
25 FSAMorpheme(QString qs
,int cc
):str(qs
),m_corpusCount(cc
){};
27 double GetDL(int characterCount
) const;
28 const QString
& toStr() const {return str
;};
30 bool operator==(const FSAMorpheme
& other
) const;
34 friend class FSAstate
;
37 typedef std::list
<FSAMorpheme
*> FSAMorphemeList
;
38 typedef std::list
<FSAstate
*> FSAStateList
;
39 typedef std::list
<FSAedge
*> FSAedgeList
;
41 extern QString
join(const FSAMorphemeList
& v
, const QString
& delim
);
44 FSAStateList
* m_States
;
46 FSAStateList
* m_StartStates
;
47 std::list
<FSAedgeList
*> m_FSAPathList
;
50 CMiniLexicon
* m_lexicon
;
52 // the same morpheme may occur at multiple edges
53 QMap
<QString
, FSAMorpheme
*> m_morphemes
;
54 int m_nextStartStateId
;
57 // construction/destruction.
59 explicit FSA(CMiniLexicon
* pMiniLex
);
66 FSA
& operator=(const FSA
& x
);
67 void AddEdge(FSAedge
* edge
);
72 void AddSignature(CSignature
* pSig
);
73 void AddState(FSAstate
* state
);
77 void RemoveEdge(FSAedge
* edge
);
79 /// description length
84 void FSAListDisplay(Q3ListView
* widget
,
85 QMap
<QString
, QString
>* filter
,
90 /// list paths to leaves in m_FSAPathList
95 FSAStateList
* GetStates() { return m_States
;}
97 // linguistic analysis.
99 /// sample function that does some manipulation of FSA
102 static int GetRobustness(FSAedgeList
* pPath
);
104 void OutputFSAXfst ( const QString
& filename
);
106 //returns pointer to new edge
107 FSAedge
* DoParallelSplit (FSAedge
* pEdge
, FSAMorphemeList
& morphsToMove
);
108 //returns pointer to "first" new edge, ie, edge from start state
109 FSAedge
* DoSeriesSplit(FSAedge
* pEdge
, unsigned int len
, bool suffix
=true);
111 //first,last are iterators to list of lists,
112 // union of lists must be equal to set of morphemes on pEdge
113 void DoMultParallelSplit( FSAedge
* pEdge
,
114 std::list
<FSAMorphemeList
>::iterator fisrt
,
115 std::list
<FSAMorphemeList
>::iterator last
);
117 friend class FSAedge
;
118 friend class FSAstate
;
122 FSAMorphemeList m_Morphemes
;
123 FSAstate
* m_FromState
;
127 // construction/destruction.
129 explicit FSAedge(FSA
* pFSA
, FSAstate
* start
= 0, FSAstate
* end
= 0);
130 // FSAedge(FSA* pFSA, FSAstate* start, FSAstate* end, class CParse* pLabels);
131 // suppress implied default constructor
135 // destructor implicitly defined.
139 FSAedge(const FSAedge
& x
);
140 FSAedge
& operator=(const FSAedge
& x
);
143 void AddMorpheme(const QString
& morpheme_text
, int count
);
144 void RemoveMorpheme(FSAMorphemeList::iterator iter
);
145 FSAMorphemeList
* GetMorphemes() { return &m_Morphemes
; }
147 // source and target states.
149 FSAstate
* GetFromState() const { return m_FromState
; }
150 FSAstate
* GetToState() const { return m_ToState
; }
155 FSAedgeList m_EdgesOut
;
156 FSAedgeList m_EdgesIn
;
158 QString m_stateLabel
;
160 // XXX. only used for breadth-first search
161 std::list
<FSAedgeList
*> m_PathList
; //list of paths to this node
162 unsigned int m_DiscoverCount
;
166 std::list
<FSAedgeList
*>* GetPathList(){ return &m_PathList
; }
167 void addPath(FSAedgeList
* pPath
) { m_PathList
.push_back(pPath
);}
168 void setMaxDepth(int d
){this->m_MaxDepth
=d
;}
169 int getMaxDepth(){return this->m_MaxDepth
;}
173 FSAedgeList
* GetEdgesOut() { return &m_EdgesOut
; }
174 void AddEdgeOut(FSAedge
* pEdge
);
176 FSAedgeList
* GetEdgesIn() { return &m_EdgesIn
; }
177 void AddEdgeIn(FSAedge
* pEdge
);
180 void OutputXfst(QTextStream
& outf
);
181 void SearchEdgeXfst (QTextStream
& outf
, std::set
<FSAstate
*>& discovered
);
182 QString
getStateName(){ return QString("S%1").arg(this->m_stateId
) ;}
185 friend class FSAListViewItem
;
190 class CSignature
* m_Sig1
; // the standard of comparison
191 class CSignature
* m_Sig2
; // the signature being reanalyzed
193 int m_Length
; // number of affixes in longer signature
197 /// Qt3-style row object for a table displaying the FSA
198 /// Each row corresponds to a complete edge path
199 class FSAListViewItem
: public Q3ListViewItem
{
201 /// points to path to some final state
202 FSAedgeList
* m_pPath
;
204 // construction/destruction.
206 FSAListViewItem(Q3ListView
* pView
, FSAedgeList
* path
)
207 : Q3ListViewItem(pView
), m_pImage(), m_pPath(path
) { }
208 FSAListViewItem(FSAListViewItem
* pParent
, FSAedgeList
* path
)
209 : Q3ListViewItem(pParent
), m_pImage(), m_pPath(path
) { }
210 // disable default-construction.
218 FSAListViewItem(const FSAListViewItem
& x
)
220 // drop cached pixmap to avoid deleting it twice
222 m_pPath(x
.m_pPath
) { }
223 FSAListViewItem
& operator=(const FSAListViewItem
& x
)
225 Q3ListViewItem::operator=(x
);
233 /// graphical display
234 /// repeated requests are fast since created image is cached
235 QPixmap
* GetQPixmap();
236 /// write information about path to “command line” pane
237 void DisplayEdgePath(Q3TextEdit
* m_commandLine
);
240 FSAstate
* GetLVStartState()
241 { return m_pPath
->front()->GetFromState(); }
243 /// helper for graphical display
250 inline void FSAListViewItem::build_pixmap()
252 // not using graphviz, empty image
253 m_pImage
= new QPixmap
;