CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / SignatureCollection.h
bloba218cc4600460c9a9868d77ea3a9b56a6338f4e9
1 // Affixes and signatures in signature/suffix-based model of morphology
2 // Copyright © 2009 The University of Chicago
3 #ifndef SIGNATURECOLLECTION_H
4 #define SIGNATURECOLLECTION_H
6 // See the CMiniLexicon class in MiniLexicon.h for an overview of
7 // suffix/signature-based discovery of morphology.
9 #include "CollectionTemplate.h"
10 #include "generaldefinitions.h"
11 #include "AffixLocation.h"
12 #include "Allomorphy.h"
14 template<class K, class V> class QMap;
15 class QString;
/// A collection of CSignature objects (derived from TCollection<CSignature>).
/// Each collection records whether it holds prefix or suffix signatures
/// (m_SignatureType) and keeps pointers to a suffix collection and a prefix
/// collection (MySuffixes, MyPrefixes).
/// NOTE(review): ownership of MySuffixes/MyPrefixes is not visible in this
/// header -- confirm in the implementation.
/// Copying is disabled (private copy constructor and copy assignment).
17 class CSignatureCollection: public TCollection<class CSignature> {
18 class CSuffixCollection* MySuffixes; // exposed through GetMySuffixes()/SetMySuffixes()
19 class CPrefixCollection* MyPrefixes; // exposed through GetMyPrefixes()/SetMyPrefixes()
20 /// affix location of the signatures held here (prefix or suffix)
21 enum eAffixLocation m_SignatureType;
22 double m_DLofPointersToMyMembers; // presumably a description-length (DL) figure -- TODO confirm
23 double m_SumOfDLofPointersInternalToEachMember; // presumably a description-length (DL) figure -- TODO confirm
24 public:
25 // construction/destruction.
27 CSignatureCollection();
28 CSignatureCollection(enum eAffixLocation prefix_or_suffix);
29 CSignatureCollection(CMiniLexicon* mini, CSuffixCollection* suffixes,
30 enum eAffixLocation prefix_or_suffix);
31 CSignatureCollection(CMiniLexicon* mini, CPrefixCollection* prefixes,
32 enum eAffixLocation prefix_or_suffix);
33 /// deprecated constructor; prefer one of the overloads above
34 CSignatureCollection(CMiniLexicon* mini);
35 ~CSignatureCollection();
37 // copying is disabled: the copy constructor and copy assignment are declared private
38 private:
39 CSignatureCollection(const CSignatureCollection& x);
40 CSignatureCollection& operator=(const CSignatureCollection& x);
41 public:
42 // assign.
44 void Empty();
45 void RemoveAll();
47 // insert.
49 CSignature* operator<<(CSignature* sig);
50 CSignature* operator<<(CParse* affixes);
51 void AddPointer(CSignature* sig);
52 CSignature* AddToCollection(CParse& affixes);
53 CSignature* AddToCollection(CStringSurrogate& dot_delimited);
55 // find.
57 CSignature* operator^=(CParse& affixes);
58 CSignature* operator^=(CParse* affixes);
59 /// str should be a '.'-delimited list of affixes;
60 /// the result is 0 if no such signature exists
61 CSignature* operator^=(CStringSurrogate& str);
62 /// equivalent to *this ^= CStringSurrogate(str)
63 CSignature* operator^=(QString str);
65 // remove.
67 /// removes sig from the collection without deleting it
68 bool Remove(CSignature* sig);
69 /// removes sig from the collection and deletes it
70 bool RemoveMember(CSignature* sig);
71 /// finds the signature matching dot_delimited (*this ^= dot_delimited) and deletes it
72 bool RemoveMember(CStringSurrogate& dot_delimited);
73 bool RemoveMember(CStringSurrogate& dot_delimited, bool delete_it); // NOTE(review): delete_it presumably selects whether the signature is deleted -- confirm
74 /// remove each item from m_DeletionArray (not declared in this header; presumably inherited -- confirm)
75 void DeleteMarkedMembers();
77 // union of contained signatures.
79 void SetMyPrefixes(CPrefixCollection* prefixes);
80 CPrefixCollection* GetMyPrefixes() { return MyPrefixes; } // returns the stored pointer as-is
81 void SetMySuffixes(CSuffixCollection* suffixes);
82 CSuffixCollection* GetMySuffixes() { return MySuffixes; } // returns the stored pointer as-is
84 // affix location (prefix or suffix).
86 eAffixLocation GetSignatureType() { return m_SignatureType; } // returns m_SignatureType
88 // Compare pairs of signatures and identify pairs where one is exactly a suffix of the other.
90 void CompareSignaturePairsForTotalOverlap();
91 void RecutLongerSigToMatchTheShorter(CSignatureAlignment* pSigAlignment);
93 // allomorphy.
95 void FindAllomorphy();
96 void PutAffixesOfRegularSignaturesIntoNewSuffixes(
97 CSuffixCollection* NewSuffixes,
98 int MinimumNumberOfStems);
99 void PutAffixesOfRegularSignaturesIntoNewPrefixes(
100 CPrefixCollection* NewPrefixes);
102 // output to GUI.
104 void BorrowedSigsDisplay(class Q3ListView* widget,
105 QMap<QString, QString>* filter);
106 void ListDisplay(class Q3ListView* widget,
107 QMap<QString, QString>* filter = 0); // filter defaults to a null pointer
109 // serialization and deserialization (file I/O).
111 void OutputXfst (QString);
112 void OutputSignatures(QString filename);
113 void ReadSignatureFile(QString filename,
114 enum eAffixLocation prefix_or_suffix);
115 void ReadSignatureFileBis(QString filename);
117 /// scan all sigs, and if the stems of pSig do not begin
118 /// with DiffLetter, then ... (NOTE(review): the original comment is truncated here; see the implementation for the intended action)
119 void TryToRemove(const QString Suffix, const QString DiffLetter);
120 double GetMDL();
121 void CalculateFrequencies();
122 /// For each signature, determines whether a signature
123 /// with more robustness is contained in it; if so, takes the
124 /// larger robustness as its own. This measures our certainty about
125 /// the STEMS as such for the signature.
126 void CheckRobustness();
127 void FindDisplayOrdering();
128 void CleanUp();
129 /// sums, over all signatures, the number of stems * the number of affixes
130 int GetTotalNumberOfWords();
131 int TheseTwoSuffixesShareHowManyStems(
132 class CSuffix* suf1, class CSuffix* suf2);
133 double ComputeDLofInternalPointersOfEachMember(enum eMDL_STYLE style);
134 double ComputeLengthOfPointersToEachOfMyMembers(enum eMDL_STYLE style);
135 void GetIndividualCountsForEachStem();
138 #endif // SIGNATURECOLLECTION_H