CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / StemCollection.h
blob5eb46a9a7b963323d454d2b71d486d334cb98800
1 // Mini-lexicon’s collection of stems
2 // Copyright © 2009 The University of Chicago
3 #ifndef STEMCOLLECTION_H
4 #define STEMCOLLECTION_H
6 // See the CMiniLexicon class in MiniLexicon.h for an overview of
7 // suffix/signature-based discovery of morphology.
9 class CStemCollection;
11 #include "CollectionTemplate.h"
12 #include "AffixLocation.h"
13 #include "generaldefinitions.h"
14 template<class K, class V> class QMap;
16 /// List of stems for a stem/signature model of morphology
17 ///
18 /// Maintains a list of stems used in analyzing a particular collection
19 /// of words (that is, all stems associated to a particular mini-lexicon).
20 /// Semantically, a stem collection is a set (like std::set) of pointers
21 /// to CStem objects which it owns. The contained stems are generally
22 /// deleted upon removal.
23 ///
24 /// When a stem is inserted, it is added to the associated lexicon, and
25 /// when a stem is removed, it is removed from the associated lexicon.
26 ///
27 /// This collection type also maintains a total use count for the stems it
28 /// manages, for use in description length calculations.
29 ///
30 /// To support the successor-frequency algorithm, a stem collection
31 /// should support incremental lookup of stems (find all stems starting
32 /// with a given phoneme sequence, add a phoneme to that sequence to
33 /// narrow the search, etc).
34 class CStemCollection : public TCollection<class CStem> {
35 protected:
// Affix location recorded for this collection.
// NOTE(review): presumably says whether the stems here combine with
// prefixes or suffixes -- confirm against AffixLocation.h.
36 enum eAffixLocation m_AffixLocation;
// Running total of the use counts of the managed stems; kept for the
// description length calculations.
37 double m_TotalUseCount;
38 public:
39 // construction/destruction.
// The one-argument constructor takes a pointer to a mini-lexicon.
// NOTE(review): presumably the mini-lexicon whose stems this collection
// holds -- confirm in the implementation file.
41 CStemCollection();
42 CStemCollection(class CMiniLexicon* mini);
43 ~CStemCollection();
44 // copy construction, copy-assignment implicitly defined.
46 // Qt3-style collection view.
// `parent` is the list view to display into; `filter` is an optional
// string-to-string map that defaults to a null pointer (0).
// NOTE(review): how the filter is applied is not visible in this header.
48 void ListDisplay(class Q3ListView* parent,
49 QMap<class QString, class QString>* filter = 0);
51 // input/output to file.
// Both functions take the file name by value. OutputStems has no default
// for `filter`, unlike ListDisplay. ReadStemFile additionally takes the
// affix location to use for what it reads (exact use: see implementation).
53 void OutputStems(class QString filename,
54 QMap<class QString, class QString>* filter);
55 void ReadStemFile(class QString filename,
56 enum eAffixLocation affix_loc);
58 // insert.
// Overloads accept an existing stem, a string surrogate, a parse, or a
// QString. Each inserting overload returns a CStem*.
// NOTE(review): presumably the stem now held by the collection (new or
// already present) -- confirm in the implementation file.
// AddPointer takes a CStem* and returns nothing.
60 CStem* operator<<(const CStem* stem);
61 CStem* operator<<(class CStringSurrogate stem_text);
62 CStem* operator<<(const class CParse* stem_text);
63 CStem* operator<<(class QString stem_text);
64 void AddPointer(CStem* pointee);
65 CStem* AddToCollection(const class CParse& stem_text);
66 CStem* AddToCollection(const class CStringSurrogate& stem_text);
68 // clear.
// NOTE(review): the difference between Empty() and RemoveAll() (e.g.
// whether the stems are deleted) is not visible in this header.
70 void Empty();
71 void RemoveAll();
73 // remove.
// The trailing comments state the ownership consequence of each overload:
// Remove() leaves the CStem alive, RemoveMember() deletes it, and the
// two-argument RemoveMember() lets the caller choose via `delete_stem`.
// NOTE(review): the bool result presumably reports whether a stem was
// found and removed -- confirm.
75 bool Remove(CStem* stem); // doesn't delete CStem*
76 bool RemoveMember(CStem* stem); // deletes CStem*
77 bool RemoveMember(const class CStringSurrogate& stem_text); // deletes CStem*
78 bool RemoveMember(const class CStringSurrogate& stem_text,
79 bool delete_stem); // FSA
80 void DeleteMarkedMembers();
82 // accessors.
// NOTE(review): presumably returns m_TotalUseCount; declared non-const.
84 double GetTotalUseCount();
86 // description length.
// Both return a double; the first is parameterised by an MDL style, the
// second by a pointer to a lexicon (`MotherLexicon`).
88 double CalculateSumOfPointersToMyStems(enum eMDL_STYLE style);
89 double CalculateTotalPhonologicalInformationContent(
90 class CLexicon* MotherLexicon);
// NOTE(review): no closing `};` for this class is visible before the
// #endif below -- confirm it was not lost from the file.
93 #endif // STEMCOLLECTION_H