CMiniLexicon::FindMajorSignatures(): use log file routines
[linguistica.git] / WordCollection.h
blobe1a8345151752f8446e0de0b01aef135390fc191
1 // Mini-lexicon’s collection of words
2 // Copyright © 2009 The University of Chicago
3 #ifndef WORDCOLLECTION_H
4 #define WORDCOLLECTION_H
6 // See the CMiniLexicon class in MiniLexicon.h for an overview of
7 // suffix/signature-based discovery of morphology.
9 #include "CollectionTemplate.h"
10 #include <QString>
11 #include "StemListViewItem.h"
12 #include "Stem.h"
13 #include "AffixLocation.h"
14 template<class K, class V> class QMap;
15 namespace linguistica { namespace ui { class status_user_agent; } }
16 class CSignatureCollection;
17 class CPhoneCollection;
19 class CWordCollection : public TCollection<CStem> {
20 public:
21 enum eAffixLocation m_AffixLocation;
22 enum CWordListViewItem::display_mode m_DisplayMode;
23 CPhoneCollection* m_Phones_Tier1;
24 CPhoneCollection* m_Phones_Tier2;
25 CPhoneCollection* m_Phones_Tier1_Skeleton;
27 double m_PhonologicalContent_Unigram;
28 double m_PhonologicalContent_Bigram;
29 double m_Tier2_LocalMI_Score;
30 double m_Tier2_DistantMI_Score;
32 /// includes Tier1 bigram content
33 double m_LocalMI_TotalBoltzmannScore;
34 double m_LocalMI_Plog;
35 /// includes Tier1 bigram content
36 double m_DistantMI_TotalBoltzmannScore;
37 double m_DistantMI_Plog;
39 double m_MyZ_Local;
40 double m_MyZ_Distant;
41 CParse* m_Vowels;
42 public:
43 // construction/destruction.
45 explicit CWordCollection(CMiniLexicon* lexicon = 0);
46 ~CWordCollection();
48 // disable copy
49 private:
50 CWordCollection(const CWordCollection& x);
51 CWordCollection& operator=(const CWordCollection& x);
52 public:
53 CStem* operator<<(CStem*);
54 CStem* operator<<(CStem&);
55 CStem* operator<<(CStringSurrogate&);
56 CStem* operator<<(CParse*);
57 CStem* operator<<(QString);
58 void AddPointer(CStem*);
59 CStem* AddToCollection(CParse&);
60 CStem* AddToCollection(CStringSurrogate&);
62 void Empty();
63 void RemoveAll();
64 bool Remove(CStem*); ///< doesn't delete CStem*
65 bool RemoveMember(CStem*); ///< deletes CStem*
66 bool RemoveMember(CStringSurrogate&); ///< deletes CStem*
67 bool RemoveMember(CStringSurrogate&, bool);
68 void DeleteMarkedMembers();
70 // Accessors
71 void AssignSignatureFromStemsAffixPointer(enum eAffixLocation);
72 int HowManyAreAnalyzed(int& HowManyNotAnalyzed,
73 linguistica::ui::status_user_agent& status_display);
75 void OutputWords(QString filename, QMap<QString, QString>* filter);
77 void OutputWordsForTesting(QString);
78 CPhoneCollection* GetPhones();
79 CPhoneCollection* GetPhones_Tier2();
80 CPhoneCollection* GetPhones_Tier1_Skeleton();
81 // Mutators
82 void ReadWordFile(QString);
83 void PredecessorFreq1(CStemCollection*,
84 CPrefixCollection*, CSignatureCollection*,
85 enum eSuccessorFrequencyMode, int);
86 void SuccessorFreq1(CStemCollection*,
87 CSuffixCollection*, CSignatureCollection*,
88 enum eSuccessorFrequencyMode, int);
89 void FindAllWordNeighbors(CLexicon*);
90 //Phonology
91 void CountPhonesAndBiphones(enum eTier tier);
92 void DoPhonology();
93 void CreateCVTemplate();
95 void SplitPhonologyToTiers(enum CStem::ePhonologySplitType leave_slot);
96 void CreatePhonologyFromOrthography();
97 void ComputeProbabilitiesOfWords();
98 void GetPhonologyTierInfoForGraphOfWords();
99 double ComputeZeta();
100 void ComputeBoltzmannProbabilities();
102 double ComputeZ();
103 double ComputeZStar(); ///< Field method on tier 2
105 double GetPhonologicalContentTier1Bigrams()
106 { return m_PhonologicalContent_Bigram; }
107 double GetPhonologicalContentUnigrams()
108 { return m_PhonologicalContent_Unigram; }
109 double GetZ_Local() { return m_MyZ_Local; }
110 double GetZ_Distant() { return m_MyZ_Distant; }
112 double GetTier2_LocalMI_Score() { return m_Tier2_LocalMI_Score; }
113 double GetLocalMI_Plog() { return m_LocalMI_Plog; }
114 double GetTier2_DistantMI_Score() { return m_Tier2_DistantMI_Score; }
115 double GetDistantMI_Plog() { return m_DistantMI_Plog; }
118 #endif // WORDCOLLECTION_H