[linguistica.git] / DCNlearningsyl.cpp
// Implementation of learningsyl methods
// Copyright © 2009 The University of Chicago
#include "DCNlearningsyl.h"

#include <cstdlib>
#include <ctime>
#include <fstream>
#include <QString>

#include "DCNgrammarsyl.h"
#include "DCNnetworksyl.h"
#include "DCNcorpussyl.h"
#include "DCNdcnword.h"
#include "ui/Status.h"
#include "random.h"
namespace {
using linguistica::ui::status_user_agent;
} // anonymous namespace
learningsyl::learningsyl()
	: successful(false),
	  corpus(),
	  numberOfWords(0),
	  // helpers
	  grammar(),
	  net(),
	  possibleGrammar(),
	  possibleNet(),
	  rGrammar(),
	  // parameters
	  TfromUser(1.0),
	  increaseWhenWrong(0.02f),
	  decreaseWhenRight(0.95f),
	  numberOfTries(255),
	  cutoffFromUser(255),
	  startingAlpha(-0.3f),
	  startingBeta(-0.3f) { }

learningsyl::~learningsyl() { }
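// Parameter roles, as they play out in runHelper() below: T is the
// annealing temperature, seeded from TfromUser. Each time the network
// mis-syllabifies a word, T grows by increaseWhenWrong; each time it
// succeeds, T shrinks by the factor decreaseWhenRight. A trial ends when
// T drops below 0.01 or cutoffFromUser words have been processed, and
// run() makes up to numberOfTries trials.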
void learningsyl::runHelper(std::ostream &logstream, status_user_agent& status)
{
	using std::srand;
	using std::time;
	using std::rand;

	srand(static_cast<unsigned>(time(0)));
	successful = false;

	// nothing to learn from an empty corpus (this also avoids a
	// modulo-by-zero in the index update at the bottom of the loop)
	if (numberOfWords == 0)
		return;

	// local working copies of the user-supplied parameters
	float T = TfromUser;
	int cutoff = cutoffFromUser;
	int corpusIndex = 0;
	int maxCorpusIndex = numberOfWords;

	float alpha = 0;
	float beta = 0;
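	// alpha and beta are the network's two coupling parameters. In the
	// Goldsmith/Larson dynamic computational network model they weight
	// the influence of a segment's left and right neighbors -- that is
	// an interpretation; this file only treats them as two annealed
	// parameters handed to the grammar.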
	grammar.setValues(startingAlpha, startingBeta);
	net.setGrammar(&grammar);

	status.progress.clear();
	status.progress.set_denominator(cutoffFromUser);
	int updateNumber = 0;
	while (T > 0.01 && cutoff >= 0) {
		status.progress = updateNumber;

		// log the temperature, the iteration count, the current
		// alpha/beta, and the sonority of four sample letters
		logstream
			<< "T: " << T << "\t"
			<< "#: " << cutoffFromUser - cutoff << "\t"
			<< "a: " << grammar.getAlpha() << "\t"
			<< "b: " << grammar.getBeta() << "\t"
			<< "w: " << grammar.getSonority(QChar('w')) << "\t"
			<< "a: " << grammar.getSonority(QChar('a')) << "\t"
			<< "k: " << grammar.getSonority(QChar('k')) << "\t"
			<< "u: " << grammar.getSonority(QChar('u')) << "\n";
		// 1) take a word from the corpus
		dcnword word = corpus.wordAt(corpusIndex);

		// 2) check all the letters -- any letter not yet in the
		// sonority map gets a random starting value in [0.5, 1.0]
		QString text = word.getText();
		for (int i = 0; i < text.length(); i++) {
			if (!grammar.isInMap(text.at(i))) {
				const float randomNum =
					static_cast<float>(rand()) /
					static_cast<float>(RAND_MAX) / 2 +
					0.5f;
				grammar.setSonority(text.at(i), randomNum);
			}
		}
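		// (rand() / RAND_MAX lies in [0, 1]; halving it and adding
		// 0.5 places every new letter in the upper half of the
		// sonority scale, so fresh letters start out vowel-like.)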
		// 3) make the sonority vector for this word
		net.setWord(text);

		// 4) run the network with the word, alpha, and beta
		net.equilibrium();

		// 5) compare the network's maxima against the word's
		QString expMaxima = net.getMaxima(); // experimental maxima
		QString targetMaxima = word.getMaxima();
		// 6) adjust the sonority of the letters in this word where
		// the network's syllable peaks ('H') miss the target
		bool changes = false;
		for (int j = 0; j < text.length(); j++) {
			QChar letter = text.at(j);
			float s = grammar.getSonority(letter);
			QChar expChar = expMaxima.at(j);
			QChar targetChar = targetMaxima.at(j);

			if (expChar == 'H' && targetChar != 'H') {
				// spurious peak: lower this letter's sonority
				// (a guard against driving sonority negative,
				// e.g. "if (s > .1)", may be needed here)
				grammar.setSonority(letter, s - 0.1f);
				changes = true;
			} else if (expChar != 'H' && targetChar == 'H') {
				// missed peak: raise this letter's sonority
				grammar.setSonority(letter, s + 0.1f);
				changes = true;
			}
		}
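		// (This is an error-driven update in the perceptron style:
		// each letter takes a fixed step of 0.1 toward making the
		// network's peaks match the target. That is a reading of the
		// code above, not a claim from the original author.)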
		// 7) if the word came out wrong, propose a new alpha & beta
		// by a random step scaled by the current temperature
		if (changes) {
			using linguistica::random_small_float;

			// random_small_float() is between -1 and 1
			float deltaAlpha = T * random_small_float();
			alpha = grammar.getAlpha() + deltaAlpha;

			float deltaBeta = T * random_small_float();
			beta = grammar.getBeta() + deltaBeta;

			possibleGrammar.setValues(alpha, beta);
			possibleNet.setWord(text);
			possibleNet.setGrammar(&possibleGrammar);
			possibleNet.equilibrium();

			if (possibleNet.isConverged()) {
				// (alpha * beta > .3 may be another condition
				// worth testing here)

				// keep the accepted values negative
				if (alpha > 0)
					alpha *= -1;
				if (beta > 0)
					beta *= -1;

				grammar.setValues(alpha, beta);
			}

			T = T + increaseWhenWrong;
			//T = T + sqrt( pow(deltaAlpha, 2.0) + pow(deltaBeta, 2.0) );
		} else {
			T = T * decreaseWhenRight;
			//T = T - .01;
		}
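		// (Note the inverted annealing schedule: unlike classical
		// simulated annealing, where the temperature only cools, T
		// here rises on every error and decays on every success, so
		// it tracks the recent error rate of the grammar.)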
		cutoff--;
		updateNumber++;
		corpusIndex = (corpusIndex + 1) % maxCorpusIndex;
		if (corpusIndex % 256 == 0)
			logstream.flush();
	}
	status.progress.clear();

	// final check: count the trial as successful only if the learned
	// alpha & beta still leave the network convergent
	possibleGrammar.setValues(alpha, beta);
	possibleNet.setGrammar(&possibleGrammar);
	possibleNet.equilibrium();
	if (cutoff > 2 && possibleNet.isConverged())
		successful = true;
}
void learningsyl::run(status_user_agent& status_display)
{
	using std::time_t;
	using std::ctime;
	using std::time;

	grammar.clearMap();

	std::ofstream logstream("DCNsyllog.txt"); // declare and open the log file

	time_t rawtime;
	time(&rawtime);
	logstream << "Learning Algorithm run at " << ctime(&rawtime);

	for (int i = 0; i < numberOfTries; i++) {
		logstream << "\n\tTRIAL NUMBER " << i + 1 << "\n";
		runHelper(logstream, status_display);
		if (this->isSuccessful())
			break;
	}
	logstream.close();

	delete rGrammar; // drop the result of any earlier run
	// a plain copy is enough here thanks to the magic of copying QMaps
	// (they are implicitly shared, copy-on-write)
	rGrammar = new grammarsyl(grammar);
}
void learningsyl::setCorpus(corpussyl corpus)
{
	this->corpus = corpus;
	this->numberOfWords = corpus.numberOfWords();
}
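/*
 * Sketch of how a caller might drive this class, inferred from the methods
 * in this file. How the corpus gets populated is assumed, not shown here,
 * and status_user_agent is assumed to be constructible by the caller;
 * only setCorpus(), run(), and isSuccessful() are taken from this file.
 *
 *	corpussyl corpus;                        // populated elsewhere
 *	linguistica::ui::status_user_agent status;
 *
 *	learningsyl learner;
 *	learner.setCorpus(corpus);
 *	learner.run(status);                     // writes DCNsyllog.txt
 *	if (learner.isSuccessful())
 *		; // the learned grammar (rGrammar) is ready for use
 */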