tagging release
[dasher.git] / trunk / Src / DasherCore / AlphabetManager.cpp
blob28051ff1d4677f1e7ebd53a8a030cb617457822b
1 // AlphabetManager.cpp
2 //
3 // Copyright (c) 2007 The Dasher Team
4 //
5 // This file is part of Dasher.
6 //
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "../Common/Common.h"
23 #include "AlphabetManager.h"
24 #include "DasherInterfaceBase.h"
25 #include "DasherNode.h"
26 #include "Event.h"
27 #include "EventHandler.h"
28 #include "NodeCreationManager.h"
30 #include <vector>
31 #include <sstream>
32 #include <iostream>
34 using namespace Dasher;
36 // Track memory leaks on Windows to the line that new'd the memory
37 #ifdef _WIN32
38 #ifdef _DEBUG_MEMLEAKS
39 #define DEBUG_NEW new( _NORMAL_BLOCK, THIS_FILE, __LINE__ )
40 #define new DEBUG_NEW
41 #undef THIS_FILE
42 static char THIS_FILE[] = __FILE__;
43 #endif
44 #endif
46 CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CLanguageModel *pLanguageModel, CLanguageModel::Context iLearnContext, bool bGameMode, const std::string &strGameModeText )
47 : CNodeManager(0), m_pLanguageModel(pLanguageModel), m_pNCManager(pNCManager) {
49 m_pInterface = pInterface;
51 m_iLearnContext = iLearnContext;
52 m_bGameMode = bGameMode;
53 m_strGameString = strGameModeText;
56 CDasherNode *CAlphabetManager::GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData) {
57 CDasherNode *pNewNode;
59 // TODO: iOffset has gotten a bit hacky here
61 int iSymbol;
62 int iOffset;
63 int iColour;
65 std::string strContext;
67 CLanguageModel::Context iContext;
69 if(pUserData && static_cast<SRootData *>(pUserData)->szContext) {
70 std::string strRootText(static_cast<SRootData *>(pUserData)->szContext);
72 int iMaxContextLength = m_pLanguageModel->GetContextLength() + 1;
74 // TODO: No need to explicitly pass context
75 // TODO: Utility function for looking up symbolic context
77 int iStart = static_cast<SRootData *>(pUserData)->iOffset - iMaxContextLength;
78 if(iStart < 0)
79 iStart = 0;
81 strContext = m_pInterface->GetContext(iStart, static_cast<SRootData *>(pUserData)->iOffset - iStart);
82 BuildContext(strContext, false, iContext, iSymbol);
84 iOffset = static_cast<SRootData *>(pUserData)->iOffset - 1;
85 iColour = m_pNCManager->GetColour(iSymbol);
87 else {
88 // Create a root node
90 if(pUserData)
91 iOffset = static_cast<SRootData *>(pUserData)->iOffset;
92 else
93 iOffset = -1;
94 iColour = 7;
96 strContext = m_pNCManager->GetAlphabet()->GetDefaultContext();
97 BuildContext(strContext, true, iContext, iSymbol);
100 // FIXME - Make this a CDasherComponent
102 // Stuff which could in principle be done in the symbol node creation routine
103 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
104 pDisplayInfo->iColour = iColour;
105 pDisplayInfo->bShove = true;
106 pDisplayInfo->bVisible = true;
107 pDisplayInfo->strDisplayText = m_pNCManager->GetAlphabet()->GetDisplayText(iSymbol);
109 pNewNode = new CDasherNode(pParent, iLower, iUpper, pDisplayInfo);
111 pNewNode->m_pNodeManager = this;
113 SAlphabetData *pNodeUserData = new SAlphabetData;
114 pNewNode->m_pUserData = pNodeUserData;
116 pNodeUserData->iOffset = iOffset;
117 pNodeUserData->iPhase = 0;
118 pNodeUserData->iSymbol = iSymbol;
120 pNodeUserData->iContext = iContext;
125 pNodeUserData->pLanguageModel = m_pLanguageModel;
127 pNewNode->SetFlag(NF_SEEN, true);
131 if(m_bGameMode) {
132 pNodeUserData->iGameOffset = -1;
133 pNewNode->SetFlag(NF_GAME, true);
136 return pNewNode;
139 void CAlphabetManager::PopulateChildren( CDasherNode *pNode ) {
140 PopulateChildrenWithSymbol( pNode, -2, 0 );
143 CDasherNode *CAlphabetManager::CreateGroupNode(CDasherNode *pParent, SGroupInfo *pInfo, std::vector<unsigned int> *pCProb, unsigned int iStart, unsigned int iEnd, unsigned int iMin, unsigned int iMax) {
145 #ifdef WIN32
146 unsigned int iLbnd = (((*pCProb)[iStart-1] - (*pCProb)[iMin-1]) *
147 (unsigned __int64)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
148 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
149 unsigned int iHbnd = (((*pCProb)[iEnd-1] - (*pCProb)[iMin-1]) *
150 (unsigned __int64)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
151 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
152 #else
153 unsigned int iLbnd = (((*pCProb)[iStart-1] - (*pCProb)[iMin-1]) *
154 (unsigned long long int)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
155 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
156 unsigned int iHbnd = (((*pCProb)[iEnd-1] - (*pCProb)[iMin-1]) *
157 (unsigned long long int)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
158 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
159 #endif
161 SAlphabetData *pParentData = static_cast<SAlphabetData *>(pParent->m_pUserData);
163 // TODO: More sensible structure in group data to map directly to this
164 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
165 pDisplayInfo->iColour = pInfo->iColour;
166 pDisplayInfo->bShove = true;
167 pDisplayInfo->bVisible = pInfo->bVisible;
168 pDisplayInfo->strDisplayText = pInfo->strLabel;
170 CDasherNode *pNewNode = new CDasherNode(pParent, iLbnd, iHbnd, pDisplayInfo);
172 pNewNode->m_pNodeManager = this;
173 pNewNode->SetFlag(NF_SUBNODE, true);
175 SAlphabetData *pNodeUserData = new SAlphabetData;
176 pNewNode->m_pUserData = pNodeUserData;
178 pNodeUserData->iOffset = pParentData->iOffset;
179 pNodeUserData->iPhase = pParentData->iPhase;
180 pNodeUserData->iSymbol = 0; // TODO: Sort out symbol for groups
181 pNodeUserData->pLanguageModel = pParentData->pLanguageModel;
182 pNodeUserData->iContext = pParentData->pLanguageModel->CloneContext(pParentData->iContext);
184 return pNewNode;
187 // TODO: use these functions elsewhere in the file
188 CDasherNode *CAlphabetManager::CreateSymbolNode(CDasherNode *pParent, symbol iSymbol, std::vector<unsigned int> *pCProb, unsigned int iStart, unsigned int iEnd, unsigned int iMin, unsigned int iMax, symbol iExistingSymbol, CDasherNode *pExistingChild) {
189 // TODO: Node deletion etc.
191 #ifdef WIN32
192 unsigned int iLbnd = (((*pCProb)[iStart-1] - (*pCProb)[iMin-1]) *
193 (unsigned __int64)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
194 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
195 unsigned int iHbnd = (((*pCProb)[iEnd-1] - (*pCProb)[iMin-1]) *
196 (unsigned __int64)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
197 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
198 #else
199 unsigned int iLbnd = (((*pCProb)[iStart-1] - (*pCProb)[iMin-1]) *
200 (unsigned long long int)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
201 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
202 unsigned int iHbnd = (((*pCProb)[iEnd-1] - (*pCProb)[iMin-1]) *
203 (unsigned long long int)(m_pNCManager->GetLongParameter(LP_NORMALIZATION))) /
204 ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]);
205 #endif
207 SAlphabetData *pParentData = static_cast<SAlphabetData *>(pParent->m_pUserData);
208 CDasherNode *pNewNode = NULL;
210 // TODO: Better way of specifying alternate roots
211 // TODO: Building with existing node
212 if(iSymbol == iExistingSymbol) {
213 pNewNode = pExistingChild;
214 pNewNode->SetRange(iLbnd, iHbnd);
215 pNewNode->SetParent(pParent);
217 DASHER_ASSERT(static_cast<SAlphabetData *>(pExistingChild->m_pUserData)->iOffset == pParentData->iOffset + 1);
220 else if(iSymbol == m_pNCManager->GetControlSymbol()) {
221 pNewNode = m_pNCManager->GetRoot(1, pParent, iLbnd, iHbnd, &(pParentData->iOffset));
223 else if(iSymbol == m_pNCManager->GetStartConversionSymbol()) {
224 // else if(iSymbol == m_pNCManager->GetSpaceSymbol()) {
226 // TODO: Need to consider the case where there is no compile-time support for this
227 pNewNode = m_pNCManager->GetRoot(2, pParent, iLbnd, iHbnd, &(pParentData->iOffset));
229 else {
230 int iPhase = (pParentData->iPhase + 1) % 2;
231 int iColour = m_pNCManager->GetColour(iSymbol);
233 // This is for backwards compatibility with old alphabet files -
234 // ideally make this log a warning (unrelated TODO: automate
235 // validation of alphabet files, plus maintenance of repository
236 // etc.)
237 if(iColour == -1) {
238 if(iSymbol == m_pNCManager->GetSpaceSymbol()) {
239 iColour = 9;
241 else {
242 iColour = (iSymbol % 3) + 10;
246 // Loop on low colours for nodes (TODO: go back to colour namespaces?)
247 if(iPhase == 1 && iColour < 130)
248 iColour += 130;
250 // TODO: Exceptions / error handling in general
252 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
253 pDisplayInfo->iColour = iColour;
254 pDisplayInfo->bShove = true;
255 pDisplayInfo->bVisible = true;
256 pDisplayInfo->strDisplayText = m_pNCManager->GetAlphabet()->GetDisplayText(iSymbol);
258 pNewNode = new CDasherNode(pParent, iLbnd, iHbnd, pDisplayInfo);
261 // std::stringstream ssLabel;
263 // ssLabel << m_pNCManager->GetAlphabet()->GetDisplayText(iSymbol) << ": " << pNewNode;
265 // pDisplayInfo->strDisplayText = ssLabel.str();
268 pNewNode->m_pNodeManager = this;
270 SAlphabetData *pNodeUserData = new SAlphabetData;
271 pNewNode->m_pUserData = pNodeUserData;
273 pNodeUserData->iOffset = pParentData->iOffset + 1;
274 pNodeUserData->iPhase = iPhase;
275 pNodeUserData->iSymbol = iSymbol;
277 CLanguageModel::Context iContext;
278 iContext = m_pLanguageModel->CloneContext(pParentData->iContext);
279 m_pLanguageModel->EnterSymbol(iContext, iSymbol); // TODO: Don't use symbols?
280 pNodeUserData->iContext = iContext;
282 pNodeUserData->pLanguageModel = pParentData->pLanguageModel; // TODO: inconsistent with above?
285 return pNewNode;
288 void CAlphabetManager::RecursiveIterateGroup(CDasherNode *pParent, SGroupInfo *pInfo, std::vector<symbol> *pSymbols, std::vector<unsigned int> *pCProb, int iMin, int iMax, symbol iExistingSymbol, CDasherNode *pExistingChild) {
289 // TODO: Think through alphabet file formats etc. to make this class easier.
290 // TODO: Throw a warning if parent node already has children
292 CDasherNode **pChildNodes = new CDasherNode *[iMax - iMin];
294 for(int i(iMin); i < iMax; ++i) {
295 pChildNodes[i - iMin] = NULL;
298 // Create child nodes and cache them for later
299 SGroupInfo *pCurrentNode(pInfo);
300 while(pCurrentNode) {
301 CDasherNode *pNewChild = CreateGroupNode(pParent, pCurrentNode, pCProb, pCurrentNode->iStart, pCurrentNode->iEnd, iMin, iMax);
302 RecursiveIterateGroup(pNewChild, pCurrentNode->pChild, pSymbols, pCProb, pCurrentNode->iStart, pCurrentNode->iEnd, iExistingSymbol, pExistingChild);
304 for(int i(pCurrentNode->iStart); i < pCurrentNode->iEnd; ++i) {
305 pChildNodes[i - iMin] = pNewChild;
308 pCurrentNode = pCurrentNode->pNext;
311 CDasherNode *pLastChild = NULL;
313 // Now actually populate the children
314 for(int i(iMin); i < iMax; ++i) {
315 if(!pChildNodes[i-iMin]) {
316 CDasherNode *pNewChild = CreateSymbolNode(pParent, (*pSymbols)[i], pCProb, i, i+1, iMin, iMax, iExistingSymbol, pExistingChild);
317 pParent->Children().push_back(pNewChild);
318 pLastChild = pNewChild;
320 else if (pChildNodes[i-iMin] != pLastChild) {
321 pParent->Children().push_back(pChildNodes[i-iMin]);
322 pLastChild = pChildNodes[i-iMin];
326 // std::cout << pParent << std::endl;
328 pParent->SetFlag(NF_ALLCHILDREN, true);
330 delete[] pChildNodes;
333 void CAlphabetManager::PopulateChildrenWithSymbol( CDasherNode *pNode, int iExistingSymbol, CDasherNode *pExistingChild ) {
334 SAlphabetData *pParentUserData(static_cast<SAlphabetData *>(pNode->m_pUserData));
336 // TODO: generally improve with iterators etc.
337 // FIXME: this has to change for history stuff and Japanese dasher
338 std::vector < symbol > newchars; // place to put this list of characters
339 std::vector < unsigned int >cum; // for the probability list
341 // TODO: Need to fix up relation to language model here (use one from node, not global).
342 m_pNCManager->GetProbs(pParentUserData->iContext, newchars, cum, m_pNCManager->GetLongParameter(LP_NORMALIZATION));
343 int iChildCount = newchars.size();
345 // work out cumulative probs in place
346 for(int i = 1; i < iChildCount; i++) {
347 cum[i] += cum[i - 1];
350 RecursiveIterateGroup(pNode, m_pNCManager->GetAlphabet()->m_pBaseGroup, &newchars, &cum, 1, iChildCount, iExistingSymbol, pExistingChild);
353 void CAlphabetManager::ClearNode( CDasherNode *pNode ) {
354 SAlphabetData *pUserData(static_cast<SAlphabetData *>(pNode->m_pUserData));
356 pUserData->pLanguageModel->ReleaseContext(pUserData->iContext);
357 delete pUserData;
360 void CAlphabetManager::Output( CDasherNode *pNode, Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization) {
361 symbol t = static_cast<SAlphabetData *>(pNode->m_pUserData)->iSymbol;
363 // std::cout << pNode << " " << pNode->Parent() << ": Output at offset " << static_cast<SAlphabetData *>(pNode->m_pUserData)->iOffset << " *" << m_pNCManager->GetAlphabet()->GetText(t) << "* " << std::endl;
365 if(t) { // Ignore symbol 0 (root node)
366 Dasher::CEditEvent oEvent(1, m_pNCManager->GetAlphabet()->GetText(t), static_cast<SAlphabetData *>(pNode->m_pUserData)->iOffset);
367 m_pNCManager->InsertEvent(&oEvent);
369 // Track this symbol and its probability for logging purposes
370 if (pAdded != NULL) {
371 Dasher::SymbolProb sItem;
372 sItem.sym = t;
373 sItem.prob = pNode->GetProb(iNormalization);
375 pAdded->push_back(sItem);
380 void CAlphabetManager::Undo( CDasherNode *pNode ) {
381 symbol t = static_cast<SAlphabetData *>(pNode->m_pUserData)->iSymbol;
383 // std::cout << pNode << " " << pNode->Parent() << ": Undo at offset " << static_cast<SAlphabetData *>(pNode->m_pUserData)->iOffset << " *" << m_pNCManager->GetAlphabet()->GetText(t) << "* " << std::endl;
385 if(t) { // Ignore symbol 0 (root node)
386 Dasher::CEditEvent oEvent(2, m_pNCManager->GetAlphabet()->GetText(t), static_cast<SAlphabetData *>(pNode->m_pUserData)->iOffset);
387 m_pNCManager->InsertEvent(&oEvent);
391 // TODO: Sort out node deletion etc.
392 CDasherNode *CAlphabetManager::RebuildParent(CDasherNode *pNode, int iGeneration) {
394 // TODO: iGeneration obsolete?
396 int iOffset(static_cast<SAlphabetData *>(pNode->m_pUserData)->iOffset);
397 int iNewOffset = iOffset - 1;
399 CDasherNode *pNewNode;
401 int iOldPhase(static_cast<SAlphabetData *>(pNode->m_pUserData)->iPhase);
402 int iNewPhase;
404 symbol iNewSymbol;
406 std::string strContext;
407 CLanguageModel::Context iContext;
409 if(iOffset == -1) {
410 // pNode is already a root
411 return NULL;
413 else if(iOffset == 0) {
414 // TODO: Creating a root node, Shouldn't be a special case
415 iNewPhase = 0;
416 iNewSymbol = 0;
417 strContext = m_pNCManager->GetAlphabet()->GetDefaultContext();
418 BuildContext(strContext, true, iContext, iNewSymbol);
420 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
421 pDisplayInfo->iColour = 7; // TODO: Hard coded value
422 pDisplayInfo->bShove = true;
423 pDisplayInfo->bVisible = true;
424 pDisplayInfo->strDisplayText = "";
426 pNewNode = new CDasherNode(NULL, 0, 0, pDisplayInfo);
428 else {
429 int iMaxContextLength = m_pLanguageModel->GetContextLength() + 1;
431 int iStart = iOffset - iMaxContextLength;
432 if(iStart < 0)
433 iStart = 0;
435 strContext = m_pInterface->GetContext(iStart, iOffset - iStart);
436 BuildContext(strContext, false, iContext, iNewSymbol);
438 iNewPhase = ((iOldPhase + 2 - 1) % 2);
440 int iColour(m_pNCManager->GetColour(iNewSymbol));
442 // Loop colours if necessary for the colour scheme
443 if(iNewPhase == 1)
444 iColour += 130;
446 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
447 pDisplayInfo->iColour = iColour;
448 pDisplayInfo->bShove = true;
449 pDisplayInfo->bVisible = true;
450 pDisplayInfo->strDisplayText = m_pNCManager->GetAlphabet()->GetDisplayText(iNewSymbol);
452 // TODO: Node creation outside of if statement
453 pNewNode = new CDasherNode(NULL, 0, 0, pDisplayInfo);
456 // TODO: Some of this context stuff could be consolidated
458 pNewNode->m_pNodeManager = this;
459 pNewNode->SetFlag(NF_SEEN, true);
461 SAlphabetData *pNodeUserData = new SAlphabetData;
462 pNewNode->m_pUserData = pNodeUserData;
464 pNodeUserData->iOffset = iNewOffset;
465 pNodeUserData->iPhase = iNewPhase;
466 pNodeUserData->iSymbol = iNewSymbol;
467 pNodeUserData->pLanguageModel = m_pLanguageModel;
468 pNodeUserData->iContext = iContext;
470 PopulateChildrenWithSymbol(pNewNode, static_cast<SAlphabetData *>(pNode->m_pUserData)->iSymbol, pNode);
472 return pNewNode;
475 void CAlphabetManager::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
476 switch(iFlag) {
477 case NF_COMMITTED:
478 if(bValue)
479 // TODO: Reimplement (need a learning context, check whether symbol actually corresponds to character)
480 static_cast<SAlphabetData *>(pNode->m_pUserData)->pLanguageModel->LearnSymbol(m_iLearnContext, static_cast<SAlphabetData *>(pNode->m_pUserData)->iSymbol);
481 break;
485 void CAlphabetManager::BuildContext(std::string strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol) {
486 // Hopefully this will obsolete any need to handle contexts outside
487 // of the alphabet manager - check this and remove resulting
488 // redundant code
490 std::vector<symbol> vContextSymbols;
491 m_pNCManager->GetAlphabet()->GetSymbols(&vContextSymbols, &strContext, false);
493 oContext = m_pLanguageModel->CreateEmptyContext();
495 for(std::vector<symbol>::iterator it(vContextSymbols.begin()); it != vContextSymbols.end(); ++it)
496 m_pLanguageModel->EnterSymbol(oContext, *it);
498 if((vContextSymbols.size() == 0) || bRoot)
499 iSymbol = 0;
500 else
501 iSymbol = vContextSymbols[vContextSymbols.size() - 1];