tagging release
[dasher.git] / trunk / Src / DasherCore / ConversionManager.cpp
blobb8d136762fdda9be691a4da0396718ba9b3d415c
1 // ConversionManager.cpp
2 //
3 // Copyright (c) 2007 The Dasher Team
4 //
5 // This file is part of Dasher.
6 //
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef WIN32
22 #include "config.h"
23 #endif
25 #include "ConversionManager.h"
26 #include "Event.h"
27 #include "EventHandler.h"
28 #include "NodeCreationManager.h"
30 #include <iostream>
31 #include <string>
32 #include <vector>
33 #include <stdlib.h>
35 using namespace Dasher;
37 CConversionManager::CConversionManager(CNodeCreationManager *pNCManager, CConversionHelper *pHelper, CAlphabet *pAlphabet, int CMid)
38 : CNodeManager(2) {
40 m_pNCManager = pNCManager;
41 m_pHelper = pHelper;
42 m_pAlphabet = pAlphabet;
43 m_pRoot = NULL;
45 //DOESN'T SEEM INTRINSIC
46 //and check why pHelper may be empty
47 if(pHelper)
48 m_pLanguageModel = pHelper->GetLanguageModel();
49 else
50 m_pLanguageModel = NULL;
52 if(m_pLanguageModel)
53 m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
55 m_iRefCount = 1;
56 m_iCMID = CMid;
57 // m_iHZCount = 0;
59 m_bTreeBuilt = false;
62 CConversionManager::~CConversionManager(){
64 // for (int i(0);i<m_iHZCount; i++)
65 // std::cout << "Unref: " << this << std::endl;
67 if(m_pRoot)
68 RecursiveDelTree(m_pRoot[0]);
72 CDasherNode *CConversionManager::GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData) {
73 CDasherNode *pNewNode;
75 int iOffset = *(static_cast<int *>(pUserData));
77 // TODO: Parameters here are placeholders - need to figure out what's right
79 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
80 pDisplayInfo->iColour = 9; // TODO: Hard coded value
81 pDisplayInfo->bShove = true;
82 pDisplayInfo->bVisible = true;
83 pDisplayInfo->strDisplayText = ">"; // TODO: Hard coded value, needs i18n
85 pNewNode = new CDasherNode(pParent, iLower, iUpper, pDisplayInfo);
87 // FIXME - handle context properly
88 // TODO: Reimplemnt -----
89 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
90 // -----
92 pNewNode->m_pNodeManager = this;
93 pNewNode->m_pNodeManager->Ref();
96 SConversionData *pNodeUserData = new SConversionData;
97 pNewNode->m_pUserData = pNodeUserData;
98 pNodeUserData->bType = false;
99 pNodeUserData->iOffset = iOffset + 1;
101 if(m_pHelper)
102 pNodeUserData->pLanguageModel = m_pHelper->GetLanguageModel();
103 else
104 pNodeUserData->pLanguageModel = NULL;
106 // // std::cout<<m_pLanguageModel<<"lala"<<std::endl;
107 if(m_pLanguageModel) {
108 CLanguageModel::Context iContext;
109 iContext = m_pLanguageModel->CreateEmptyContext();
110 pNodeUserData->iContext = iContext;
113 pNodeUserData->pSCENode = 0;
115 return pNewNode;
118 // TODO: This function needs to be significantly tidied up
119 // TODO: get rid of pSizes
121 void CConversionManager::AssignChildSizes(SCENode **pNode, CLanguageModel::Context context, int iNChildren) {
122 DASHER_ASSERT(m_pHelper);
124 // Calculate sizes for the children. Note that normalisation is
125 // done additiviely rather than multiplicatively, so it's not
126 // quite what was originally planned (but I don't think this is
127 // much of a problem). More serious is the fact that the ordering
128 // is being lost when the tree is created, as nodes begininning
129 // with the same character are merged. This needs to be though
130 // out, but the probabilities should probably be done at the time
131 // of construction of the candidate tree rather than the Dasher
132 // tree (aside - is there any real point having two separate trees
133 // - surely we should just create Dasher nodes right away?).
135 // The algorithm should also allow for the possibility of the
136 // conversion engine returning probabilities itself, which should
137 // be used in preference to the values infered from the ordering
139 // Finally, maybe the choices should be presented in lexographic
140 // order, rather than in order returned (really not sure about
141 // this - it needs to be thought through).
144 // std::cout << "b" << std::endl;
146 //TESTING FOR CALCULATESCORE STAGE 1
147 //int test;
148 //test = CalculateScore(pNode, 1);
149 //std::cout<<"current character"<<pCurrentSCENode->pszConversion<<std::endl;
150 //std::cout<<"the score for the second candidate is"<<test<<std::endl;
154 //ASSIGNING SCORES AND CALCULATING NODE SIZE
155 //Ph: feel free to edit this part to make it more structured
156 // int iSize[pCurrentSCEChild->IsHeadAndCandNum];
157 // int score[pCurrentSCEChild->IsHeadAndCandNum];
158 // int total =0;
159 // int max = 0;
160 // int CandNum = pCurrentSCEChild -> IsHeadAndCandNum;
162 // CHANGE int iRemaining(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
164 // Thoughts on the general idea here - this is very close to being
165 // a fully fledged language model, so I think we should go with
166 // that idea, but maybe we need something mode flexible. I'd
167 // imagine:
169 // 1. Probabilities provided directly with translation? Maybe hard
170 // to represent in the lattice itself.
172 // 2. Full n-gram language model provided - in general assign
173 // probabilities to paths through the lattice
175 // 3. Ordered results, but no probabilities - using a power law
176 // rule or the like.
178 // Tempted to assume (1) and (2) can be implemented together, with
179 // a second call to the library at node creation time, and (3) can
180 // be implemented as a fallback if that doesn't work.
182 // Things to be thought out:
183 // - How to deal with contexts - backtrace at time of call or stored in node?
184 // - Sharing of language model infrastructure?
188 // Lookup scores for each of the children
190 // TODO: Reimplement -----
192 // for(int i(0); i < pCurrentSCEChild->IsHeadAndCandNum; ++i){
193 // score[i] = CalculateScore(pNode, i);
194 // total += score[i];
195 // if(i!=0)
196 // if (score[i]>score[i-1])
197 // max = score[i];
198 // }
200 // -----
202 // Use the scores to calculate the size of the nodes
205 iNChildren = 0;
206 SCENode *pChild(*pNode);
208 while(pChild) {
209 pChild = pChild->GetNext();
210 ++iNChildren;
214 m_pHelper->AssignSizes(pNode, context, m_pNCManager->GetLongParameter(LP_NORMALIZATION), m_pNCManager->GetLongParameter(LP_UNIFORM), iNChildren);
217 // for(int i(0); i < iNChildren; ++i) {
219 //TESTING FOR RESIZING FREQUENT HZ CHARACTERS
220 //if(i<5)
221 // if(score[i]<max-5){
222 // std::cout<<"first scores are"<<score[i]<<std::endl;
223 // score[i]=max-5;
224 // }
226 // TODO: Reimplement new model -----
228 // if(CandNum == 1)
229 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION);
230 // else
231 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*((CandNum-i-1)+2*CandNum*score[i])/(CandNum*(CandNum-1)/2+2*CandNum*total);
233 // ----
235 //PREVIOUS MODEL: m_pNCManager->GetLongParameter(LP_NORMALIZATION)/((i + 1) * (i + 2));
239 SCENode * pIt;
241 pIt=pNode;
243 uint freq[iNChildren];
244 for(int i(0); i<iNChildren; i++)
245 freq[i] = 0;
246 uint totalFreq=0;
247 uint maxFreq=0;
250 while(pIt){
251 freq[pIt->CandIndex]=pIt->HZFreq;
252 totalFreq+=freq[pIt->CandIndex];
253 if(pIt->HZFreq>maxFreq)
254 maxFreq=pIt->HZFreq;
255 pIt = pIt->pNext;
258 pSizes[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*(100+5*freq[i])/(100*iNChildren+5*totalFreq);
260 //((i + 1) * (i + 2));
264 if(pSizes[i] < 1)
265 pSizes[i] = 1;
267 iRemaining -= pSizes[i];
270 // Distribute the remaining space evenly
272 int iLeft(iNChildren);
274 for(int i(0); i < iNChildren; ++i) {
275 int iDiff(iRemaining / iLeft);
277 pSizes[i] += iDiff;
279 iRemaining -= iDiff;
280 --iLeft;
286 void CConversionManager::PopulateChildren( CDasherNode *pNode ) {
287 DASHER_ASSERT(m_pNCManager);
289 SConversionData * pCurrentDataNode (static_cast<SConversionData *>(pNode->m_pUserData));
290 CDasherNode *pNewNode;
292 // If no helper class is present then just drop straight back to an
293 // alphabet root. This should only happen in error cases, and the
294 // user should have been warned here.
296 if(!m_pHelper) {
297 int iLbnd(0);
298 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
300 CAlphabetManager::SRootData oRootData;
301 oRootData.szContext = NULL;
302 oRootData.iOffset = pCurrentDataNode->iOffset + 1;
304 pNewNode = m_pNCManager->GetRoot(0, pNode, iLbnd, iHbnd, &oRootData);
305 pNewNode->SetFlag(NF_SEEN, false);
307 pNode->Children().push_back(pNewNode);
309 return;
312 // Do the conversion and build the tree (lattice) if it hasn't been
313 // done already.
315 if(!m_bTreeBuilt) {
316 BuildTree(pNode);
317 m_bTreeBuilt = true;
320 SCENode *pCurrentSCEChild;
322 if(pCurrentDataNode->pSCENode)
323 pCurrentSCEChild = pCurrentDataNode->pSCENode->GetChild();
324 else {
325 if(m_pRoot && !pCurrentDataNode->bType)
326 pCurrentSCEChild = m_pRoot[0];
327 else
328 pCurrentSCEChild = 0;
331 if(pCurrentSCEChild) {
333 // TODO: Reimplement (in subclass) -----
335 // if(m_iHZCount>1)
336 // if(!m_bPhrasesProcessed[pCurrentSCEChild->AcCharCount-1])
337 // if(pCurrentSCEChild->AcCharCount<m_iHZCount)
338 // ProcessPhrase(pCurrentSCEChild->AcCharCount-1);
340 // -----
342 //int *iSize;
344 // iSize = new int[pCurrentSCEChild->IsHeadAndCandNum];
349 AssignChildSizes(&pCurrentSCEChild, pCurrentDataNode->iContext, pCurrentSCEChild->IsHeadAndCandNum);
351 int iIdx(0);
352 int iCum(0);
354 // int parentClr = pNode->Colour();
355 // TODO: Fixme
356 int parentClr = 0;
358 // Finally loop through and create the children
360 do {
361 int iLbnd(iCum);
362 int iHbnd(iCum + pCurrentSCEChild->NodeSize);
364 iCum = iHbnd;
366 // TODO: Parameters here are placeholders - need to figure out
367 // what's right
370 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
371 pDisplayInfo->iColour = m_pHelper->AssignColour(parentClr, pCurrentSCEChild, iIdx);
372 pDisplayInfo->bShove = true;
373 pDisplayInfo->bVisible = true;
375 // std::cout << "#" << pCurrentSCEChild->pszConversion << "#" << std::endl;
377 pDisplayInfo->strDisplayText = pCurrentSCEChild->pszConversion;
379 pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
381 // TODO: Reimplement ----
383 // FIXME - handle context properly
384 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
385 // -----
387 pNewNode->m_pNodeManager = this;
388 pNewNode->m_pNodeManager->Ref();
390 SConversionData *pNodeUserData = new SConversionData;
391 pNodeUserData->bType = false;
392 pNodeUserData->pSCENode = pCurrentSCEChild;
393 pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
394 pNodeUserData->iOffset = pCurrentDataNode->iOffset + 1;
396 if(pCurrentDataNode->pLanguageModel) {
397 CLanguageModel::Context iContext;
398 iContext = pCurrentDataNode->pLanguageModel->CloneContext(pCurrentDataNode->iContext);
400 if(pCurrentSCEChild ->Symbol !=-1)
401 pNodeUserData->pLanguageModel->EnterSymbol(iContext, pCurrentSCEChild->Symbol); // TODO: Don't use symbols?
404 pNodeUserData->iContext = iContext;
407 pNewNode->m_pUserData = pNodeUserData;
408 // SAlphabetData *pNodeUserData = new SAlphabetData;
410 //pNewNode->m_pUserData = pNodeUserData;
412 //pNodeUserData->iPhase = iNewPhase;
413 //pNodeUserData->iSymbol = iIdx;
415 pNode->Children().push_back(pNewNode);
417 pCurrentSCEChild = pCurrentSCEChild->GetNext();
418 ++iIdx;
419 }while(pCurrentSCEChild);
422 // delete[] iSize;
425 else {
426 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
427 // TODO: Placeholder algorithm here
428 // TODO: Add an 'end of conversion' node?
429 int iLbnd(0);
430 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
432 CAlphabetManager::SRootData oRootData;
433 oRootData.szContext = NULL;
434 oRootData.iOffset = pCurrentDataNode->iOffset;
436 pNewNode = m_pNCManager->GetRoot(0, pNode, iLbnd, iHbnd, &oRootData);
437 pNewNode->SetFlag(NF_SEEN, false);
439 pNode->Children().push_back(pNewNode);
440 // pNode->SetHasAllChildren(false);
442 else {
443 int iLbnd(0);
444 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
446 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
447 pDisplayInfo->iColour = m_pHelper->AssignColour(0, pCurrentSCEChild, 0);
448 pDisplayInfo->bShove = true;
449 pDisplayInfo->bVisible = true;
450 pDisplayInfo->strDisplayText = "";
452 pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
454 // TODO: Reimplement ----
456 // FIXME - handle context properly
457 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
458 // -----
460 pNewNode->m_pNodeManager = this;
461 pNewNode->m_pNodeManager->Ref();
463 SConversionData *pNodeUserData = new SConversionData;
464 pNodeUserData->bType = true;
465 pNodeUserData->pSCENode = NULL;
466 pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
467 pNodeUserData->iOffset = pCurrentDataNode->iOffset + 1;
469 pNewNode->m_pUserData = pNodeUserData;
471 pNewNode->SetFlag(NF_SEEN, false);
473 pNode->Children().push_back(pNewNode);
478 void CConversionManager::ClearNode( CDasherNode *pNode ) {
479 // pNode->m_pNodeManager->Unref();
482 void CConversionManager::RecursiveDumpTree(SCENode *pCurrent, unsigned int iDepth) {
483 while(pCurrent) {
484 for(unsigned int i(0); i < iDepth; ++i)
485 std::cout << "-";
487 std::cout << " " << pCurrent->pszConversion << " " << pCurrent->IsHeadAndCandNum << " " << pCurrent->CandIndex << " " << pCurrent->IsComplete << " " << pCurrent->AcCharCount << std::endl;
489 RecursiveDumpTree(pCurrent->GetChild(), iDepth + 1);
490 pCurrent = pCurrent->GetNext();
494 void CConversionManager::BuildTree(CDasherNode *pRoot) {
495 DASHER_ASSERT(m_pHelper);
497 CDasherNode *pCurrentNode(pRoot->Parent());
499 std::string strCurrentString;
500 // m_pHelper->ClearData(m_iCMID);
502 while(pCurrentNode) {
503 if(pCurrentNode->m_pNodeManager->GetID() == 2)
504 break;
506 // TODO: Need to make this the edit text rather than the display text
507 CAlphabetManager::SAlphabetData *pAlphabetData =
508 static_cast<CAlphabetManager::SAlphabetData *>(pCurrentNode->m_pUserData);
510 strCurrentString = m_pAlphabet->GetText(pAlphabetData->iSymbol) + strCurrentString;
511 pCurrentNode = pCurrentNode->Parent();
514 // TODO: The remainder of this function is messy - to be sorted out
515 int iHZCount;
516 SCENode *pStartTemp;
517 bool ConversionSuccess;
519 ConversionSuccess = m_pHelper->Convert(strCurrentString, &pStartTemp , &iHZCount, m_iCMID);
521 if((!ConversionSuccess)||(iHZCount==0)) {
522 m_pRoot = 0;
524 else{
525 m_pRoot = new SCENode *[1];
526 m_pRoot[0] = pStartTemp;
530 void CConversionManager::Output( CDasherNode *pNode, Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization) {
531 // TODO: Reimplement this
532 // m_pNCManager->m_bContextSensitive = true;
534 SCENode *pCurrentSCENode((static_cast<SConversionData *>(pNode->m_pUserData))->pSCENode);
536 if(pCurrentSCENode) {
537 Dasher::CEditEvent oEvent(1, pCurrentSCENode->pszConversion, static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
538 m_pNCManager->InsertEvent(&oEvent);
540 if((pNode->GetChildren())[0]->m_pNodeManager != this) {
541 Dasher::CEditEvent oEvent(11, "", 0);
542 m_pNCManager->InsertEvent(&oEvent);
545 else {
546 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
547 Dasher::CEditEvent oOPEvent(1, "|", static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
548 m_pNCManager->InsertEvent(&oOPEvent);
550 else {
551 Dasher::CEditEvent oOPEvent(1, ">", static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
552 m_pNCManager->InsertEvent(&oOPEvent);
555 Dasher::CEditEvent oEvent(10, "", 0);
556 m_pNCManager->InsertEvent(&oEvent);
560 void CConversionManager::Undo( CDasherNode *pNode ) {
561 SCENode *pCurrentSCENode((static_cast<SConversionData *>(pNode->m_pUserData))->pSCENode);
563 if(pCurrentSCENode) {
564 if(pCurrentSCENode->pszConversion && (strlen(pCurrentSCENode->pszConversion) > 0)) {
565 Dasher::CEditEvent oEvent(2, pCurrentSCENode->pszConversion, static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
566 m_pNCManager->InsertEvent(&oEvent);
569 else {
570 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
571 Dasher::CEditEvent oOPEvent(2, "|", static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
572 m_pNCManager->InsertEvent(&oOPEvent);
574 else {
575 Dasher::CEditEvent oOPEvent(2, ">", static_cast<SConversionData *>(pNode->m_pUserData)->iOffset);
576 m_pNCManager->InsertEvent(&oOPEvent);
581 bool CConversionManager::RecursiveDelTree(SCENode* pNode){
582 // TODO: Do we actually care about the return value?
584 // TODO: Function now obsolete
586 pNode->Unref();
588 return false;
589 // if(!pNode)
590 // return 0;
592 // // Note that this is a lattice, not a tree, so we need to be careful
593 // // about deleting thing twice
595 // if(pNode->pChild)
596 // RecursiveDeleteTree(pNode->pChild);
598 // if(pNode->pNext)
599 // RecursiveDeleteTree(pNode->pNext);
601 // SCENode * pTemp;
603 // if(!pNode)
604 // return 0;
605 // else if(pNode->pChild)
606 // return RecursiveDelTree(pNode->pChild);
607 // else{
609 // while(!pNode->pChild){
610 // pTemp = pNode->pNext;
611 // delete pNode;
612 // pNode = pTemp;
613 // if(!pNode)
614 // return 1;
615 // }
616 // return RecursiveDelTree(pNode->pChild);
617 // }
621 void CConversionManager::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
622 switch(iFlag) {
623 case NF_COMMITTED:
624 if(bValue){
625 // TODO: Reimplement (need a learning context, check whether
626 // symbol actually corresponds to character)
628 CLanguageModel * pLan = static_cast<SConversionData *>(pNode->m_pUserData)->pLanguageModel;
630 SCENode * pSCENode = static_cast<SConversionData *>(pNode->m_pUserData)->pSCENode;
632 if(!pSCENode)
633 return;
635 symbol s =pSCENode ->Symbol;
638 if((s!=-1) && m_pLanguageModel)
639 pLan->LearnSymbol(m_iLearnContext, s);
641 break;