1 // ConversionManager.cpp
3 // Copyright (c) 2007 The Dasher Team
5 // This file is part of Dasher.
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "ConversionManager.h"
27 #include "EventHandler.h"
28 #include "NodeCreationManager.h"
35 using namespace Dasher
;
// Constructor: stores the node creation manager and the alphabet, then sets
// up the language model pointer and the context used for learning.
// NOTE(review): pHelper is only used below to fetch a language model and CMid
// is not referenced in the visible lines, yet other methods read m_pHelper and
// m_iCMID - confirm where those members are initialised.
37 CConversionManager::CConversionManager(CNodeCreationManager
*pNCManager
, CConversionHelper
*pHelper
, CAlphabet
*pAlphabet
, int CMid
)
40 m_pNCManager
= pNCManager
;
42 m_pAlphabet
= pAlphabet
;
45 //DOESN'T SEEM INTRINSIC
46 //and check why pHelper may be empty
// NOTE(review): the two assignments below look like alternative arms of a
// check on pHelper whose guard is not visible here - confirm.
48 m_pLanguageModel
= pHelper
->GetLanguageModel();
50 m_pLanguageModel
= NULL
;
// Context that SetFlag() later passes to LearnSymbol().
// NOTE(review): this dereferences m_pLanguageModel; verify it cannot be NULL
// at this point.
53 m_iLearnContext
= m_pLanguageModel
->CreateEmptyContext();
// Destructor: calls RecursiveDelTree() on the first entry of m_pRoot.
// NOTE(review): m_pRoot is indexed here without a visible null check, while
// PopulateChildren() tests m_pRoot before indexing it - confirm it is always
// allocated by the time the manager is destroyed.
62 CConversionManager::~CConversionManager(){
64 // for (int i(0);i<m_iHZCount; i++)
65 // std::cout << "Unref: " << this << std::endl;
68 RecursiveDelTree(m_pRoot
[0]);
// Creates the root conversion node under pParent covering [iLower, iUpper].
//
// pParent   - parent handed to the new CDasherNode.
// iLower    - lower bound handed to the new CDasherNode.
// iUpper    - upper bound handed to the new CDasherNode.
// pUserData - dereferenced as an int* to obtain the starting offset.
//
// The new node is given this manager (with a Ref() on it) and a freshly
// allocated SConversionData with bType = false, iOffset = offset + 1 and
// pSCENode = 0.
// NOTE(review): the return statement is not visible here; presumably pNewNode
// is returned - confirm.
72 CDasherNode
*CConversionManager::GetRoot(CDasherNode
*pParent
, int iLower
, int iUpper
, void *pUserData
) {
73 CDasherNode
*pNewNode
;
75 int iOffset
= *(static_cast<int *>(pUserData
));
77 // TODO: Parameters here are placeholders - need to figure out what's right
// Display settings for the root node: fixed colour 9 and the text ">".
79 CDasherNode::SDisplayInfo
*pDisplayInfo
= new CDasherNode::SDisplayInfo
;
80 pDisplayInfo
->iColour
= 9; // TODO: Hard coded value
81 pDisplayInfo
->bShove
= true;
82 pDisplayInfo
->bVisible
= true;
83 pDisplayInfo
->strDisplayText
= ">"; // TODO: Hard coded value, needs i18n
85 pNewNode
= new CDasherNode(pParent
, iLower
, iUpper
, pDisplayInfo
);
87 // FIXME - handle context properly
88 // TODO: Reimplement -----
89 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
// Register this manager on the node and take a reference on it.
92 pNewNode
->m_pNodeManager
= this;
93 pNewNode
->m_pNodeManager
->Ref();
// Per-node conversion data, stored in the node's m_pUserData.
96 SConversionData
*pNodeUserData
= new SConversionData
;
97 pNewNode
->m_pUserData
= pNodeUserData
;
98 pNodeUserData
->bType
= false;
99 pNodeUserData
->iOffset
= iOffset
+ 1;
// NOTE(review): the two pLanguageModel assignments below look like
// alternative arms of a check whose guard is not visible here - confirm.
102 pNodeUserData
->pLanguageModel
= m_pHelper
->GetLanguageModel();
104 pNodeUserData
->pLanguageModel
= NULL
;
106 // // std::cout<<m_pLanguageModel<<"lala"<<std::endl;
// When the manager has a language model, start the node from an empty context.
107 if(m_pLanguageModel
) {
108 CLanguageModel::Context iContext
;
109 iContext
= m_pLanguageModel
->CreateEmptyContext();
110 pNodeUserData
->iContext
= iContext
;
// The root has no associated SCENode.
113 pNodeUserData
->pSCENode
= 0;
118 // TODO: This function needs to be significantly tidied up
119 // TODO: get rid of pSizes
// Assigns sizes to the candidate nodes reached from *pNode.
//
// pNode      - address of the first candidate SCENode pointer.
// context    - language model context forwarded to the helper.
// iNChildren - number of candidates, forwarded to the helper.
//
// The visible live code asserts that m_pHelper is set and delegates to
// m_pHelper->AssignSizes() with the LP_NORMALIZATION and LP_UNIFORM
// parameters read from m_pNCManager.
121 void CConversionManager::AssignChildSizes(SCENode
**pNode
, CLanguageModel::Context context
, int iNChildren
) {
122 DASHER_ASSERT(m_pHelper
);
124 // Calculate sizes for the children. Note that normalisation is
125 // done additively rather than multiplicatively, so it's not
126 // quite what was originally planned (but I don't think this is
127 // much of a problem). More serious is the fact that the ordering
128 // is being lost when the tree is created, as nodes beginning
129 // with the same character are merged. This needs to be thought
130 // out, but the probabilities should probably be done at the time
131 // of construction of the candidate tree rather than the Dasher
132 // tree (aside - is there any real point having two separate trees
133 // - surely we should just create Dasher nodes right away?).
135 // The algorithm should also allow for the possibility of the
136 // conversion engine returning probabilities itself, which should
137 // be used in preference to the values inferred from the ordering
139 // Finally, maybe the choices should be presented in lexicographic
140 // order, rather than in order returned (really not sure about
141 // this - it needs to be thought through).
144 // std::cout << "b" << std::endl;
146 //TESTING FOR CALCULATESCORE STAGE 1
148 //test = CalculateScore(pNode, 1);
149 //std::cout<<"current character"<<pCurrentSCENode->pszConversion<<std::endl;
150 //std::cout<<"the score for the second candidate is"<<test<<std::endl;
154 //ASSIGNING SCORES AND CALCULATING NODE SIZE
155 //Ph: feel free to edit this part to make it more structured
156 // int iSize[pCurrentSCEChild->IsHeadAndCandNum];
157 // int score[pCurrentSCEChild->IsHeadAndCandNum];
160 // int CandNum = pCurrentSCEChild -> IsHeadAndCandNum;
162 // CHANGE int iRemaining(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
164 // Thoughts on the general idea here - this is very close to being
165 // a fully fledged language model, so I think we should go with
166 // that idea, but maybe we need something more flexible. I'd
169 // 1. Probabilities provided directly with translation? Maybe hard
170 // to represent in the lattice itself.
172 // 2. Full n-gram language model provided - in general assign
173 // probabilities to paths through the lattice
175 // 3. Ordered results, but no probabilities - using a power law
178 // Tempted to assume (1) and (2) can be implemented together, with
179 // a second call to the library at node creation time, and (3) can
180 // be implemented as a fallback if that doesn't work.
182 // Things to be thought out:
183 // - How to deal with contexts - backtrace at time of call or stored in node?
184 // - Sharing of language model infrastructure?
188 // Lookup scores for each of the children
190 // TODO: Reimplement -----
192 // for(int i(0); i < pCurrentSCEChild->IsHeadAndCandNum; ++i){
193 // score[i] = CalculateScore(pNode, i);
194 // total += score[i];
196 // if (score[i]>score[i-1])
202 // Use the scores to calculate the size of the nodes
// NOTE(review): pChild starts at the first candidate and is advanced with
// GetNext(); the loop around the advance is not visible here - confirm.
206 SCENode
*pChild(*pNode
);
209 pChild
= pChild
->GetNext();
// Delegate the sizing to the conversion helper.
214 m_pHelper
->AssignSizes(pNode
, context
, m_pNCManager
->GetLongParameter(LP_NORMALIZATION
), m_pNCManager
->GetLongParameter(LP_UNIFORM
), iNChildren
);
217 // for(int i(0); i < iNChildren; ++i) {
219 //TESTING FOR RESIZING FREQUENT HZ CHARACTERS
221 // if(score[i]<max-5){
222 // std::cout<<"first scores are"<<score[i]<<std::endl;
226 // TODO: Reimplement new model -----
229 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION);
231 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*((CandNum-i-1)+2*CandNum*score[i])/(CandNum*(CandNum-1)/2+2*CandNum*total);
235 //PREVIOUS MODEL: m_pNCManager->GetLongParameter(LP_NORMALIZATION)/((i + 1) * (i + 2));
// NOTE(review): the frequency-based sizing below uses pIt, pSizes, iRemaining,
// totalFreq and maxFreq, none of which are declared in the visible lines -
// confirm whether this section is live code or part of a disabled region.
243 uint freq[iNChildren];
244 for(int i(0); i<iNChildren; i++)
251 freq[pIt->CandIndex]=pIt->HZFreq;
252 totalFreq+=freq[pIt->CandIndex];
253 if(pIt->HZFreq>maxFreq)
258 pSizes[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*(100+5*freq[i])/(100*iNChildren+5*totalFreq);
260 //((i + 1) * (i + 2));
267 iRemaining -= pSizes[i];
270 // Distribute the remaining space evenly
272 int iLeft(iNChildren);
274 for(int i(0); i < iNChildren; ++i) {
275 int iDiff(iRemaining / iLeft);
// Creates the children of pNode and appends them to pNode->Children().
//
// Three kinds of child are built in the visible code:
//  - an alphabet root obtained from m_pNCManager->GetRoot(),
//  - one conversion node per candidate SCENode, sized by AssignChildSizes(),
//  - a single empty-text conversion node with bType = true.
// NOTE(review): several guards and local declarations (iLbnd, iCum, parentClr,
// iIdx, and the else branches) are not visible in this view - confirm the
// exact branch structure against the complete file.
286 void CConversionManager::PopulateChildren( CDasherNode
*pNode
) {
287 DASHER_ASSERT(m_pNCManager
);
// Conversion data attached to the node being expanded.
289 SConversionData
* pCurrentDataNode (static_cast<SConversionData
*>(pNode
->m_pUserData
));
290 CDasherNode
*pNewNode
;
292 // If no helper class is present then just drop straight back to an
293 // alphabet root. This should only happen in error cases, and the
294 // user should have been warned here.
// Fallback child: an alphabet root with no context string, placed one
// offset past this node and marked as not yet seen.
298 int iHbnd(m_pNCManager
->GetLongParameter(LP_NORMALIZATION
));
300 CAlphabetManager::SRootData oRootData
;
301 oRootData
.szContext
= NULL
;
302 oRootData
.iOffset
= pCurrentDataNode
->iOffset
+ 1;
304 pNewNode
= m_pNCManager
->GetRoot(0, pNode
, iLbnd
, iHbnd
, &oRootData
);
305 pNewNode
->SetFlag(NF_SEEN
, false);
307 pNode
->Children().push_back(pNewNode
);
312 // Do the conversion and build the tree (lattice) if it hasn't been
// Choose the first candidate: the child of this node's SCENode when it has
// one, otherwise m_pRoot[0] when a root exists and bType is false, else 0.
320 SCENode
*pCurrentSCEChild
;
322 if(pCurrentDataNode
->pSCENode
)
323 pCurrentSCEChild
= pCurrentDataNode
->pSCENode
->GetChild();
325 if(m_pRoot
&& !pCurrentDataNode
->bType
)
326 pCurrentSCEChild
= m_pRoot
[0];
328 pCurrentSCEChild
= 0;
331 if(pCurrentSCEChild
) {
333 // TODO: Reimplement (in subclass) -----
336 // if(!m_bPhrasesProcessed[pCurrentSCEChild->AcCharCount-1])
337 // if(pCurrentSCEChild->AcCharCount<m_iHZCount)
338 // ProcessPhrase(pCurrentSCEChild->AcCharCount-1);
344 // iSize = new int[pCurrentSCEChild->IsHeadAndCandNum];
// Size the candidates; the count passed is the first candidate's
// IsHeadAndCandNum.
349 AssignChildSizes(&pCurrentSCEChild
, pCurrentDataNode
->iContext
, pCurrentSCEChild
->IsHeadAndCandNum
);
354 // int parentClr = pNode->Colour();
358 // Finally loop through and create the children
// Upper bound of this child: running total plus the candidate's NodeSize.
362 int iHbnd(iCum
+ pCurrentSCEChild
->NodeSize
);
366 // TODO: Parameters here are placeholders - need to figure out
// Display info: colour chosen by the helper, text is the candidate's
// conversion string.
370 CDasherNode::SDisplayInfo
*pDisplayInfo
= new CDasherNode::SDisplayInfo
;
371 pDisplayInfo
->iColour
= m_pHelper
->AssignColour(parentClr
, pCurrentSCEChild
, iIdx
);
372 pDisplayInfo
->bShove
= true;
373 pDisplayInfo
->bVisible
= true;
375 // std::cout << "#" << pCurrentSCEChild->pszConversion << "#" << std::endl;
377 pDisplayInfo
->strDisplayText
= pCurrentSCEChild
->pszConversion
;
379 pNewNode
= new CDasherNode(pNode
, iLbnd
, iHbnd
, pDisplayInfo
);
381 // TODO: Reimplement ----
383 // FIXME - handle context properly
384 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
// Register this manager on the child and take a reference on it.
387 pNewNode
->m_pNodeManager
= this;
388 pNewNode
->m_pNodeManager
->Ref();
// Child data: points at its candidate, inherits the parent's language
// model and sits one offset past the parent.
390 SConversionData
*pNodeUserData
= new SConversionData
;
391 pNodeUserData
->bType
= false;
392 pNodeUserData
->pSCENode
= pCurrentSCEChild
;
393 pNodeUserData
->pLanguageModel
= pCurrentDataNode
->pLanguageModel
;
394 pNodeUserData
->iOffset
= pCurrentDataNode
->iOffset
+ 1;
// With a language model, the child gets a clone of the parent's context,
// advanced by the candidate's symbol unless that symbol is -1.
396 if(pCurrentDataNode
->pLanguageModel
) {
397 CLanguageModel::Context iContext
;
398 iContext
= pCurrentDataNode
->pLanguageModel
->CloneContext(pCurrentDataNode
->iContext
);
400 if(pCurrentSCEChild
->Symbol
!=-1)
401 pNodeUserData
->pLanguageModel
->EnterSymbol(iContext
, pCurrentSCEChild
->Symbol
); // TODO: Don't use symbols?
404 pNodeUserData
->iContext
= iContext
;
407 pNewNode
->m_pUserData
= pNodeUserData
;
408 // SAlphabetData *pNodeUserData = new SAlphabetData;
410 //pNewNode->m_pUserData = pNodeUserData;
412 //pNodeUserData->iPhase = iNewPhase;
413 //pNodeUserData->iSymbol = iIdx;
415 pNode
->Children().push_back(pNewNode
);
// Move on to the next sibling candidate; the loop ends when there is none.
417 pCurrentSCEChild
= pCurrentSCEChild
->GetNext();
419 }while(pCurrentSCEChild
);
// NOTE(review): the code from here presumably handles the case where no
// candidate was found (the else is not visible) - confirm.
// bType set: hand control back to an alphabet root at this node's offset.
426 if((static_cast<SConversionData
*>(pNode
->m_pUserData
))->bType
) {
427 // TODO: Placeholder algorithm here
428 // TODO: Add an 'end of conversion' node?
430 int iHbnd(m_pNCManager
->GetLongParameter(LP_NORMALIZATION
));
432 CAlphabetManager::SRootData oRootData
;
433 oRootData
.szContext
= NULL
;
434 oRootData
.iOffset
= pCurrentDataNode
->iOffset
;
436 pNewNode
= m_pNCManager
->GetRoot(0, pNode
, iLbnd
, iHbnd
, &oRootData
);
437 pNewNode
->SetFlag(NF_SEEN
, false);
439 pNode
->Children().push_back(pNewNode
);
440 // pNode->SetHasAllChildren(false);
// Remaining case: a single conversion node with empty display text whose
// data has bType = true and no SCENode.
444 int iHbnd(m_pNCManager
->GetLongParameter(LP_NORMALIZATION
));
446 CDasherNode::SDisplayInfo
*pDisplayInfo
= new CDasherNode::SDisplayInfo
;
447 pDisplayInfo
->iColour
= m_pHelper
->AssignColour(0, pCurrentSCEChild
, 0);
448 pDisplayInfo
->bShove
= true;
449 pDisplayInfo
->bVisible
= true;
450 pDisplayInfo
->strDisplayText
= "";
452 pNewNode
= new CDasherNode(pNode
, iLbnd
, iHbnd
, pDisplayInfo
);
454 // TODO: Reimplement ----
456 // FIXME - handle context properly
457 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
460 pNewNode
->m_pNodeManager
= this;
461 pNewNode
->m_pNodeManager
->Ref();
463 SConversionData
*pNodeUserData
= new SConversionData
;
464 pNodeUserData
->bType
= true;
465 pNodeUserData
->pSCENode
= NULL
;
466 pNodeUserData
->pLanguageModel
= pCurrentDataNode
->pLanguageModel
;
467 pNodeUserData
->iOffset
= pCurrentDataNode
->iOffset
+ 1;
469 pNewNode
->m_pUserData
= pNodeUserData
;
471 pNewNode
->SetFlag(NF_SEEN
, false);
473 pNode
->Children().push_back(pNewNode
);
// Called to clear pNode. No work is done in the visible code: the only
// statement, an Unref() on the node's manager, is commented out.
478 void CConversionManager::ClearNode( CDasherNode
*pNode
) {
479 // pNode->m_pNodeManager->Unref();
// Debug helper that writes the SCENode structure starting at pCurrent to
// std::cout.
//
// pCurrent - node to print.
// iDepth   - current depth, increased by one for each level of children.
//
// NOTE(review): the loop over siblings and the body of the iDepth loop
// (presumably indentation output) are not visible here - confirm.
482 void CConversionManager::RecursiveDumpTree(SCENode
*pCurrent
, unsigned int iDepth
) {
484 for(unsigned int i(0); i
< iDepth
; ++i
)
// One line per node: conversion text, then IsHeadAndCandNum, CandIndex,
// IsComplete and AcCharCount.
487 std::cout
<< " " << pCurrent
->pszConversion
<< " " << pCurrent
->IsHeadAndCandNum
<< " " << pCurrent
->CandIndex
<< " " << pCurrent
->IsComplete
<< " " << pCurrent
->AcCharCount
<< std::endl
;
// Descend into the children, then advance to the next sibling.
489 RecursiveDumpTree(pCurrent
->GetChild(), iDepth
+ 1);
490 pCurrent
= pCurrent
->GetNext();
// Builds the conversion structure for the text that precedes pRoot.
//
// Walks up the parent chain from pRoot, prepending the alphabet text of each
// visited node's symbol to a string, passes that string to
// m_pHelper->Convert(), and stores the resulting start node in a newly
// allocated one-element m_pRoot array.
494 void CConversionManager::BuildTree(CDasherNode
*pRoot
) {
495 DASHER_ASSERT(m_pHelper
);
497 CDasherNode
*pCurrentNode(pRoot
->Parent());
499 std::string strCurrentString
;
500 // m_pHelper->ClearData(m_iCMID);
// NOTE(review): manager ID 2 presumably stops the walk at a conversion node;
// the statement guarded by this test is not visible here - confirm.
502 while(pCurrentNode
) {
503 if(pCurrentNode
->m_pNodeManager
->GetID() == 2)
506 // TODO: Need to make this the edit text rather than the display text
// The node's user data is read as alphabet data to obtain its symbol.
507 CAlphabetManager::SAlphabetData
*pAlphabetData
=
508 static_cast<CAlphabetManager::SAlphabetData
*>(pCurrentNode
->m_pUserData
);
// Prepend, because the walk goes from the newest node back to the oldest.
510 strCurrentString
= m_pAlphabet
->GetText(pAlphabetData
->iSymbol
) + strCurrentString
;
511 pCurrentNode
= pCurrentNode
->Parent();
514 // TODO: The remainder of this function is messy - to be sorted out
// NOTE(review): pStartTemp and iHZCount are not declared in the visible lines.
517 bool ConversionSuccess
;
519 ConversionSuccess
= m_pHelper
->Convert(strCurrentString
, &pStartTemp
, &iHZCount
, m_iCMID
);
// NOTE(review): the body of this failure / empty-result branch is not
// visible here - confirm what it does before the root is stored.
521 if((!ConversionSuccess
)||(iHZCount
==0)) {
// Store the start node as the single root entry.
525 m_pRoot
= new SCENode
*[1];
526 m_pRoot
[0] = pStartTemp
;
// Emits the edit events for entering pNode.
//
// pNode          - node being output; its m_pUserData is read as
//                  SConversionData.
// pAdded         - not referenced in the visible code.
// iNormalization - not referenced in the visible code.
//
// NOTE(review): the numeric CEditEvent codes (1, 10, 11) are not explained in
// this file. Undo() sends code 2 with the same text, so 1 is presumably
// "insert" and 2 "delete" - confirm against the CEditEvent definition.
530 void CConversionManager::Output( CDasherNode
*pNode
, Dasher::VECTOR_SYMBOL_PROB
* pAdded
, int iNormalization
) {
531 // TODO: Reimplement this
532 // m_pNCManager->m_bContextSensitive = true;
534 SCENode
*pCurrentSCENode((static_cast<SConversionData
*>(pNode
->m_pUserData
))->pSCENode
);
// Node backed by a candidate: send its conversion text at the node's offset.
536 if(pCurrentSCENode
) {
537 Dasher::CEditEvent
oEvent(1, pCurrentSCENode
->pszConversion
, static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
538 m_pNCManager
->InsertEvent(&oEvent
);
// First child owned by a different manager: send event 11.
// NOTE(review): indexes GetChildren()[0] with no visible emptiness check.
540 if((pNode
->GetChildren())[0]->m_pNodeManager
!= this) {
541 Dasher::CEditEvent
oEvent(11, "", 0);
542 m_pNCManager
->InsertEvent(&oEvent
);
// NOTE(review): the events below presumably belong to the branch without a
// candidate (the else lines are not visible): "|" when bType is set,
// otherwise ">" followed by event 10 - confirm.
546 if((static_cast<SConversionData
*>(pNode
->m_pUserData
))->bType
) {
547 Dasher::CEditEvent
oOPEvent(1, "|", static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
548 m_pNCManager
->InsertEvent(&oOPEvent
);
551 Dasher::CEditEvent
oOPEvent(1, ">", static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
552 m_pNCManager
->InsertEvent(&oOPEvent
);
555 Dasher::CEditEvent
oEvent(10, "", 0);
556 m_pNCManager
->InsertEvent(&oEvent
);
// Emits the edit events that reverse Output() for pNode.
//
// Sends CEditEvent code 2 with the same text that Output() sends with code 1:
// the candidate's conversion string when the node has an SCENode, otherwise
// "|" or ">" depending on bType.
// NOTE(review): the else lines separating these branches are not visible
// here - confirm the branch structure.
560 void CConversionManager::Undo( CDasherNode
*pNode
) {
561 SCENode
*pCurrentSCENode((static_cast<SConversionData
*>(pNode
->m_pUserData
))->pSCENode
);
563 if(pCurrentSCENode
) {
// Only send an event when the conversion string is non-null and non-empty.
564 if(pCurrentSCENode
->pszConversion
&& (strlen(pCurrentSCENode
->pszConversion
) > 0)) {
565 Dasher::CEditEvent
oEvent(2, pCurrentSCENode
->pszConversion
, static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
566 m_pNCManager
->InsertEvent(&oEvent
);
570 if((static_cast<SConversionData
*>(pNode
->m_pUserData
))->bType
) {
571 Dasher::CEditEvent
oOPEvent(2, "|", static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
572 m_pNCManager
->InsertEvent(&oOPEvent
);
575 Dasher::CEditEvent
oOPEvent(2, ">", static_cast<SConversionData
*>(pNode
->m_pUserData
)->iOffset
);
576 m_pNCManager
->InsertEvent(&oOPEvent
);
// Intended to delete the SCENode structure starting at pNode. Per the TODO
// below the function is obsolete, and only commented-out code is visible.
// NOTE(review): the function is declared to return bool but no return
// statement is visible in this view - confirm.
581 bool CConversionManager::RecursiveDelTree(SCENode
* pNode
){
582 // TODO: Do we actually care about the return value?
584 // TODO: Function now obsolete
592 // // Note that this is a lattice, not a tree, so we need to be careful
593 // // about deleting thing twice
596 // RecursiveDeleteTree(pNode->pChild);
599 // RecursiveDeleteTree(pNode->pNext);
605 // else if(pNode->pChild)
606 // return RecursiveDelTree(pNode->pChild);
609 // while(!pNode->pChild){
610 // pTemp = pNode->pNext;
616 // return RecursiveDelTree(pNode->pChild);
// Reacts to a flag change on pNode by teaching the node's symbol to the
// language model.
//
// pNode  - node whose m_pUserData is read as SConversionData.
// iFlag  - not referenced in the visible code.
// bValue - not referenced in the visible code.
//
// NOTE(review): any test on iFlag / bValue is not visible here - confirm
// which flag changes actually trigger the learning below.
621 void CConversionManager::SetFlag(CDasherNode
*pNode
, int iFlag
, bool bValue
) {
625 // TODO: Reimplement (need a learning context, check whether
626 // symbol actually corresponds to character)
628 CLanguageModel
* pLan
= static_cast<SConversionData
*>(pNode
->m_pUserData
)->pLanguageModel
;
630 SCENode
* pSCENode
= static_cast<SConversionData
*>(pNode
->m_pUserData
)->pSCENode
;
// NOTE(review): pSCENode is dereferenced with no visible null check, although
// GetRoot() and PopulateChildren() create nodes whose pSCENode is null.
635 symbol s
=pSCENode
->Symbol
;
// Learn the symbol in the manager-wide learning context unless it is -1.
// NOTE(review): the guard tests m_pLanguageModel but the call goes through
// pLan (the node's own pointer) - confirm pLan cannot be null here.
638 if((s
!=-1) && m_pLanguageModel
)
639 pLan
->LearnSymbol(m_iLearnContext
, s
);