Fixing up Chinese
[dasher.git] / Src / DasherCore / ConversionManager.cpp
blob2a5bfacee33a080e24bcb8ae7f20478f51b8cb5d
1 // ConversionManager.cpp
2 //
3 // Copyright (c) 2007 The Dasher Team
4 //
5 // This file is part of Dasher.
6 //
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef WIN32
22 #include "config.h"
23 #endif
25 #include "ConversionManager.h"
26 #include "Event.h"
27 #include "EventHandler.h"
28 #include "NodeCreationManager.h"
30 #include <iostream>
31 #include <string>
32 #include <vector>
33 #include <stdlib.h>
35 using namespace Dasher;
37 CConversionManager::CConversionManager(CNodeCreationManager *pNCManager, CConversionHelper *pHelper, CAlphabet *pAlphabet, int CMid)
38 : CNodeManager(2) {
40 m_pNCManager = pNCManager;
41 m_pHelper = pHelper;
42 m_pAlphabet = pAlphabet;
43 m_pRoot = NULL;
45 //DOESN'T SEEM INTRINSIC
46 //and check why pHelper may be empty
47 if(pHelper)
48 m_pLanguageModel = pHelper->GetLanguageModel();
49 else
50 m_pLanguageModel = NULL;
52 if(m_pLanguageModel)
53 m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
55 m_iRefCount = 1;
56 m_iCMID = CMid;
57 // m_iHZCount = 0;
59 m_bTreeBuilt = false;
62 CConversionManager::~CConversionManager(){
64 // for (int i(0);i<m_iHZCount; i++)
65 // std::cout << "Unref: " << this << std::endl;
67 if(m_pRoot)
68 RecursiveDelTree(m_pRoot[0]);
72 CDasherNode *CConversionManager::GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData) {
73 CDasherNode *pNewNode;
75 int iOffset = *(static_cast<int *>(pUserData));
77 // TODO: Parameters here are placeholders - need to figure out what's right
79 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
80 pDisplayInfo->iColour = 9; // TODO: Hard coded value
81 pDisplayInfo->bShove = true;
82 pDisplayInfo->bVisible = true;
83 pDisplayInfo->strDisplayText = ">"; // TODO: Hard coded value, needs i18n
85 pNewNode = new CDasherNode(pParent, iLower, iUpper, pDisplayInfo);
87 // FIXME - handle context properly
88 // TODO: Reimplemnt -----
89 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
90 // -----
92 pNewNode->m_pNodeManager = this;
93 pNewNode->m_pNodeManager->Ref();
96 SConversionData *pNodeUserData = new SConversionData;
97 pNewNode->m_pUserData = pNodeUserData;
98 pNodeUserData->bType = false;
99 pNodeUserData->iOffset = iOffset;
101 if(m_pHelper)
102 pNodeUserData->pLanguageModel = m_pHelper->GetLanguageModel();
103 else
104 pNodeUserData->pLanguageModel = NULL;
106 CLanguageModel::Context iContext;
108 // // std::cout<<m_pLanguageModel<<"lala"<<std::endl;
109 if(m_pLanguageModel){
110 iContext = m_pLanguageModel->CreateEmptyContext();
111 pNodeUserData->iContext = iContext;
114 pNodeUserData->pSCENode = 0;
116 return pNewNode;
119 // TODO: This function needs to be significantly tidied up
120 // TODO: get rid of pSizes
122 void CConversionManager::AssignChildSizes(SCENode *pNode, CLanguageModel::Context context, int iNChildren) {
123 DASHER_ASSERT(m_pHelper);
125 // Calculate sizes for the children. Note that normalisation is
126 // done additiviely rather than multiplicatively, so it's not
127 // quite what was originally planned (but I don't think this is
128 // much of a problem). More serious is the fact that the ordering
129 // is being lost when the tree is created, as nodes begininning
130 // with the same character are merged. This needs to be though
131 // out, but the probabilities should probably be done at the time
132 // of construction of the candidate tree rather than the Dasher
133 // tree (aside - is there any real point having two separate trees
134 // - surely we should just create Dasher nodes right away?).
136 // The algorithm should also allow for the possibility of the
137 // conversion engine returning probabilities itself, which should
138 // be used in preference to the values infered from the ordering
140 // Finally, maybe the choices should be presented in lexographic
141 // order, rather than in order returned (really not sure about
142 // this - it needs to be thought through).
145 // std::cout << "b" << std::endl;
147 //TESTING FOR CALCULATESCORE STAGE 1
148 //int test;
149 //test = CalculateScore(pNode, 1);
150 //std::cout<<"current character"<<pCurrentSCENode->pszConversion<<std::endl;
151 //std::cout<<"the score for the second candidate is"<<test<<std::endl;
155 //ASSIGNING SCORES AND CALCULATING NODE SIZE
156 //Ph: feel free to edit this part to make it more structured
157 // int iSize[pCurrentSCEChild->IsHeadAndCandNum];
158 // int score[pCurrentSCEChild->IsHeadAndCandNum];
159 // int total =0;
160 // int max = 0;
161 // int CandNum = pCurrentSCEChild -> IsHeadAndCandNum;
163 // CHANGE int iRemaining(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
165 // Thoughts on the general idea here - this is very close to being
166 // a fully fledged language model, so I think we should go with
167 // that idea, but maybe we need something mode flexible. I'd
168 // imagine:
170 // 1. Probabilities provided directly with translation? Maybe hard
171 // to represent in the lattice itself.
173 // 2. Full n-gram language model provided - in general assign
174 // probabilities to paths through the lattice
176 // 3. Ordered results, but no probabilities - using a power law
177 // rule or the like.
179 // Tempted to assume (1) and (2) can be implemented together, with
180 // a second call to the library at node creation time, and (3) can
181 // be implemented as a fallback if that doesn't work.
183 // Things to be thought out:
184 // - How to deal with contexts - backtrace at time of call or stored in node?
185 // - Sharing of language model infrastructure?
189 // Lookup scores for each of the children
191 // TODO: Reimplement -----
193 // for(int i(0); i < pCurrentSCEChild->IsHeadAndCandNum; ++i){
194 // score[i] = CalculateScore(pNode, i);
195 // total += score[i];
196 // if(i!=0)
197 // if (score[i]>score[i-1])
198 // max = score[i];
199 // }
201 // -----
203 // Use the scores to calculate the size of the nodes
206 iNChildren = 0;
207 SCENode *pChild(pNode);
209 while(pChild) {
210 pChild = pChild->GetNext();
211 ++iNChildren;
215 m_pHelper->AssignSizes(pNode, context, m_pNCManager->GetLongParameter(LP_NORMALIZATION), m_pNCManager->GetLongParameter(LP_UNIFORM), iNChildren);
218 // for(int i(0); i < iNChildren; ++i) {
220 //TESTING FOR RESIZING FREQUENT HZ CHARACTERS
221 //if(i<5)
222 // if(score[i]<max-5){
223 // std::cout<<"first scores are"<<score[i]<<std::endl;
224 // score[i]=max-5;
225 // }
227 // TODO: Reimplement new model -----
229 // if(CandNum == 1)
230 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION);
231 // else
232 // iSize[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*((CandNum-i-1)+2*CandNum*score[i])/(CandNum*(CandNum-1)/2+2*CandNum*total);
234 // ----
236 //PREVIOUS MODEL: m_pNCManager->GetLongParameter(LP_NORMALIZATION)/((i + 1) * (i + 2));
240 SCENode * pIt;
242 pIt=pNode;
244 uint freq[iNChildren];
245 for(int i(0); i<iNChildren; i++)
246 freq[i] = 0;
247 uint totalFreq=0;
248 uint maxFreq=0;
251 while(pIt){
252 freq[pIt->CandIndex]=pIt->HZFreq;
253 totalFreq+=freq[pIt->CandIndex];
254 if(pIt->HZFreq>maxFreq)
255 maxFreq=pIt->HZFreq;
256 pIt = pIt->pNext;
259 pSizes[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*(100+5*freq[i])/(100*iNChildren+5*totalFreq);
261 //((i + 1) * (i + 2));
265 if(pSizes[i] < 1)
266 pSizes[i] = 1;
268 iRemaining -= pSizes[i];
271 // Distribute the remaining space evenly
273 int iLeft(iNChildren);
275 for(int i(0); i < iNChildren; ++i) {
276 int iDiff(iRemaining / iLeft);
278 pSizes[i] += iDiff;
280 iRemaining -= iDiff;
281 --iLeft;
287 void CConversionManager::PopulateChildren( CDasherNode *pNode ) {
288 DASHER_ASSERT(m_pNCManager);
290 SConversionData * pCurrentDataNode (static_cast<SConversionData *>(pNode->m_pUserData));
291 CDasherNode *pNewNode;
293 // If no helper class is present then just drop straight back to an
294 // alphabet root. This should only happen in error cases, and the
295 // user should have been warned here.
297 if(!m_pHelper) {
298 int iLbnd(0);
299 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
301 CAlphabetManager::SRootData oRootData;
302 oRootData.szContext = NULL;
303 oRootData.iOffset = pCurrentDataNode->iOffset + 1;
305 pNewNode = m_pNCManager->GetRoot(0, pNode, iLbnd, iHbnd, &oRootData);
306 pNewNode->SetFlag(NF_SEEN, false);
308 pNode->Children().push_back(pNewNode);
310 return;
313 // Do the conversion and build the tree (lattice) if it hasn't been
314 // done already.
316 if(!m_bTreeBuilt) {
317 BuildTree(pNode);
318 m_bTreeBuilt = true;
321 SCENode *pCurrentSCEChild;
323 if(pCurrentDataNode->pSCENode)
324 pCurrentSCEChild = pCurrentDataNode->pSCENode->GetChild();
325 else {
326 if(m_pRoot && !pCurrentDataNode->bType)
327 pCurrentSCEChild = m_pRoot[0];
328 else
329 pCurrentSCEChild = 0;
332 if(pCurrentSCEChild) {
334 // TODO: Reimplement (in subclass) -----
336 // if(m_iHZCount>1)
337 // if(!m_bPhrasesProcessed[pCurrentSCEChild->AcCharCount-1])
338 // if(pCurrentSCEChild->AcCharCount<m_iHZCount)
339 // ProcessPhrase(pCurrentSCEChild->AcCharCount-1);
341 // -----
343 //int *iSize;
345 // iSize = new int[pCurrentSCEChild->IsHeadAndCandNum];
350 AssignChildSizes(pCurrentSCEChild, pCurrentDataNode->iContext, pCurrentSCEChild->IsHeadAndCandNum);
352 int iIdx(0);
353 int iCum(0);
355 // int parentClr = pNode->Colour();
356 // TODO: Fixme
357 int parentClr = 0;
359 // Finally loop through and create the children
361 do {
362 int iLbnd(iCum);
363 int iHbnd(iCum + pCurrentSCEChild->NodeSize);
365 iCum = iHbnd;
367 // TODO: Parameters here are placeholders - need to figure out
368 // what's right
371 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
372 pDisplayInfo->iColour = m_pHelper->AssignColour(parentClr, pCurrentSCEChild, iIdx);
373 pDisplayInfo->bShove = true;
374 pDisplayInfo->bVisible = true;
375 pDisplayInfo->strDisplayText = pCurrentSCEChild->pszConversion;
377 pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
379 // TODO: Reimplement ----
381 // FIXME - handle context properly
382 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
383 // -----
385 pNewNode->m_pNodeManager = this;
386 pNewNode->m_pNodeManager->Ref();
388 SConversionData *pNodeUserData = new SConversionData;
389 pNodeUserData->bType = false;
390 pNodeUserData->pSCENode = pCurrentSCEChild;
391 pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
392 pNodeUserData->iOffset = pCurrentDataNode->iOffset + 1;
394 CLanguageModel::Context iContext;
395 iContext = pCurrentDataNode->pLanguageModel->CloneContext(pCurrentDataNode->iContext);
397 if(pCurrentSCEChild ->Symbol !=-1)
398 pNodeUserData->pLanguageModel->EnterSymbol(iContext, pCurrentSCEChild->Symbol); // TODO: Don't use symbols?
401 pNodeUserData->iContext = iContext;
403 pNewNode->m_pUserData = pNodeUserData;
404 // SAlphabetData *pNodeUserData = new SAlphabetData;
406 //pNewNode->m_pUserData = pNodeUserData;
408 //pNodeUserData->iPhase = iNewPhase;
409 //pNodeUserData->iSymbol = iIdx;
411 pNode->Children().push_back(pNewNode);
413 pCurrentSCEChild = pCurrentSCEChild->GetNext();
414 ++iIdx;
415 }while(pCurrentSCEChild);
418 // delete[] iSize;
421 else {
422 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
423 // TODO: Placeholder algorithm here
424 // TODO: Add an 'end of conversion' node?
425 int iLbnd(0);
426 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
428 CAlphabetManager::SRootData oRootData;
429 oRootData.szContext = NULL;
430 oRootData.iOffset = pCurrentDataNode->iOffset + 1;
432 pNewNode = m_pNCManager->GetRoot(0, pNode, iLbnd, iHbnd, &oRootData);
433 pNewNode->SetFlag(NF_SEEN, false);
435 pNode->Children().push_back(pNewNode);
436 // pNode->SetHasAllChildren(false);
438 else {
439 int iLbnd(0);
440 int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
442 CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
443 pDisplayInfo->iColour = m_pHelper->AssignColour(0, pCurrentSCEChild, 0);
444 pDisplayInfo->bShove = true;
445 pDisplayInfo->bVisible = true;
446 pDisplayInfo->strDisplayText = "";
448 pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
450 // TODO: Reimplement ----
452 // FIXME - handle context properly
453 // pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
454 // -----
456 pNewNode->m_pNodeManager = this;
457 pNewNode->m_pNodeManager->Ref();
459 SConversionData *pNodeUserData = new SConversionData;
460 pNodeUserData->bType = true;
461 pNodeUserData->pSCENode = NULL;
462 pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
463 pNodeUserData->iOffset = pCurrentDataNode->iOffset;
465 pNewNode->m_pUserData = pNodeUserData;
467 pNewNode->SetFlag(NF_SEEN, false);
469 pNode->Children().push_back(pNewNode);
474 void CConversionManager::ClearNode( CDasherNode *pNode ) {
475 // pNode->m_pNodeManager->Unref();
478 void CConversionManager::RecursiveDumpTree(SCENode *pCurrent, unsigned int iDepth) {
479 while(pCurrent) {
480 for(unsigned int i(0); i < iDepth; ++i)
481 std::cout << "-";
483 std::cout << " " << pCurrent->pszConversion << " " << pCurrent->IsHeadAndCandNum << " " << pCurrent->CandIndex << " " << pCurrent->IsComplete << " " << pCurrent->AcCharCount << std::endl;
485 RecursiveDumpTree(pCurrent->GetChild(), iDepth + 1);
486 pCurrent = pCurrent->GetNext();
490 void CConversionManager::BuildTree(CDasherNode *pRoot) {
491 DASHER_ASSERT(m_pHelper);
493 CDasherNode *pCurrentNode(pRoot->Parent());
495 std::string strCurrentString;
496 m_pHelper->ClearData(m_iCMID);
498 while(pCurrentNode) {
499 if(pCurrentNode->m_pNodeManager->GetID() == 2)
500 break;
502 // TODO: Need to make this the edit text rather than the display text
503 CAlphabetManager::SAlphabetData *pAlphabetData =
504 static_cast<CAlphabetManager::SAlphabetData *>(pCurrentNode->m_pUserData);
506 strCurrentString = m_pAlphabet->GetText(pAlphabetData->iSymbol) + strCurrentString;
507 pCurrentNode = pCurrentNode->Parent();
510 // TODO: The remainder of this function is messy - to be sorted out
511 int iHZCount;
512 SCENode *pStartTemp;
513 bool ConversionSuccess;
515 ConversionSuccess = m_pHelper->Convert(strCurrentString, &pStartTemp , &iHZCount, m_iCMID);
517 if((!ConversionSuccess)||(iHZCount==0)) {
518 m_pRoot = 0;
520 else{
521 m_pRoot = new SCENode *[1];
522 m_pRoot[0] = pStartTemp;
526 void CConversionManager::Output( CDasherNode *pNode, Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization) {
527 // TODO: Reimplement this
528 // m_pNCManager->m_bContextSensitive = true;
530 SCENode *pCurrentSCENode((static_cast<SConversionData *>(pNode->m_pUserData))->pSCENode);
532 if(pCurrentSCENode) {
533 Dasher::CEditEvent oEvent(1, pCurrentSCENode->pszConversion);
534 m_pNCManager->InsertEvent(&oEvent);
536 if((pNode->GetChildren())[0]->m_pNodeManager != this) {
537 Dasher::CEditEvent oEvent(11, "");
538 m_pNCManager->InsertEvent(&oEvent);
541 else {
542 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
543 Dasher::CEditEvent oOPEvent(1, "|");
544 m_pNCManager->InsertEvent(&oOPEvent);
546 else {
547 Dasher::CEditEvent oOPEvent(1, ">");
548 m_pNCManager->InsertEvent(&oOPEvent);
551 Dasher::CEditEvent oEvent(10, "");
552 m_pNCManager->InsertEvent(&oEvent);
556 void CConversionManager::Undo( CDasherNode *pNode ) {
557 SCENode *pCurrentSCENode((static_cast<SConversionData *>(pNode->m_pUserData))->pSCENode);
559 if(pCurrentSCENode) {
560 if(pCurrentSCENode->pszConversion && (strlen(pCurrentSCENode->pszConversion) > 0)) {
561 Dasher::CEditEvent oEvent(2, pCurrentSCENode->pszConversion);
562 m_pNCManager->InsertEvent(&oEvent);
565 else {
566 if((static_cast<SConversionData *>(pNode->m_pUserData))->bType) {
567 Dasher::CEditEvent oOPEvent(2, "|");
568 m_pNCManager->InsertEvent(&oOPEvent);
570 else {
571 Dasher::CEditEvent oOPEvent(2, ">");
572 m_pNCManager->InsertEvent(&oOPEvent);
577 bool CConversionManager::RecursiveDelTree(SCENode* pNode){
578 // TODO: Do we actually care about the return value?
580 // TODO: Function now obsolete
582 pNode->Unref();
584 return false;
585 // if(!pNode)
586 // return 0;
588 // // Note that this is a lattice, not a tree, so we need to be careful
589 // // about deleting thing twice
591 // if(pNode->pChild)
592 // RecursiveDeleteTree(pNode->pChild);
594 // if(pNode->pNext)
595 // RecursiveDeleteTree(pNode->pNext);
597 // SCENode * pTemp;
599 // if(!pNode)
600 // return 0;
601 // else if(pNode->pChild)
602 // return RecursiveDelTree(pNode->pChild);
603 // else{
605 // while(!pNode->pChild){
606 // pTemp = pNode->pNext;
607 // delete pNode;
608 // pNode = pTemp;
609 // if(!pNode)
610 // return 1;
611 // }
612 // return RecursiveDelTree(pNode->pChild);
613 // }
617 void CConversionManager::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
618 switch(iFlag) {
619 case NF_COMMITTED:
620 if(bValue){
621 // TODO: Reimplement (need a learning context, check whether
622 // symbol actually corresponds to character)
624 CLanguageModel * pLan = static_cast<SConversionData *>(pNode->m_pUserData)->pLanguageModel;
626 SCENode * pSCENode = static_cast<SConversionData *>(pNode->m_pUserData)->pSCENode;
628 if(!pSCENode)
629 return;
631 symbol s =pSCENode ->Symbol;
634 if((s!=-1) && m_pLanguageModel)
635 pLan->LearnSymbol(m_iLearnContext, s);
637 break;