tagging release
[dasher.git] / Src / DasherCore / PinYinConversionHelper.cpp
blob934b8bb0ed1fc7be4113aba0037a5b71527c3d49
2 #ifndef _WIN32
3 #include "config.h"
4 #endif
6 #include "LanguageModelling/LanguageModel.h"
7 #include "LanguageModelling/PPMLanguageModel.h"
8 #include <ChiCEInterface.h>
9 #include <iostream>
10 #include <fstream>
11 #include "PinYinConversionHelper.h"
13 #include <SCENodeNew.h>
15 using namespace Dasher;
17 CPinYinConversionHelper::CPinYinConversionHelper(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, Dasher::CAlphIO *pCAlphIO){
19 //TESTING FOR UTF-8 CHINESE IN C++ STRING
21 FILE *fp;
22 char* SimpChAlphabet = (char*)malloc(62);
23 fp = fopen("test.txt", "rw");
24 fread(SimpChAlphabet, 1, 62, fp);
25 fclose(fp);
28 const std::string SimpChAlphabet = "Chinese / 简体中文 (simplified chinese, in pin yin groups)";
31 Dasher::CAlphIO::AlphInfo oAlphInfo = pCAlphIO->GetInfo(SimpChAlphabet);
33 m_pAlphabet = new CAlphabet(oAlphInfo);
35 // std::cout<<SimpChAlphabet<<std::endl;
36 // for (int i = 0; i < 20; i++)
37 // std::cout<<m_pAlphabet->GetDisplayText(i)<<std::endl;
39 // TODO: Need to figure out what this does - it was previously set to true in PopulateChildren
41 CSymbolAlphabet alphabet(m_pAlphabet->GetNumberTextSymbols());
42 alphabet.SetSpaceSymbol(m_pAlphabet->GetSpaceSymbol()); // FIXME - is this right, or do we have to do some kind of translation?
43 alphabet.SetAlphabetPointer(m_pAlphabet); // Horrible hack, but ignore for now.
45 m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, alphabet);
48 //Train the PPM Model for Chinese Alphabet
49 TrainChPPM();
53 //Old Code
54 m_bTraceNeeded = true;//reset trace bool
56 //clears the process phrase flags
57 for(int i(0); i<MAX_HZ_NUM; i++)
58 m_bPhrasesProcessed[i]=0;
61 BuildDataBase();
62 CEInitialise();
66 bool CPinYinConversionHelper::Convert(const std::string &strSource, SCENode ** pRoot, int * childCount, int CMid) {
68 SCENodeNew *pConversionList;
69 int iHZCount;
71 if(CEConvert (strSource.c_str(), &pConversionList, &iHZCount, CMid)){
72 SCENodeNew *pHead(pConversionList);
74 std::vector<SCENodeNew *> vHeads;
76 while(pHead) {
77 vHeads.push_back(pHead);
78 pHead = pHead->pChild;
81 SCENode *pTail = NULL;
82 SCENode *pNextTail = NULL;
84 for(std::vector<SCENodeNew *>::reverse_iterator it(vHeads.rbegin()); it != vHeads.rend(); ++it) {
85 SCENodeNew *pCurrentNode(*it);
87 SCENode *pPreviousNode = NULL;
89 while(pCurrentNode) {
90 SCENode *pNewNode = new SCENode;
92 pNewNode->pszConversion = pCurrentNode->pszConversion;
93 pNewNode->IsHeadAndCandNum = pCurrentNode->IsHeadAndCandNum;
94 pNewNode->CandIndex = pCurrentNode->CandIndex;
95 pNewNode->Symbol = pCurrentNode->Symbol;
96 // pNewNode->SumPYProbStore = pCurrentNode->SumPYProbStore;
97 pNewNode->IsComplete = pCurrentNode->IsComplete;
98 pNewNode->AcCharCount = pCurrentNode->AcCharCount; /*accumulative character count*/
99 pNewNode->NodeSize = pCurrentNode->NodeSize;
100 pNewNode->HZFreq = pCurrentNode->HZFreq;
101 pNewNode->HZProb = pCurrentNode->HZProb;
103 if(pTail)
104 pNewNode->SetChild(pTail);
106 if(pPreviousNode) {
107 pPreviousNode->SetNext(pNewNode);
109 else {
110 pNextTail = pNewNode;
111 pNextTail->Ref();
114 if(pPreviousNode)
115 pPreviousNode->Unref();
117 pPreviousNode = pNewNode;
118 pCurrentNode = pCurrentNode->pNext;
121 if(pPreviousNode)
122 pPreviousNode->Unref();
124 if(pTail)
125 pTail->Unref();
127 pTail = pNextTail;
131 *pRoot = pTail;
134 // // TODO: Now need to convert...
136 // *pRoot= pStart;
138 // // Connect up the rest of the nodes to make a lattice
139 // SCENode *pHead(pStart);
141 // while(pHead) {
142 // SCENode *pNewChild(pHead->GetChild());
143 // SCENode *pCurrent(pHead->GetNext());
145 // while(pCurrent) {
146 // pCurrent->SetChild(pNewChild);
147 // pCurrent = pCurrent->GetNext();
148 // }
150 // pHead = pHead->GetChild();
151 // }
153 return 1;
155 else{
156 *pRoot = 0;
157 return 0;
161 unsigned int CPinYinConversionHelper::GetSumPYProbs(Dasher::CLanguageModel::Context context, SCENode * pPYCandStart, int norm){
163 std::vector <unsigned int > Probs;
164 unsigned int sumProb=0;
166 m_pLanguageModel->GetProbs(context, Probs, norm);
168 SCENode * pCurrentNode = pPYCandStart;
170 while(pCurrentNode){
172 std::vector <symbol >Symbols;
173 std::string HZ = static_cast<std::string>(pCurrentNode->pszConversion);
174 // Distribute the remaining space evenly
176 m_pAlphabet->GetSymbols(&Symbols, &HZ, 0);
178 if(Symbols.size()!=0)
179 sumProb += Probs[Symbols[0]];
180 pCurrentNode = pCurrentNode->GetNext();
184 return sumProb;
187 void CPinYinConversionHelper::GetProbs(Dasher::CLanguageModel::Context context, std::vector < unsigned int >&Probs, int norm){
193 void CPinYinConversionHelper::AssignSizes(SCENode * pStart, Dasher::CLanguageModel::Context context, long normalization, int uniform, int iNChildren){
195 SCENode *pNode = pStart;
197 std::vector <unsigned int > Probs;
199 int iSymbols = m_pAlphabet->GetNumberSymbols();
200 int iLeft(iNChildren);
201 int iRemaining(normalization);
203 int uniform_add;
204 int nonuniform_norm;
205 int control_space;
206 int iNorm = normalization;
209 //IGNORE CONTROL MODE FOR NOW
210 // if(!GetBoolParameter(BP_CONTROL_MODE)) {
211 control_space = 0;
212 uniform_add = ((iNorm * uniform) / 1000) / (iSymbols - 2); // Subtract 2 from no symbols to lose control/root nodes
213 nonuniform_norm = iNorm - (iSymbols - 2) * uniform_add;
214 // }
216 else {
217 control_space = int (iNorm * 0.05);
218 uniform_add = (((iNorm - control_space) * uniform / 1000) / (iSymbols - 2)); // Subtract 2 from no symbols to lose control/root nodes
219 nonuniform_norm = iNorm - control_space - (iSymbols - 2) * uniform_add;
224 // context = m_pLanguageModel->CreateEmptyContext();
226 //Testing Code for PYCHelper GetPYSumProbs
229 CLanguageModel::Context iContext = m_pLanguageModel->CreateEmptyContext();
231 SCENode * pTemp = pStart;
232 while(pTemp){
233 std::cout<<"test sum probs"<<GetSumPYProbs(iContext, pTemp, nonuniform_norm)<<std::endl;
234 std::cout<<"test norm"<<nonuniform_norm<<std::endl;
235 pTemp=pTemp->pChild;
239 m_pLanguageModel->GetProbs(context, Probs, nonuniform_norm);
242 std::vector<unsigned int>::iterator it;
243 for(it = Probs.begin();it!=Probs.end(); it++)
244 std::cout<<*it<<",";
246 std::cout<<"end"<<std::endl;
249 // unsigned int sum;
252 unsigned long long int sumProb=0;
254 std::vector <symbol >Symbols;
255 std::string HZ;
256 CLanguageModel::Context iCurrentContext;
259 //std::cout<<"start"<<std::endl;
260 while(pNode){
262 Symbols.clear();
263 HZ = static_cast<std::string>(pNode->pszConversion);
264 m_pAlphabet->GetSymbols(&Symbols, &HZ, 0);
266 if(Symbols.size()!=0){
267 pNode->Symbol = Symbols[0];
268 //sumProb += Probs[Symbols[0]];
271 iCurrentContext=m_pLanguageModel->CloneContext(context);
272 m_pLanguageModel->EnterSymbol(iCurrentContext, pNode->Symbol);
274 if(pStart->GetChild()){
275 pNode->SumPYProbStore = GetSumPYProbs(iCurrentContext, pStart->GetChild(), nonuniform_norm);
276 //std::cout<<"sumpyprobstore"<<pNode->SumPYProbStore<<std::endl;
278 else
279 pNode->SumPYProbStore = 1;
281 sumProb += (Probs[pNode->Symbol]*(pNode->SumPYProbStore));
282 //std::cout<<"Probs[symbol]"<<Probs[Symbols[0]]<<std::endl;
283 //std::cout<<"sumProbs"<<sumProb<<std::endl;
285 else
286 pNode->Symbol = -1;
288 pNode = pNode->GetNext();
293 pNode = pStart;
294 while(pNode){
296 std::vector <symbol >Symbols;
297 std::string HZ = static_cast<std::string>(pNode->pszConversion);
300 m_pAlphabet->GetSymbols(&Symbols, &HZ, 0);
303 if(pNode->Symbol!=-1){
304 if(sumProb!=0){
306 // iCurrentContext=m_pLanguageModel->CloneContext(context);
307 //m_pLanguageModel->EnterSymbol(iCurrentContext, pNode->Symbol);
310 pNode->NodeSize =static_cast<unsigned long long int>(Probs[pNode->Symbol])*(pNode->SumPYProbStore)*normalization/sumProb;
313 std::cout<<"HZ"<<pNode->pszConversion<<std::endl;
314 std::cout<<"Probs"<<Probs[pNode->Symbol]<<std::endl;
315 std::cout<<"SumProbStore"<<pNode->SumPYProbStore<<std::endl;
316 std::cout<<"above"<<Probs[pNode->Symbol]*(pNode->SumPYProbStore)<<std::endl;
317 std::cout<<"sumprob"<<sumProb<<std::endl;
318 std::cout<<"nodesize"<<pNode->NodeSize<<std::endl;
322 else{
323 pNode->NodeSize = 0;//hopefully this will be not be displayed
326 if(pNode->NodeSize < 1)
327 pNode->NodeSize = 1;
329 iRemaining -= pNode->NodeSize;
331 pNode = pNode->GetNext();
334 pNode = pStart;
336 while(pNode){
338 int iDiff(iRemaining / iLeft);
340 pNode->NodeSize += iDiff;
342 iRemaining -= iDiff;
343 --iLeft;
344 pNode = pNode->GetNext();
347 pNode = pStart;
348 while(pNode){
349 std::cout<<"size"<<pNode->NodeSize<<std::endl;
350 pNode = pNode ->pNext;
354 pNode = pStart;
355 while(pNode){
356 std::cout<<pNode->NodeSize<<",";
357 pNode = pNode->pNext;
360 std::cout<<std::endl;
363 //std::cout<<catStr<<std::endl;
365 //for(int i=0; i<Symbols.size(); i++)
366 // std::cout<<Symbols[i]<<",";
368 //std::cout<<std::endl;
370 //for(int i=0; i<Symbols.size();i++)
371 // std::cout<<m_pAlphabet->GetText(Symbols[i]);
373 //std::cout<<std::endl;
375 // pSizes[i] = m_pNCManager->GetLongParameter(LP_NORMALIZATION)*(100+5*freq[i])/(100*iNChildren+5*totalFreq);
379 bool CPinYinConversionHelper::GetPhraseList(int HZIndex, SCENode ** psOutput, int CMid){
380 // SCENode * pStart;
382 // if(CEGetPhraseList(HZIndex, &pStart, CMid)){
384 // *psOutput= pStart;
386 // return 1;
387 // }
388 // else{
389 // *psOutput = 0;
390 // return 0;
391 // }
393 return 0;
396 void CPinYinConversionHelper::BuildDataBase(){
398 std::vector<std::vector<std::vector<std::vector<std::vector<int> > > > >IndexStack;
399 std::vector<std::vector<std::vector<std::vector<int> > > >CandStack;
400 std::vector<std::vector<std::vector<int> > > subIndexStack;
401 std::vector<std::vector<int> > subCandStack;
402 std::vector<int> cell;
404 vContextData.clear();
405 cell.push_back(0);
406 subCandStack.push_back(cell);
409 for(int i(0); i<MAX_HZ_NUM; i++){
410 for(int j(0); j<MAX_CARE_CAND; j++){
411 for(int k(0); k<i+1; k++)
412 subIndexStack.push_back(subCandStack);
413 CandStack.push_back(subIndexStack);
414 subIndexStack.clear();
416 IndexStack.push_back(CandStack);
417 CandStack.clear();
420 for(int k(0); k<MAX_CM_NUM; k++)
421 vContextData.push_back(IndexStack);
425 void CPinYinConversionHelper::ClearData(int CMid){
427 std::vector<int> cell;
428 cell.push_back(0);
430 for(int i(0); i<MAX_HZ_NUM; i++){
431 for(int j(0); j<MAX_CARE_CAND/*(m_pRoot[i]->pChild->IsHeadAndCandNum)*/; j++){
432 for(int k(0); k<i+1; k++){
433 vContextData[CMid][i][j][k].clear();
434 vContextData[CMid][i][j][k].push_back(cell);
442 std::vector<std::vector<std::vector<std::vector<std::vector<int> > > > > * CPinYinConversionHelper::GetDP(int CMid){
444 return &vContextData[CMid];
450 //THIS FUNCTION IS CALLED WHEN A SET OF CHILDREN IS BEING POPULATED
451 //AND BEFORE CALCULATING EACH NODE'S SCORE. THE POSITION IN THE
452 //SENTENCE IS GIVEN TO THE PY HELPER->LIBRARY AND A LIST OF PHRASES
453 //CORRESPONDING TO THE CHARACTER IS RETURNED. THESE PHRASES ARE
454 //PROCESSED INTO THE CONTEXT DATA IN PY HELPER
456 void CPinYinConversionHelper::ProcessPhrase(HZIDX HZIndex){
458 SCENode * pPhraseList;
459 SCENode * pNode;
461 // bool stop=0;
463 int iIdx(0);
464 int i;
465 int score[m_iHZCount-HZIndex];
467 CANDIDX CandIndex[m_iHZCount-HZIndex]; //list to store candidates
468 //returned from HZlookup,
469 //used to allocate data
470 std::string strtemp;
472 std::vector<int> cell;
474 if(!(GetPhraseList(HZIndex, &pPhraseList, m_iCMID)))
475 return;
477 if(pPhraseList->AcCharCount>4)
478 pNode = pPhraseList->GetNext();
479 else
480 pNode = pPhraseList;
482 while((pNode)&&(iIdx<=MAX_CARE_PHRASE)){
484 //this section needs research. What scores would be a good estimate.
486 switch(pNode->AcCharCount){
487 case 2:
488 score[0] = 2;
489 score[1] = 3;
490 break;
491 case 3:
492 score[0] = 3;
493 score[1] = 4;
494 score[2] = 5;
495 break;
496 case 4:
497 score[0] = 4;
498 score[1] = 5;
499 score[2] = 6;
500 score[3] = 7;
501 break;
502 default:
503 for(int j(0); j< m_iHZCount-HZIndex; j++)
504 score[j] = 5+j;
505 break;
509 for(i=0 ; (i<pNode->AcCharCount); i++){
510 strtemp=pNode->pszConversion;
512 //TESTING
513 //std::cout<<"accharcount"<<pNode->AcCharCount<<std::endl;
514 //std::cout<<"the cut string is"<<strtemp.substr(3*i,3)<<std::endl;
515 //std::cout<<"list to look from
516 //is"<<m_pRoot[HZIndex+i]->pChild->pszConversion<<std::endl;
519 CandIndex[i] = HZLookup(HZIndex+i, strtemp.substr(3*i, 3));
521 //TESTING
522 //std::cout<<"the lookup is"<<CandIndex[i]<<std::endl;
524 if(CandIndex[i]==-1)
525 break;
526 else{
527 cell.push_back(score[i]);
528 for(int j(0); j< i; j++)
529 cell.push_back(CandIndex[i-j-1]);
532 //say the phrase is XYZ(this) push back in each cell in the
533 //order: score, Z, Y, X so as to match with vTrace in
534 //calculatescore
536 if(!(HZIndex + i> MAX_HZ_NUM -1))
537 (*(GetDP(m_iCMID)))[HZIndex +i][CandIndex[i]][HZIndex].push_back(cell);
538 cell.clear();
543 pNode = pNode ->GetNext();
544 iIdx ++;
546 m_bPhrasesProcessed[HZIndex]=1;
551 CANDIDX CPinYinConversionHelper::HZLookup(HZIDX HZIndex, const std::string &strSource){
554 // this was done before candindex was put into node member, change
555 // if have time
557 // int iIdx(0);
558 if((HZIndex > m_iHZCount-1)||strSource.size()!=3)
559 return -1;
561 // TODO: Reimplement -----
563 // SCENode * pNode = m_pRoot[HZIndex]->pChild;
565 // while(pNode&&(iIdx<=MAX_CARE_CAND)){
567 // if(strSource== pNode->pszConversion)
568 // return iIdx;
569 // pNode = pNode->pNext;
570 // iIdx++;
571 // }
573 // -----
575 return -1;
578 //CALCULATES SCORE OF A CERTAIN CANDIDATE HZ CHARACTER NODE TO BE
579 //POPULATED, FROM THE CONTEXT DATA IN PY HELPER. FINDS VTRACE TO MATCH
580 //CONTEXT SEQUENCE STORED IN THE LAST LEVEL OF DATABASE
582 int CPinYinConversionHelper::CalculateScore(CDasherNode * pNode, CANDIDX CandIndex){
583 CDasherNode *pIterateDNode(pNode);
584 SCENode *pTemp;
586 HZIDX HZIndex;
587 int score=0;
589 bool addtick=1; //bool to signal add score
593 //THIS SECTION IS TO FIND VTRACE, IN THE SAME WAY AS FINDING INPUT
594 //PY STRING
596 if(m_bTraceNeeded){
597 vTrace.clear();
599 while(pIterateDNode&&(pIterateDNode->m_pNodeManager->GetID() == 2)) {
601 pTemp=static_cast<SCENode*>(pIterateDNode->m_pUserData);
603 if(!pTemp)
604 pIterateDNode=0;
605 else{
606 vTrace.push_back(pTemp->CandIndex);
607 pIterateDNode= pIterateDNode->Parent();
611 //THE FOLLOWING IS TESTING FOR VTRACE
612 // if(vTrace.size()!=0){
613 // std::cout<<"signal"<<std::endl;
614 // for(std::vector<int>::iterator it(vTrace.begin());it!=vTrace.end();it++)
615 // std::cout<< *it <<std::endl;
619 m_bTraceNeeded= false;
624 if(CandIndex>=MAX_CARE_CAND)
625 return 0;
629 pTemp=static_cast<SCENode*>(pNode->m_pUserData);
631 if(pTemp)
632 HZIndex = pTemp->AcCharCount;
633 else
634 HZIndex = 0;
637 //THE DATA IS CONSTRUCTED OF UNITS OF SINGLE CELLS STORING CONTEXT SEQUENCE
638 //AND A CORRESPONDING SCORE
640 //LEVEL 1 : HZ INDEX : NUMBER OF CHARACTERS CONVERTED
641 //LEVEL 2 : CAND INDEX : NUMBER OF CANDIDATES WITH EACH POSITION
642 //LEVEL 3 : SUB HZ INDEX :(COULD BE REDUNDANT) CORRESPONDES TO
643 // INDEX OF CHARACTERS WHICH WERE CONSISTED IN PHRASES
644 //LEVEL 4 : SUB CAND INDEX : WHICH CAND WAS IN THE PHRASE
645 //LEVLE 5 : CELL: STORING PHASES IN HZ INDEX AND ASSIGNED SCORE
646 // IN THE WAY: 1.SCORE 2.Z 3.Y 4.X FOR PHRASE XYZ
647 // PREVIOUSLY PROCESSED
649 for(std::vector<std::vector<std::vector<int> > >::iterator itIndex((*(GetDP(m_iCMID)))[HZIndex][CandIndex].begin()); itIndex!=(*(GetDP(m_iCMID)))[HZIndex][CandIndex].end();itIndex++){
651 for(std::vector<std::vector<int> >::iterator itCand(itIndex->begin()); itCand!=itIndex->end(); itCand++){
653 //IF HAS LEFT CONTEXT INFORMATION, MATCH VTRACE WITH CELL
654 //POSITION 1
656 if((*itCand).size() !=1){
657 for(unsigned int i(0); i<(*itCand).size()-1; i++)
658 if((*itCand)[i+1]!=vTrace[i]){
659 addtick = 0;
660 break;
662 if(addtick)
663 score+=(*itCand)[0];
664 addtick =1;
666 else
667 score+=(*itCand)[0];
671 return score;
675 void CPinYinConversionHelper::TrainChPPM(){
677 // TODO: Changed to 1 from 10
678 for(int i =0; i<1;i++)
679 ProcessFile(i);
683 void CPinYinConversionHelper::ProcessFile(int index){
686 CLanguageModel::Context trainContext;
687 trainContext = m_pLanguageModel->CreateEmptyContext();
689 FILE * fp;
691 char strPath[200];
693 const char* Alph="ABCDEFGHJKLMNPR";
695 char str[4];
696 std::string HZ;
697 // int i, j, iLen;
701 long pos=0;
702 char cget;
704 int trialcount=0;
707 strcpy (strPath, (char *) getenv ("HOME"));
708 strcat (strPath, "/training/corpus/character/");
709 strcat (strPath, "C");
710 strncat (strPath, Alph+index, 1);
711 strcat (strPath, ".txt");
713 printf("strPath is %s\n", strPath);
715 fp = fopen (strPath, "rb");
717 if (!fp)
718 printf("cannot open file or incorrect directory\n");
720 while(!feof(fp)){
721 pos = ftell(fp);
722 cget=fgetc(fp);
724 // printf("OXE$4= %d\n", (unsigned char)0xE4); **228**
725 // printf("OXE9= %d\n", (unsigned char)0xE9); **233**
726 //printf("BEFORE print the integer code for unsigned char %d\n", (unsigned char) cget);
727 while (((unsigned char)cget < (unsigned char) 0xE4 || (unsigned char) cget > (unsigned char) 0xE9)&&!feof(fp)){
729 if((unsigned char) cget > (unsigned char) 0xE9){
731 fseek(fp, pos, SEEK_SET);
732 fread(str, sizeof(char)*3, 1, fp);
733 str[3]='\0';
734 pos = ftell(fp);
735 cget = fgetc(fp);
736 //printf("UNICODE SYMBOL/NUMBER |%s|\n", str);
739 else if(cget == 32){
740 pos = ftell(fp);
741 cget=fgetc(fp);
742 //fputc(32, op);
744 else if(cget ==10){
745 pos = ftell(fp);
746 cget=fgetc(fp);
747 //fputc(10, op);
749 else{
750 //printf("NON-UNICODE character |%c|\n", cget);
752 pos = ftell(fp);
753 cget=fgetc(fp);
754 trialcount ++;
758 if(!feof(fp)){
760 fseek(fp, pos, SEEK_SET);
761 fread(str, sizeof(char)*3, 1, fp);
762 str[3]='\0';
763 //printf("HZ |%s|\n",str);
764 // printf("first byte %d ", (unsigned char)str[0]);
765 // printf("seconde byte %d\n", (unsigned char)str[1]);
766 // printf("third byte %d\n", (unsigned char)str[2]);
768 HZ = static_cast<std::string>(str);
770 // std::cout<<"HZ is "<<HZ<<std::endl;
772 std::vector<symbol> Sym;
773 m_pAlphabet->GetSymbols(&Sym, &HZ, 0);
775 if(Sym.size()!=0)
776 m_pLanguageModel->LearnSymbol(trainContext, Sym[0]);
777 else
778 std::cout<<HZ<<"not found!"<<std::endl;
798 while(!feof(fp)){
799 // pos = ftell(fp);
800 cget=fgetc(fp);
802 if(!feof(fp)){
803 fread(str, sizeof(char)*3, 1, fp);
804 str[3]='\0';
805 printf("HZ |%s|\n",str);
806 // printf("first byte %d ", (unsigned char)str[0]);
807 // printf("seconde byte %d\n", (unsigned char)str[1]);
808 // printf("third byte %d\n", (unsigned char)str[2]);
810 HZ = static_cast<std::string>(str);
812 std::cout<<"HZ is "<<HZ<<std::endl;
814 std::vector<symbol> Sym;
815 m_pAlphabet->GetSymbols(&Sym, &HZ, 0);
817 if(Sym.size()!=0)
818 m_pLanguageModel->LearnSymbol(trainContext, Sym[0]);
819 //else
820 // std::cout<<"not found!"<<std::endl;