5 // Created by Alan Lawrence on 13/12/11.
6 // Copyright 2011 Cambridge University. All rights reserved.
9 #include "RoutingAlphMgr.h"
10 #include "DasherInterfaceBase.h"
12 using namespace Dasher
;
// Debug-only allocation tracking. Under _DEBUG_MEMLEAKS, DEBUG_NEW expands to
// a `new` expression that additionally passes _NORMAL_BLOCK, THIS_FILE and
// __LINE__. THIS_FILE is a file-local char array initialised from __FILE__.
14 // Track memory leaks on Windows to the line that new'd the memory
16 #ifdef _DEBUG_MEMLEAKS
17 #define DEBUG_NEW new( _NORMAL_BLOCK, THIS_FILE, __LINE__ )
20 static char THIS_FILE
[] = __FILE__
;
// Constructs a routing alphabet manager.
// All four arguments (pCreator, pInterface, pNCManager, pAlphabet) are
// forwarded unchanged to the CAlphabetManager base-class constructor.
// The body asserts (DASHER_ASSERT) that the alphabet's m_iConversionID is
// either 3 or 4.
24 CRoutingAlphMgr::CRoutingAlphMgr(CSettingsUser
*pCreator
, CDasherInterfaceBase
*pInterface
, CNodeCreationManager
*pNCManager
, const CAlphInfo
*pAlphabet
)
25 : CAlphabetManager(pCreator
, pInterface
, pNCManager
, pAlphabet
) {
// Only conversion IDs 3 and 4 are accepted by this manager.
27 DASHER_ASSERT(pAlphabet
->m_iConversionID
==3 || pAlphabet
->m_iConversionID
==4);
// Builds the lookup tables relating alphabet symbol indices ("routes") to
// base symbols and to groups:
//   m_vBaseSyms[route]      -> base symbol for that route
//   m_vRoutes[base]         -> set of routes that produce that base symbol
//   m_vGroupsByRoute[route] -> a group whose [iStart, iEnd) covers the route
// Index 0 of m_vBaseSyms and m_vRoutes is reserved for "unknown".
30 void CRoutingAlphMgr::InitMap() {
31 m_vBaseSyms
.reserve(m_pAlphabet
->iEnd
); m_vBaseSyms
.push_back(0); //base for unknown route = unknown!
32 m_vRoutes
.push_back(set
<symbol
>()); //unknown base symbol has no routes
// Visit every alphabet index i in [1, iEnd) and look up its text in m_map
// to obtain the base symbol s.
33 for (int i
=1; i
<m_pAlphabet
->iEnd
; i
++) {
34 symbol s
= m_map
.Get(m_pAlphabet
->GetText(i
));
// NOTE(review): the condition guarding the next two statements (appending a
// fresh route set and registering the text in m_map) is not visible here;
// presumably they apply only when the text had no base symbol yet - confirm.
37 m_vRoutes
.push_back(set
<symbol
>());
38 m_map
.Add(m_pAlphabet
->GetText(i
),s
);
// Record the mapping in both directions: route i -> base s, and s -> {.., i}.
40 m_vBaseSyms
.push_back(s
);
41 m_vRoutes
[s
].insert(i
);
// One group slot per route (same size as m_vBaseSyms).
43 m_vGroupsByRoute
.resize(m_vBaseSyms
.size());
// vGroups is a worklist (used as a stack via back()/pop_back()) of groups
// still to visit, seeded with the alphabet's pChild. The alphabet itself is
// asserted to have no pNext.
44 vector
<const SGroupInfo
*> vGroups
;
45 DASHER_ASSERT(!m_pAlphabet
->pNext
);
46 vGroups
.push_back(m_pAlphabet
->pChild
);
47 while (!vGroups
.empty()) {
48 const SGroupInfo
*g(vGroups
.back()); vGroups
.pop_back();
// NOTE(review): no null check on g is visible before it is dereferenced,
// although pNext/pChild are pushed unconditionally below - confirm that null
// entries are skipped.
// Assign g to every route in [g->iStart, g->iEnd). A group is processed
// before the pChild it pushes, so a later-processed group overwrites the
// entry for any index the two share.
50 for (int i
=g
->iStart
; i
<g
->iEnd
; i
++) m_vGroupsByRoute
[i
]=g
;
51 vGroups
.push_back(g
->pNext
);
52 vGroups
.push_back(g
->pChild
);
// Creates the language model and stores it in m_pLanguageModel.
// The model is a CRoutingPPMLanguageModel constructed from: this manager,
// pointers to m_vBaseSyms and m_vRoutes, and a boolean that is true exactly
// when the alphabet's m_iConversionID equals 4.
// NOTE(review): the meaning of that final flag is defined by
// CRoutingPPMLanguageModel, which is not shown in this file.
56 void CRoutingAlphMgr::CreateLanguageModel() {
57 m_pLanguageModel
= new CRoutingPPMLanguageModel(this, &m_vBaseSyms
, &m_vRoutes
, m_pAlphabet
->m_iConversionID
==4);
// Produces the training-file text for this routed symbol node.
// Starts from CSymbolNode::trainText(); when a group is recorded for this
// node's symbol in m_vGroupsByRoute, the result is prefixed with
//   m_strConversionTrainStart + <group name> + m_strConversionTrainStop.
// NOTE(review): the return taken when no group is recorded is not visible
// here; presumably the plain text t is returned - confirm.
60 string
CRoutingAlphMgr::CRoutedSym::trainText() {
// Routes sharing this node's base symbol; used only by the assertion below,
// which checks that this node's own symbol is one of them.
61 const set
<symbol
> &routes(mgr()->m_vRoutes
[mgr()->m_vBaseSyms
[iSymbol
]]);
62 DASHER_ASSERT(routes
.count(iSymbol
));
63 string t
=CSymbolNode::trainText();
// g is the group recorded for this route, if any (a null pointer skips the
// annotated form).
65 if (const SGroupInfo
*g
= mgr()->m_vGroupsByRoute
[iSymbol
])
66 return mgr()->m_pAlphabet
->m_strConversionTrainStart
+ g
->strName
+ mgr()->m_pAlphabet
->m_strConversionTrainStop
+ t
;
// Constructs a routed symbol node by forwarding iOffset, pLabel, pMgr and
// iSymbol unchanged to the CSymbolNode base-class constructor. No further
// initialisation is visible in this constructor.
70 CRoutingAlphMgr::CRoutedSym::CRoutedSym(int iOffset
, CDasherScreen::Label
*pLabel
, CRoutingAlphMgr
*pMgr
, symbol iSymbol
)
71 : CSymbolNode(iOffset
, pLabel
, pMgr
, iSymbol
) {
// Creates a root node for a symbol at the given offset.
// The incoming sym is overwritten with the result of GetBestRoute(ctx) on
// the language model, and a new CRoutedSym is returned for that route, using
// the label m_vLabels[sym].
// The static_cast relies on m_pLanguageModel being a CRoutingPPMLanguageModel,
// which is the type CreateLanguageModel() in this file allocates.
// Returns a pointer to a node allocated with new.
75 CAlphabetManager::CAlphNode
*CRoutingAlphMgr::CreateSymbolRoot(int iOffset
, CLanguageModel::Context ctx
, symbol sym
) {
76 //sym is from the map, so a base symbol. It's at the end of the context,
77 // TODO unless this is the completely-empty context,
78 // so ask the LM for which way it's most likely to have been entered
79 sym
= static_cast<CRoutingPPMLanguageModel
*>(m_pLanguageModel
)->GetBestRoute(ctx
);
80 return new CRoutedSym(iOffset
, m_vLabels
[sym
], this, sym
);
// Returns the colour index for a route displayed at iOffset.
// The alphabet's own colour for the route is fetched first. A static 2x3
// fallback table, colourStore, is indexed by the low bit of iOffset and by
// route % 3. For even offsets, a colour below 130 is shifted up by 130.
// NOTE(review): the table's initialiser values, the condition selecting the
// fallback branch, and the final return are not visible here; per the
// existing comment the fallback is for "none specified in alphabet" - confirm.
83 int CRoutingAlphMgr::GetColour(symbol route
, int iOffset
) const {
84 int iColour
= m_pAlphabet
->GetColour(route
); //colours were rehashed with CH symbol text
86 //none specified in alphabet
87 static int colourStore
[2][3] = {
// Fallback lookup: row = parity of iOffset, column = route modulo 3.
95 return colourStore
[iOffset
&1][route
% 3];
// Even offsets use the upper colour range (values below 130 get +130).
97 if ((iOffset
&1)==0 && iColour
<130) iColour
+=130;
// Creates the node for entering route iSymbol as a child of pParent.
// The new node's offset is the parent's offset + 1, plus one more when the
// symbol's text is "\r\n". Its language-model context is a clone of the
// parent's context into which the BASE symbol (m_vBaseSyms[iSymbol]) has been
// entered, rather than the route number itself.
// NOTE(review): the return statement is not visible here; presumably
// pAlphNode is returned - confirm.
102 CDasherNode
*CRoutingAlphMgr::CreateSymbolNode(CAlphNode
*pParent
, symbol iSymbol
) {
104 int iNewOffset
= pParent
->offset()+1;
// "\r\n" advances the offset by one extra position.
105 if (m_pAlphabet
->GetText(iSymbol
)=="\r\n") iNewOffset
++;
106 CSymbolNode
*pAlphNode
= new CRoutedSym(iNewOffset
, m_vLabels
[iSymbol
], this, iSymbol
);
// Start from a copy of the parent's context so the parent's is left untouched.
108 pAlphNode
->iContext
= m_pLanguageModel
->CloneContext(pParent
->iContext
);
110 //namely, we want to enter only the BASE symbol into the LM, not the route
111 // (which would be out of range):
112 m_pLanguageModel
->EnterSymbol(pAlphNode
->iContext
, m_vBaseSyms
[iSymbol
]);
113 // (Unfortunately, we can't make EnterSymbol take route numbers, because
114 // it has base symbols passed to it from the alphabet map)
// Constructs a trainer bound to the routing alphabet manager pMgr.
// The CTrainer base is initialised with pMsgs and with the manager's
// m_pLanguageModel, m_pAlphabet and a pointer to its m_map; pMgr itself is
// kept in m_pMgr.
// The body converts the training-start delimiter
// (m_pInfo->m_strConversionTrainStart) into symbols via GetSymbols; when that
// yields exactly one symbol it is stored in m_iStartSym. A warning message
// about a faulty alphabet definition is also issued from this constructor.
// NOTE(review): the branch structure between the assignment and the warning
// is not visible here; presumably the warning is the else-branch - confirm.
119 CRoutingAlphMgr::CRoutingTrainer::CRoutingTrainer(CMessageDisplay
*pMsgs
, CRoutingAlphMgr
*pMgr
)
120 : CTrainer(pMsgs
, pMgr
->m_pLanguageModel
, pMgr
->m_pAlphabet
, &pMgr
->m_map
), m_pMgr(pMgr
) {
// Symbolise the start delimiter; a usable delimiter maps to a single symbol.
123 vector
<symbol
> trainStartSyms
;
124 m_pAlphabet
->GetSymbols(trainStartSyms
, m_pInfo
->m_strConversionTrainStart
);
125 if (trainStartSyms
.size()==1)
126 m_iStartSym
= trainStartSyms
[0];
128 m_pMsgs
->FormatMessageWithString(_("Warning: faulty alphabet definition: training-start delimiter %s must be a single unicode character. May be unable to process training file."),
129 m_pInfo
->m_strConversionTrainStart
.c_str());
// Chooses which route (alphabet symbol index) should be learned for baseSym,
// given an optional group name read from the training file.
//   bHaveRoute - whether the training text carried a route annotation
//   strRoute   - the annotated group name
//   baseSym    - the base symbol read from the training text
// Candidate routes are those listed in m_pMgr->m_vRoutes for baseSym; each
// candidate's group (from m_vGroupsByRoute) is compared by name to strRoute.
// When exactly one entry ends up in `named`, that entry is returned. Warnings
// are issued through m_pMsgs for the zero-match and several-match cases.
// NOTE(review): the declaration of `named`, the statement run on a name
// match, the remaining return values and the conditions around the warning
// are not visible here - confirm against the full file.
132 symbol
CRoutingAlphMgr::CRoutingTrainer::getRoute(bool bHaveRoute
, const string
&strRoute
, symbol baseSym
) {
// .at() is used for the lookup rather than operator[].
133 const set
<symbol
> &candidates(m_pMgr
->m_vRoutes
.at(baseSym
));
// Examine each candidate route that has a group recorded for it.
135 for (set
<symbol
>::iterator it
=candidates
.begin(); it
!=candidates
.end(); it
++)
136 if (const SGroupInfo
*g
=m_pMgr
->m_vGroupsByRoute
[*it
])
137 if (g
->strName
== strRoute
)
139 //if no name was given, but a single group with no name exists, use it!
140 if (named
.size()==1) return *(named
.begin());
141 //otherwise, we will not learn a route - but this is fine, we can learn
142 // that later more-or-less independently
// The message text depends on whether no candidate matched or several did.
145 m_pMsgs
->FormatMessageWith2Strings((named
.size()==0)
146 ? _("Warning: training file contains character '%s' as member of group '%s', but no group of that name contains the character. Ignoring group specifier.")
147 : _("Warning: training file contains character '%s' as member of group '%s', but alphabet contains several such groups. Dasher will not be able to learn how you want to write this character."),
148 m_pInfo
->GetDisplayText(baseSym
).c_str(),
151 // don't flag a problem if no route specified
// Trains the language model from a stream of symbols.
// A fresh empty context is created, symbols are read until syms.next()
// returns -1, and the context is released at the end.
// Route annotations: a symbol equal to m_iStartSym starts an annotation; the
// following strings are accumulated into strRoute until the train-stop
// delimiter (or an empty peekAhead()), and bHaveRoute is set. The next
// ordinary symbol is then learned with that route information, after which
// bHaveRoute/strRoute are reset.
// NOTE(review): several braces and conditions of this function are not
// visible here; the notes below describe only the statements that are.
156 void CRoutingAlphMgr::CRoutingTrainer::Train(CAlphabetMap::SymbolStream
&syms
) {
157 CLanguageModel::Context trainContext
= m_pLanguageModel
->CreateEmptyContext();
// Pending route annotation (if any) for the next symbol to be learned.
159 string strRoute
; bool bHaveRoute(false);
160 for (symbol sym
; (sym
=syms
.next(m_pAlphabet
))!=-1;) {
161 if (sym
== m_iStartSym
) {
// For sym==0 the text just read (peekBack()) must equal the start delimiter
// for this to count as an annotation start.
162 if (sym
!=0 || syms
.peekBack()==m_pInfo
->m_strConversionTrainStart
) {
// NOTE(review): presumably issued only when an annotation is already
// pending (bHaveRoute) - the guarding condition is not visible; confirm.
164 m_pMsgs
->FormatMessageWithString(_("Warning: in training file, annotation '<%s>' is followed by another annotation and will be ignored"),
166 strRoute
.clear(); bHaveRoute
=true;
// Consume strings into strRoute; on reaching the stop delimiter the loop
// breaks before the increment, so the delimiter itself is not appended.
167 for (string s
; (s
=syms
.peekAhead()).length(); strRoute
+=s
) {
168 syms
.next(m_pAlphabet
);
169 if (s
==m_pInfo
->m_strConversionTrainStop
) break;
171 continue; //read next, hopefully a CH (!)
172 } //else, unknown symbol, but does not match pinyin delimiter; fallthrough
174 if (readEscape(trainContext
, sym
, syms
)) continue; //TODO warn if py lost?
175 //OK, sym is a (CH) symbol to learn.
// A non-zero route from getRoute() is learned via LearnSymbol; LearnBaseSymbol
// is called with the base symbol sym.
// NOTE(review): whether LearnBaseSymbol is the else-branch is not visible.
177 if (symbol route
= getRoute(bHaveRoute
, strRoute
, sym
))
178 m_pLanguageModel
->LearnSymbol(trainContext
, route
);
180 static_cast<CRoutingPPMLanguageModel
*>(m_pLanguageModel
)->LearnBaseSymbol(trainContext
, sym
);
181 } //else, silently drop - as standard CTrainer
// The annotation applies to one symbol only.
182 bHaveRoute
=false; strRoute
.clear();
184 m_pLanguageModel
->ReleaseContext(trainContext
);
// Returns a newly allocated CRoutingTrainer for this manager, constructed
// with m_pInterface as its first (message display) argument and `this` as
// the manager.
// NOTE(review): the object is created with new and not freed here;
// presumably the caller takes ownership - confirm.
188 CTrainer
*CRoutingAlphMgr::GetTrainer() {
189 //We pass in the pinyin alphabet to define the context-switch escape character, and the default context.
190 // Although the default context will be symbolified via the _chinese_ alphabet, this seems reasonable
191 // as it is the Pinyin alphabet which defines the conversion mapping (i.e. m_strConversionTarget!)
192 return new CRoutingTrainer(m_pInterface
, this);