3 // Copyright (c) 2007 The Dasher Team
5 // This file is part of Dasher.
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef __alphabetmanager_h__
22 #define __alphabetmanager_h__
24 #include "LanguageModelling/LanguageModel.h"
25 #include "DasherNode.h"
26 #include "NodeManager.h"
28 #include "Alphabet/AlphInfo.h"
29 #include "SettingsStore.h"
30 #include "Observable.h"
31 #include "WordGeneratorBase.h"
33 class CNodeCreationManager
;
38 class CDasherInterfaceBase
;
43 /// Implementation of CNodeManager for regular 'alphabet' nodes, ie
44 /// the basic Dasher behaviour. Child nodes are populated according
45 /// to the appropriate alphabet file, with sizes given by the language model.
48 /// Note Dec11, refactoring to allow subclasses to change how character
49 /// data is obtained from the alphabet. All information on valid symbol indices
50 /// and the tree of groups, is obtained from m_pBaseGroup, which is created
51 /// by a call to copyGroups. Besides this, the only routines accessing _symbol_
52 /// data from the alphabet are: CreateLanguageModel; GetTrainer;
53 /// GetColour (called from CSymbolNode constructor); CreateSymbolNode and
54 /// CSymbolNode::outputText(). [many other routines access e.g. default context, training file, and so on]
56 class CAlphabetManager
: public CNodeManager
, protected CSettingsUser
{
58 ///Create a new AlphabetManager. Note, not usable until Setup() called.
/// \param pCreateFrom settings user this manager is created from; presumably
///  forwarded to the CSettingsUser base (constructor body not visible here)
/// \param pInterface presumably stored as m_pInterface - TODO confirm
/// \param pNCManager presumably stored as m_pNCManager - TODO confirm
/// \param pAlphabet presumably stored as m_pAlphabet - TODO confirm
59 CAlphabetManager(CSettingsUser
*pCreateFrom
, CDasherInterfaceBase
*pInterface
, CNodeCreationManager
*pNCManager
, const CAlphInfo
*pAlphabet
);
61 ///Must be called after construction, before the AlphMgr is used. Calls
62 /// InitMap(), looks for a usable context-switch delimiter, and
63 /// calls CreateLanguageModel.
/// NOTE(review): the three lines above read as the description of Setup()
///  (the constructor and InitMap comments both refer to it); the declaration
///  of Setup() itself does not appear in this listing.
///MakeLabels: takes the screen on which labels are to be made. Presumably this
/// is where copyGroups is invoked to fill in m_pBaseGroup, m_mGroupLabels and
/// m_vLabels - TODO confirm against the implementation.
66 virtual void MakeLabels(CDasherScreen
*pScreen
);
67 ///Gets a new trainer to train this LM. Caller is responsible for deallocating the
/// returned trainer.
/// Virtual: the class-level note lists GetTrainer among the routines that access
/// symbol data from the alphabet, so subclasses changing that may need to override.
69 virtual CTrainer
*GetTrainer();
71 /// Gets a (Game) Word Generator to make target sentences for the current alphabet
/// NOTE(review): ownership of the returned generator is not stated here - confirm
///  whether the caller is expected to delete it.
72 CWordGeneratorBase
*GetGameWords();
///Destructor. Virtual, as this class declares virtual methods intended for
/// subclasses to override.
74 virtual ~CAlphabetManager();
75 /// Flush to the user's training file everything written in this AlphMgr
/// (presumably the contents of strTrainfileBuffer - TODO confirm).
76 /// \param pInterface to use for I/O by calling WriteTrainFile(fname,txt)
77 void WriteTrainFileFull(CDasherInterfaceBase
*pInterface
);
79 ///Initializes the alphabet map (m_map) from the characters in the alphabet.
80 /// Called from Setup(), i.e. before the manager is or need be usable.
81 /// The default adds all symbols in the alphabet to the map (inc. dealing
82 /// with the paragraph symbol, if any), and DASHER_ASSERTs that all such
83 /// characters have distinct texts.
/// Virtual, so subclasses can replace the default described above.
84 virtual void InitMap();
86 ///Creates the LM, and stores in m_pLanguageModel.
87 /// Default implementation switches on LP_LANGUAGE_MODEL_ID.
88 /// Note subclasses changing the interpretation of the AlphInfo, should override
89 /// this to take account of its new meaning.
/// (The Setup() description in this header states that Setup() calls this.)
90 virtual void CreateLanguageModel();
92 ///Base of all group+character information presented to the user;
93 /// created by calling copyGroups on the alphabet.
/// NOTE(review): who deallocates this group tree is not visible in this header.
94 SGroupInfo
*m_pBaseGroup
;
95 ///Called to create the base group the AlphMgr will use from the alphabet.
96 /// The default implementation elides all single-element groups, and fills in
97 /// m_mGroupLabels and m_vLabels using the supplied screen; subclasses may
98 /// override to do more, but should call the superclass method to set up the
/// labels (presumably - the end of this sentence is missing from the listing).
100 /// (Note: each invocation creates labels for all symbols in pBase, *and*
101 /// all symbols in any later siblings of pBase (by recursive call on pNext).
102 /// Of those, symbols in any child groups may be made by recursive call on
103 /// pChild, but only if pBase has >1 child node (symbol/group).)
/// \param pBase first group to copy; its later siblings are reached via pNext
/// \param pScreen screen used to create the labels
/// \return the copied group structure (per the m_pBaseGroup comment, calling
///  this on the alphabet is how m_pBaseGroup is created)
104 virtual SGroupInfo
*copyGroups(const SGroupInfo
*pBase
, CDasherScreen
*pScreen
);
106 ///A label for each group in the elided tree
/// (keyed by group pointer; filled in by copyGroups, per that method's description)
107 std::map
<const SGroupInfo
*,CDasherScreen::Label
*> m_mGroupLabels
;
108 ///A label for each symbol, indexed by symbol id (element 0 = null)
/// (filled in by copyGroups, per that method's description)
109 std::vector
<CDasherScreen::Label
*> m_vLabels
;
///Text for the label of symbol i (as the name suggests; implementation not
/// visible here). NOTE(review): presumably taken from the alphabet by default,
/// and virtual so subclasses can supply different text - TODO confirm.
/// Returns a reference, so the string must outlive the call - confirm what owns it.
111 virtual const std::string
&GetLabelText(symbol i
) const;
114 /// Abstract superclass for alphabet manager nodes, provides common implementation
115 /// code for rebuilding parent nodes = reversing.
116 class CAlphBase
: public CDasherNode
{
///Accessor: returns the alphabet manager pointer held in m_pMgr.
118 CAlphabetManager
*mgr() const {return m_pMgr
;}
119 ///Rebuilds this node's parent by recreating the previous 'root' node,
120 /// then calling RebuildForwardsFromAncestor
/// \return presumably the rebuilt parent node - TODO confirm; what is returned
///  when no parent can be rebuilt is not shown in this header.
121 CDasherNode
*RebuildParent();
122 ///Called to build a symbol (leaf) node which is a descendant of the symbol or root node preceding this.
123 /// Default implementation just calls the manager's CreateSymbolNode method to create a new node,
124 /// but subclasses can override to graft themselves into the appropriate point beneath the previous node.
125 /// \param pParent parent of the symbol node to create; could be the previous root, or an intervening node (e.g. group)
126 /// \param iBkgCol background colour to show through any new transparent node created;
127 /// if the existing node is grafted in, again this will already have been taken into account.
/// NOTE(review): the declaration below has no iBkgCol parameter, so the
///  \param iBkgCol text above looks stale.
/// \param iSymbol symbol for which a node is to be built
128 virtual CDasherNode
*RebuildSymbol(CAlphNode
*pParent
, symbol iSymbol
);
129 ///Called to build a group node which is a descendant of the symbol or root node preceding this.
130 /// Default implementation calls the manager's CreateGroupNode method to create a new node,
131 /// but then populates that group (i.e. further descends the hierarchy) _if_ that group
132 /// would contain this node (see isInGroup). Subclasses can override to graft themselves into the hierarchy, if appropriate.
133 /// \param pParent parent of the group node to create; could be the previous root, or an intervening node (e.g. group)
/// \param iBkgCol background colour; presumably passed on to CreateGroupNode as the
///  colour to show through a transparent group - TODO confirm
/// \param pInfo group for which a node is to be built
134 virtual CDasherNode
*RebuildGroup(CAlphNode
*pParent
, int iBkgCol
, const SGroupInfo
*pInfo
);
135 ///Just keep track of the last node output (for training file purposes)
137 ///Just keep track of the last node output (for training file purposes)
140 ///Called in process of rebuilding parent: fill in the hierarchy _beneath_
141 /// the previous root node, by calling IterateChildGroups passing this node as
142 /// last parameter, until the point where this node fits in is found,
143 /// at which point RebuildSymbol/Group should graft it in.
144 /// \param pNewNode newly-created root node beneath which this node should fit
145 virtual void RebuildForwardsFromAncestor(CAlphNode
*pNewNode
);
///Constructor.
/// \param iOffset,iColour,pLabel presumably forwarded to the CDasherNode base
///  class - TODO confirm (constructor body not visible here)
/// \param pMgr alphabet manager for this node; presumably stored in m_pMgr
146 CAlphBase(int iOffset
, int iColour
, CDasherScreen::Label
*pLabel
, CAlphabetManager
*pMgr
);
///Alphabet manager associated with this node; this is what mgr() returns.
147 CAlphabetManager
*m_pMgr
;
148 ///Number of unicode characters entered by this node; i.e., the number
149 /// to take off this node's offset, to get the offset of the most-recent
150 /// root (e.g. previous symbol). Default is 0.
/// \return 0 in this base implementation; virtual so subclasses can override.
151 virtual int numChars() {return 0;}
152 ///return true if the specified group would contain this node
153 /// (as a symbol or subgroup), any number of levels beneath it
/// Pure virtual: every concrete node class must provide an implementation.
/// \param pGroup group to test against
154 virtual bool isInGroup(const SGroupInfo
*pGroup
)=0;
156 ///Additionally stores LM contexts and probabilities calculated therefrom
157 class CAlphNode
: public CAlphBase
{
///Constructor; takes the same arguments as CAlphBase's constructor
/// (presumably just forwarded to it - TODO confirm, body not visible here).
159 CAlphNode(int iOffset
, int iColour
, CDasherScreen::Label
*pLabel
, CAlphabetManager
*pMgr
);
///Language-model context held by this node (the class comment says nodes of this
/// type store LM contexts). NOTE(review): when it is set and who releases it is
/// not visible in this header.
160 CLanguageModel::Context iContext
;
162 /// Delete any storage allocated for this node
/// (presumably m_pProbInfo and/or the LM context - TODO confirm in the implementation)
164 virtual ~CAlphNode();
165 ///Have to call this from CAlphabetManager, and from CGroupNode on a _different_ CAlphNode, hence public...
/// \return pointer to this node's probability info; presumably computed on first
///  use and cached in m_pProbInfo - TODO confirm.
166 virtual std::vector
<unsigned int> *GetProbInfo();
///Number of children this node is expected to have (going by the name;
/// presumably overrides a CDasherNode method - TODO confirm).
167 virtual int ExpectedNumChildren();
///Probability info for this node (same type as returned by GetProbInfo()).
/// NOTE(review): presumably NULL until computed, and owned by this node - TODO confirm.
169 std::vector
<unsigned int> *m_pProbInfo
;
171 class CSymbolNode
: public CAlphNode
{
173 ///Standard constructor, gets colour from GetColour(symbol,offset) and label from current alphabet
174 /// Note we treat GetColour() as always returning an opaque color.
/// \param iOffset offset of the node; also passed to GetColour, per the above
/// \param pLabel label for the node
/// \param pMgr alphabet manager for the node
/// \param iSymbol symbol of this node (presumably initialises the const member iSymbol)
175 CSymbolNode(int iOffset
, CDasherScreen::Label
*pLabel
, CAlphabetManager
*pMgr
, symbol iSymbol
);
177 ///Create the children of this node, by starting traversal of the alphabet from the top
178 virtual void PopulateChildren();
///Called when this node is output. NOTE(review): going by the manager's
/// m_pLastOutput and strTrainfileBuffer comments, this presumably updates
/// both - confirm in the implementation.
179 virtual void Output();
181 ///Override to provide symbol number, probability, _edit_ text from alphabet
/// \return SymbolProb describing this node's symbol, as per the line above
182 virtual SymbolProb
GetSymbolProb() const;
///Sets flag iFlag on this node to bValue. NOTE(review): overridden here,
/// presumably to react to particular flags changing - implementation not visible.
184 virtual void SetFlag(int iFlag
, bool bValue
);
///Game-mode search for symbol sym. NOTE(review): presumably returns true if
/// this node enters sym - TODO confirm.
186 virtual bool GameSearchNode(symbol sym
);
///Obtains context symbols, written into vContextSymbols.
/// NOTE(review): all parameter meanings below are inferred from names and types only:
/// \param pInterface presumably the source of already-entered text
/// \param pAlphabetMap presumably used to convert text into symbol numbers
/// \param vContextSymbols output vector receiving the context symbols
/// \param iOffset,iLength presumably the start and length of the context wanted
187 virtual void GetContext(CDasherInterfaceBase
*pInterface
, const CAlphabetMap
*pAlphabetMap
, std::vector
<symbol
> &vContextSymbols
, int iOffset
, int iLength
);
///Returns the alphabet symbol for this node (presumably iSymbol - TODO confirm).
188 virtual symbol
GetAlphSymbol();
189 ///Override: if the symbol to create is the same as this node's symbol, return this node instead of creating a new one
/// \param pParent parent for the symbol node
/// \param iSymbol symbol to create a node for; compared with this node's symbol
190 virtual CDasherNode
*RebuildSymbol(CAlphNode
*pParent
, symbol iSymbol
);
///Text output by this symbol node. The class-level note on CAlphabetManager
/// lists this among the routines that read symbol data from the alphabet.
/// Returns a reference, so the string must outlive the call - confirm what owns it.
192 virtual const std::string
&outputText() const;
193 ///Text to write to user training file/buffer when this symbol output.
194 /// Default just returns the output text escaped if necessary.
/// Virtual, so subclasses can write different text to the training file.
195 virtual std::string
trainText();
196 /// Number of unicode _characters_ (not octets) for this symbol.
197 /// Uniquely, a paragraph symbol can enter two distinct unicode characters
198 /// (i.e. '\r' and '\n'); every other symbol enters only a single
199 /// unicode char, even if that might take >1 octet.
201 ///Compatibility constructor, so that subclasses can specify their own colour & label
/// \param iColour colour to use, instead of one obtained from GetColour
/// \param _iSymbol symbol of this node (presumably initialises the const member iSymbol)
/// Remaining parameters as for the standard constructor.
202 CSymbolNode(int iOffset
, int iColour
, CDasherScreen::Label
*pLabel
, CAlphabetManager
*pMgr
, symbol _iSymbol
);
203 ///Override: true iff pGroup encloses this symbol (according to its start/end symbol#)
/// Implements the pure virtual CAlphBase::isInGroup.
204 bool isInGroup(const SGroupInfo
*pGroup
);
///Symbol for this node; const, so it is fixed at construction.
205 const symbol iSymbol
;
208 class CGroupNode
: public CAlphNode
{
///Constructor.
/// \param iOffset,pLabel,pMgr as for the other node constructors
/// \param iBkgCol background colour (presumably the colour to show through if the
///  group is transparent - TODO confirm)
/// \param pGroup group this node represents (presumably stored in m_pGroup); the
///  RebuildParent comment indicates NULL means the whole/root of the alphabet
210 CGroupNode(int iOffset
, CDasherScreen::Label
*pLabel
, int iBkgCol
, CAlphabetManager
*pMgr
, const SGroupInfo
*pGroup
);
212 ///Override: if m_pGroup==NULL, i.e. whole/root-of alphabet, cannot rebuild.
/// NOTE(review): the value returned in the cannot-rebuild case is not shown in
///  this header (presumably NULL) - confirm in the implementation.
213 virtual CDasherNode
*RebuildParent();
215 ///Create children of this group node, by traversing the section of the alphabet
216 /// indicated by m_pGroup.
217 virtual void PopulateChildren();
///Number of children this group node is expected to have (going by the name;
/// presumably derived from m_pGroup - TODO confirm).
218 virtual int ExpectedNumChildren();
///Game-mode search for symbol sym. NOTE(review): presumably true if this group
/// contains sym - TODO confirm.
219 virtual bool GameSearchNode(symbol sym
);
///Overrides CAlphNode::GetProbInfo. NOTE(review): the CAlphNode comment says a
/// CGroupNode calls GetProbInfo on a different CAlphNode, so this presumably
/// delegates (e.g. to the parent) - TODO confirm.
220 std::vector
<unsigned int> *GetProbInfo();
221 ///Override: if the group to create is the same as this node's group, return this node instead of creating a new one
/// \param pParent parent for the group node
/// \param iBkgCol background colour for the group node
/// \param pInfo group to create a node for; compared with this node's group
222 virtual CDasherNode
*RebuildGroup(CAlphNode
*pParent
, int iBkgCol
, const SGroupInfo
*pInfo
);
224 ///Override: true if pGroup encloses this one (by start/end symbol#)
/// Implements the pure virtual CAlphBase::isInGroup.
225 bool isInGroup(const SGroupInfo
*pGroup
);
///Group this node represents; per the RebuildParent comment, NULL means the
/// whole/root of the alphabet.
227 const SGroupInfo
*m_pGroup
;
232 /// Get a new root node owned by this manager
233 /// pContext - node from which to extract context (e.g. perhaps an un-seen node);
234 /// the new root is NOT made a child, and initially has no parent.
235 /// bEnteredLast - true if this "root" node should be considered as entering the preceding symbol
236 /// Offset is the index of the character which _child_ nodes (i.e. between which this root allows selection)
237 /// will enter. (Also used to build context for preceding characters.)
238 /// Note, the new node will _not_ be NF_SEEN
/// \return the newly created root node
239 CAlphNode
*GetRoot(CDasherNode
*pContext
, bool bEnteredLast
, int iOffset
);
///Returns the alphabet used by this manager (presumably m_pAlphabet - TODO confirm).
241 const CAlphInfo
*GetAlphabet() const;
244 ///Called to get the symbols in the context for (preceding) a new node
245 /// \param pParent node to assume has been output, when obtaining context
246 /// \param iRootOffset offset of the node that will be constructed; i.e. context should include symbols
247 /// up to & including this offset.
248 /// \param pAlphMap use to convert entered text into symbol numbers
249 /// (could be the manager's m_pAlphabetMap, but subclasses can pass in something different)
250 /// \return pair: first element is the last symbol in the context, _if_ a usable context
251 /// could be extracted, else 0 (=> couldn't get context, using alphabet default); second
252 /// element is the result of entering the symbols retrieved, into a fresh LM context.
253 std::pair
<symbol
, CLanguageModel::Context
> GetContextSymbols(CDasherNode
*pParent
, int iRootOffset
, const CAlphabetMap
*pAlphMap
);
255 ///Called to create a node for a given symbol (leaf), as a child of a specified parent node
256 /// \param iBkgCol colour behind the new node, i.e. that should show through if the (group) node is transparent
/// NOTE(review): the declaration below has no iBkgCol parameter; the \param above
///  looks like it belongs to CreateGroupNode.
/// \param pParent parent of the new node
/// \param iSymbol symbol for which to create a node
257 virtual CDasherNode
*CreateSymbolNode(CAlphNode
*pParent
, symbol iSymbol
);
///Called to create a node for a given group, as a child of a specified parent
/// node (by analogy with CreateSymbolNode; implementation not visible here).
/// \param pParent parent of the new node
/// \param iBkgCol presumably the colour behind the new node, i.e. that should show
///  through if the group node is transparent - TODO confirm
/// \param pInfo group for which to create a node
258 virtual CGroupNode
*CreateGroupNode(CAlphNode
*pParent
, int iBkgCol
, const SGroupInfo
*pInfo
);
259 ///Called to create a new symbol root, e.g. for going backwards
260 /// \param iOffset index of symbol entered by the node
/// \param ctx language-model context for the new node (presumably the second
///  element returned by GetContextSymbols - TODO confirm)
261 /// \param sym symbol number as returned as first element of GetContextSymbols
262 virtual CAlphNode
*CreateSymbolRoot(int iOffset
, CLanguageModel::Context ctx
, symbol sym
);
264 ///Called to compute colour for a symbol at a specified offset.
265 /// Wraps CAlphabet::GetColour(sym), but (a) implements a default
266 /// scheme for symbols not specifying a colour, and (b) implements
267 /// colour-cycling by phase (two cycles, using the LSBit of offset)
/// \param sym symbol whose colour is wanted
/// \param iOffset offset of the symbol; its least-significant bit selects the phase
/// \return colour for the symbol (the CSymbolNode constructor treats it as opaque)
268 virtual int GetColour(symbol sym
, int iOffset
) const;
///Dasher interface; the pointer itself is const, so it cannot be reseated
/// after construction.
270 CDasherInterfaceBase
* const m_pInterface
;
///Language model; CreateLanguageModel() stores the LM it creates here.
272 CLanguageModel
*m_pLanguageModel
;
///Node creation manager (presumably the one passed to the constructor - TODO confirm).
274 CNodeCreationManager
*m_pNCManager
;
///Alphabet in use (presumably the one passed to the constructor - TODO confirm).
275 const CAlphInfo
*m_pAlphabet
;
279 ///Wraps m_pLanguageModel->GetProbs to implement nonuniformity
280 /// (also leaves space for NCManager::AddExtras to add control node)
281 /// Returns array of non-cumulative probs. Should this be protected and/or virtual???
/// \param pProbs output: filled with the non-cumulative probabilities
/// \param iContext language-model context for which to get probabilities
282 void GetProbs(std::vector
<unsigned int> *pProbs
, CLanguageModel::Context iContext
);
284 ///Constructs child nodes under the specified parent according to provided group.
285 /// Nodes are created by calling CreateSymbolNode and CreateGroupNode, unless buildAround is non-null.
/// \param pParent node under which the children are constructed
286 /// \param pParentGroup group describing which symbols and/or subgroups should be constructed
287 /// (these will fill the parent), or NULL meaning the entire alphabet (i.e. toplevel groups
288 /// and symbols not in any group).
289 /// \param buildAround if non-null, its RebuildSymbol and RebuildGroup methods will be called
290 /// instead of the AlphabetManager's CreateSymbolNode/CreateGroupNode methods. This is used when
291 /// rebuilding parents: passing in the pre-existing node here, allows it to intercept those calls
292 /// and graft itself in in place of a new node, when appropriate.
293 void IterateChildGroups(CAlphNode
*pParent
, const SGroupInfo
*pParentGroup
, CAlphBase
*buildAround
);
295 ///Last node (owned by this manager) that was output; if a node
296 /// is Undo()ne, this is set to its parent. This is used to detect
297 /// context switches.
/// NOTE(review): non-owning as far as this header shows; how it is kept valid
///  when nodes are deleted is not visible here.
298 CDasherNode
*m_pLastOutput
;
299 ///Text actually written in the current context; both appended and truncated
300 /// as nodes are Output() and Undo()ne.
/// (Presumably what WriteTrainFileFull flushes - TODO confirm.)
301 std::string strTrainfileBuffer
;
302 ///Context in (i.e. after) which anything in strTrainfileBuffer was written.
303 /// Set when first character put in strTrainfileBuffer (following a context switch),
304 /// as we may not be able to get the preceding characters if we wait too long.
305 std::string strTrainfileContext
;
307 ///A character, 33<=c<=255, not in the alphabet; used to delimit contexts.
308 ///"" if no such could be found (=> will be found on a per-context basis)
/// (The Setup() description in this header says Setup() looks for a usable
/// context-switch delimiter; presumably that search sets this member.)
309 std::string m_sDelim
;