Updated German translation
[dasher.git] / Src / DasherCore / AlphabetManager.h
blob74370d72ee856a07081f56ea2b76c124859be6bd
1 // AlphabetManager.h
2 //
3 // Copyright (c) 2007 The Dasher Team
4 //
5 // This file is part of Dasher.
6 //
7 // Dasher is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
12 // Dasher is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with Dasher; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef __alphabetmanager_h__
22 #define __alphabetmanager_h__
24 #include "LanguageModelling/LanguageModel.h"
25 #include "DasherNode.h"
26 #include "NodeManager.h"
27 #include "Trainer.h"
28 #include "Alphabet/AlphInfo.h"
29 #include "SettingsStore.h"
30 #include "Observable.h"
31 #include "WordGeneratorBase.h"
33 class CNodeCreationManager;
34 struct SGroupInfo;
36 namespace Dasher {
38 class CDasherInterfaceBase;
40 /// \ingroup Model
41 /// @{
43 /// Implementation of CNodeManager for regular 'alphabet' nodes, i.e.
44 /// the basic Dasher behaviour. Child nodes are populated according
45 /// to the appropriate alphabet file, with sizes given by the
46 /// language model.
47 ///
48 /// Note Dec11: refactored to allow subclasses to change how character
49 /// data is obtained from the alphabet. All information on valid symbol indices
50 /// and the tree of groups is obtained from m_pBaseGroup, which is created
51 /// by a call to copyGroups. Besides this, the only routines accessing _symbol_
52 /// data from the alphabet are: CreateLanguageModel; GetTrainer;
53 /// GetColour (called from CSymbolNode constructor); CreateSymbolNode and
54 /// CSymbolNode::outputText(). [Many other routines access e.g. the default context, training file, and so on.]
56 class CAlphabetManager : public CNodeManager, protected CSettingsUser {
57 public:
58 ///Create a new AlphabetManager. Note: not usable until Setup() has been called.
59 CAlphabetManager(CSettingsUser *pCreateFrom, CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet);
61 ///Must be called after construction, before the AlphMgr is used. Calls
62 /// InitMap(), looks for a usable context-switch delimiter, and
63 /// calls CreateLanguageModel.
64 void Setup();
///NOTE(review): undocumented in this header. Presumably (re)creates the
/// group and symbol labels on pScreen -- TODO confirm against the .cpp.
66 virtual void MakeLabels(CDasherScreen *pScreen);
67 ///Gets a new trainer to train this LM. The caller is responsible for deallocating the
68 /// trainer later.
69 virtual CTrainer *GetTrainer();
71 /// Gets a (Game) Word Generator to make target sentences for the current alphabet.
72 CWordGeneratorBase *GetGameWords();
74 virtual ~CAlphabetManager();
75 /// Flush to the user's training file everything written in this AlphMgr.
76 /// \param pInterface to use for I/O by calling WriteTrainFile(fname,txt)
77 void WriteTrainFileFull(CDasherInterfaceBase *pInterface);
78 protected:
79 ///Initializes the alphabet map (m_map) from the characters in the alphabet.
80 /// Called from Setup(), i.e. before the manager is or need be usable.
81 /// The default adds all symbols in the alphabet to the map (inc. dealing
82 /// with the paragraph symbol, if any), and DASHER_ASSERTs that all such
83 /// characters have distinct texts.
84 virtual void InitMap();
86 ///Creates the LM, and stores it in m_pLanguageModel.
87 /// Default implementation switches on LP_LANGUAGE_MODEL_ID.
88 /// Note: subclasses changing the interpretation of the AlphInfo should override
89 /// this to take account of its new meaning.
90 virtual void CreateLanguageModel();
92 ///Base of all group+character information presented to the user;
93 /// created by calling copyGroups on the alphabet.
94 SGroupInfo *m_pBaseGroup;
95 ///Called to create the base group the AlphMgr will use from the alphabet.
96 /// The default implementation elides all single-element groups, and fills in
97 /// m_mGroupLabels and m_vLabels using the supplied screen; subclasses may
98 /// override to do more, but should call the superclass method to set up the
99 /// labels too.
100 /// (Note: each invocation creates labels for all symbols in pBase, *and*
101 /// all symbols in any later siblings of pBase (by recursive call on pNext).
102 /// Of those, symbols in any child groups may be made by recursive call on
103 /// pChild, but only if pBase has >1 child node (symbol/group).)
/// \param pBase first group to copy; its later siblings are also processed (via pNext)
/// \param pScreen screen used to create the labels
/// \return the copied group, for use as the AlphMgr's base group
104 virtual SGroupInfo *copyGroups(const SGroupInfo *pBase, CDasherScreen *pScreen);
106 ///A label for each group in the elided tree
107 std::map<const SGroupInfo *,CDasherScreen::Label *> m_mGroupLabels;
108 ///A label for each symbol, indexed by symbol id (element 0 = null)
109 std::vector<CDasherScreen::Label *> m_vLabels;
///NOTE(review): undocumented in this header. Presumably the text used to build
/// the label for symbol i; virtual, so subclasses can substitute their own -- TODO confirm.
111 virtual const std::string &GetLabelText(symbol i) const;
///Forward declaration: CAlphBase's methods take CAlphNode pointers, but
/// CAlphNode itself is only defined after CAlphBase.
113 class CAlphNode;
114 /// Abstract superclass for alphabet manager nodes; provides common implementation
115 /// code for rebuilding parent nodes, i.e. reversing.
116 class CAlphBase : public CDasherNode {
117 public:
///Returns the manager stored in m_pMgr.
118 CAlphabetManager *mgr() const {return m_pMgr;}
119 ///Rebuilds this node's parent by recreating the previous 'root' node,
120 /// then calling RebuildForwardsFromAncestor.
121 CDasherNode *RebuildParent();
122 ///Called to build a symbol (leaf) node which is a descendant of the symbol or root node preceding this.
123 /// Default implementation just calls the manager's CreateSymbolNode method to create a new node,
124 /// but subclasses can override to graft themselves into the appropriate point beneath the previous node.
125 /// \param pParent parent of the symbol node to create; could be the previous root, or an intervening node (e.g. group)
126 /// \param iSymbol symbol for which the node is to be built
127 /// \return the node built for iSymbol: newly created by default, or an existing node grafted in by an override
128 virtual CDasherNode *RebuildSymbol(CAlphNode *pParent, symbol iSymbol);
129 ///Called to build a group node which is a descendant of the symbol or root node preceding this.
130 /// Default implementation calls the manager's CreateGroupNode method to create a new node,
131 /// but then populates that group (i.e. further descends the hierarchy) _if_ that group
132 /// would contain this node (see isInGroup). Subclasses can override to graft themselves into the hierarchy, if appropriate.
133 /// \param pParent parent of the group node to create; could be the previous root, or an intervening node (e.g. group)
/// \param iBkgCol background colour to show through any new transparent node created;
/// if the existing node is grafted in, this will already have been taken into account.
/// \param pInfo group for which the node is to be built
134 virtual CDasherNode *RebuildGroup(CAlphNode *pParent, int iBkgCol, const SGroupInfo *pInfo);
135 ///Just keep track of the last node output (for training file purposes)
136 void Undo();
137 ///Just keep track of the last node output (for training file purposes)
138 void Output();
139 protected:
140 ///Called in the process of rebuilding the parent: fill in the hierarchy _beneath_
141 /// the previous root node, by calling IterateChildGroups passing this node as
142 /// last parameter, until the point where this node fits in is found,
143 /// at which point RebuildSymbol/Group should graft it in.
144 /// \param pNewNode newly-created root node beneath which this node should fit
145 virtual void RebuildForwardsFromAncestor(CAlphNode *pNewNode);
146 CAlphBase(int iOffset, int iColour, CDasherScreen::Label *pLabel, CAlphabetManager *pMgr);
147 CAlphabetManager *m_pMgr;
148 ///Number of unicode characters entered by this node; i.e., the number
149 /// to take off this node's offset, to get the offset of the most-recent
150 /// root (e.g. previous symbol). Default is 0.
151 virtual int numChars() {return 0;}
152 ///Return true if the specified group would contain this node
153 /// (as a symbol or subgroup), any number of levels beneath it.
154 virtual bool isInGroup(const SGroupInfo *pGroup)=0;
156 ///Additionally stores LM contexts and probabilities calculated therefrom
157 class CAlphNode : public CAlphBase {
158 public:
159 CAlphNode(int iOffset, int iColour, CDasherScreen::Label *pLabel, CAlphabetManager *pMgr);
///Language-model context stored for this node.
160 CLanguageModel::Context iContext;
162 /// Delete any storage allocated for this node
164 virtual ~CAlphNode();
165 ///Have to call this from CAlphabetManager, and from CGroupNode on a _different_ CAlphNode, hence public...
166 virtual std::vector<unsigned int> *GetProbInfo();
167 virtual int ExpectedNumChildren();
168 private:
///NOTE(review): presumably the probabilities handed out by GetProbInfo(),
/// and the storage released by the destructor -- TODO confirm against the .cpp.
169 std::vector<unsigned int> *m_pProbInfo;
///Node for a single symbol of the alphabet (a leaf), identified by iSymbol.
171 class CSymbolNode : public CAlphNode {
172 public:
173 ///Standard constructor: gets colour from GetColour(symbol,offset) and label from current alphabet.
174 /// Note we treat GetColour() as always returning an opaque colour.
175 CSymbolNode(int iOffset, CDasherScreen::Label *pLabel, CAlphabetManager *pMgr, symbol iSymbol);
177 ///Create the children of this node, by starting traversal of the alphabet from the top
178 virtual void PopulateChildren();
179 virtual void Output();
180 virtual void Undo();
181 ///Override to provide symbol number, probability, _edit_ text from alphabet
182 virtual SymbolProb GetSymbolProb() const;
184 virtual void SetFlag(int iFlag, bool bValue);
186 virtual bool GameSearchNode(symbol sym);
187 virtual void GetContext(CDasherInterfaceBase *pInterface, const CAlphabetMap *pAlphabetMap, std::vector<symbol> &vContextSymbols, int iOffset, int iLength);
188 virtual symbol GetAlphSymbol();
189 ///Override: if the symbol to create is the same as this node's symbol, return this node instead of creating a new one
190 virtual CDasherNode *RebuildSymbol(CAlphNode *pParent, symbol iSymbol);
191 protected:
192 virtual const std::string &outputText() const;
193 ///Text to write to user training file/buffer when this symbol is output.
194 /// Default just returns the output text, escaped if necessary.
195 virtual std::string trainText();
196 /// Number of unicode _characters_ (not octets) for this symbol.
197 /// Uniquely, a paragraph symbol can enter two distinct unicode characters
198 /// (i.e. '\r' and '\n'); every other symbol enters only a single
199 /// unicode char, even if that might take >1 octet.
200 int numChars();
201 ///Compatibility constructor, so that subclasses can specify their own colour & label
202 CSymbolNode(int iOffset, int iColour, CDasherScreen::Label *pLabel, CAlphabetManager *pMgr, symbol _iSymbol);
203 ///Override: true iff pGroup encloses this symbol (according to its start/end symbol#)
204 bool isInGroup(const SGroupInfo *pGroup);
///The symbol this node represents; const, so fixed at construction.
205 const symbol iSymbol;
///Node for a group of the alphabet, described by m_pGroup
/// (m_pGroup==NULL meaning the whole alphabet, i.e. its root).
208 class CGroupNode : public CAlphNode {
209 public:
210 CGroupNode(int iOffset, CDasherScreen::Label *pLabel, int iBkgCol, CAlphabetManager *pMgr, const SGroupInfo *pGroup);
212 ///Override: if m_pGroup==NULL, i.e. whole/root-of alphabet, cannot rebuild.
213 virtual CDasherNode *RebuildParent();
215 ///Create children of this group node, by traversing the section of the alphabet
216 /// indicated by m_pGroup.
217 virtual void PopulateChildren();
218 virtual int ExpectedNumChildren();
219 virtual bool GameSearchNode(symbol sym);
220 std::vector<unsigned int> *GetProbInfo();
221 ///Override: if the group to create is the same as this node's group, return this node instead of creating a new one
222 virtual CDasherNode *RebuildGroup(CAlphNode *pParent, int iBkgCol, const SGroupInfo *pInfo);
223 protected:
224 ///Override: true if pGroup encloses this one (by start/end symbol#)
225 bool isInGroup(const SGroupInfo *pGroup);
226 private:
///Group this node represents; NULL for the whole/root-of alphabet.
227 const SGroupInfo *m_pGroup;
230 public:
232 /// Get a new root node owned by this manager.
233 /// \param pContext node from which to extract context (e.g. perhaps an un-seen node);
234 /// the new root is NOT made a child, and initially has no parent.
235 /// \param bEnteredLast true if this "root" node should be considered as entering the preceding symbol
236 /// \param iOffset index of the character which _child_ nodes (i.e. those between which this root allows selection)
237 /// will enter. (Also used to build context for preceding characters.)
238 /// Note: the new node will _not_ be NF_SEEN.
239 CAlphNode *GetRoot(CDasherNode *pContext, bool bEnteredLast, int iOffset);
241 const CAlphInfo *GetAlphabet() const;
243 protected:
244 ///Called to get the symbols in the context for (preceding) a new node
245 /// \param pParent node to assume has been output, when obtaining context
246 /// \param iRootOffset offset of the node that will be constructed; i.e. context should include symbols
247 /// up to & including this offset.
248 /// \param pAlphMap use to convert entered text into symbol numbers
249 /// (could be the manager's own map, m_map, but subclasses can pass in something different)
250 /// \return pair: first element is the last symbol in the context, _if_ a usable context
251 /// could be extracted, else 0 (=> couldn't get context, using alphabet default); second
252 /// element is the result of entering the symbols retrieved, into a fresh LM context.
253 std::pair<symbol, CLanguageModel::Context> GetContextSymbols(CDasherNode *pParent, int iRootOffset, const CAlphabetMap *pAlphMap);
255 ///Called to create a node for a given symbol (leaf), as a child of a specified parent node
256 /// \param iSymbol symbol for which to create the node
257 virtual CDasherNode *CreateSymbolNode(CAlphNode *pParent, symbol iSymbol);
///Called to create a node for a given group, as a child of a specified parent node
/// \param iBkgCol colour behind the new node, i.e. that should show through if the (group) node is transparent
258 virtual CGroupNode *CreateGroupNode(CAlphNode *pParent, int iBkgCol, const SGroupInfo *pInfo);
259 ///Called to create a new symbol root, e.g. for going backwards
260 /// \param iOffset index of symbol entered by the node
261 /// \param sym symbol number as returned as first element of GetContextSymbols
262 virtual CAlphNode *CreateSymbolRoot(int iOffset, CLanguageModel::Context ctx, symbol sym);
264 ///Called to compute colour for a symbol at a specified offset.
265 /// Wraps CAlphabet::GetColour(sym), but (a) implements a default
266 /// scheme for symbols not specifying a colour, and (b) implements
267 /// colour-cycling by phase (two cycles, using the LSBit of offset)
268 virtual int GetColour(symbol sym, int iOffset) const;
270 CDasherInterfaceBase * const m_pInterface;
///The language model; created and stored here by CreateLanguageModel().
272 CLanguageModel *m_pLanguageModel;
274 CNodeCreationManager *m_pNCManager;
275 const CAlphInfo *m_pAlphabet;
///The alphabet map; initialized by InitMap().
276 CAlphabetMap m_map;
278 private:
279 ///Wraps m_pLanguageModel->GetProbs to implement nonuniformity
280 /// (also leaves space for NCManager::AddExtras to add control node).
281 /// Non-cumulative probs are delivered via pProbs (the function itself returns void). Should this be protected and/or virtual???
282 void GetProbs(std::vector<unsigned int> *pProbs, CLanguageModel::Context iContext);
284 ///Constructs child nodes under the specified parent according to provided group.
285 /// Nodes are created by calling CreateSymbolNode and CreateGroupNode, unless buildAround is non-null.
286 /// \param pParentGroup group describing which symbols and/or subgroups should be constructed
287 /// (these will fill the parent), or NULL meaning the entire alphabet (i.e. toplevel groups
288 /// and symbols not in any group).
289 /// \param buildAround if non-null, its RebuildSymbol and RebuildGroup methods will be called
290 /// instead of the AlphabetManager's CreateSymbolNode/CreateGroupNode methods. This is used when
291 /// rebuilding parents: passing in the pre-existing node here allows it to intercept those calls
292 /// and graft itself in, in place of a new node, when appropriate.
293 void IterateChildGroups(CAlphNode *pParent, const SGroupInfo *pParentGroup, CAlphBase *buildAround);
295 ///Last node (owned by this manager) that was output; if a node
296 /// is Undo()ne, this is set to its parent. This is used to detect
297 /// context switches.
298 CDasherNode *m_pLastOutput;
299 ///Text actually written in the current context; both appended and truncated
300 /// as nodes are Output() and Undo()ne.
301 std::string strTrainfileBuffer;
302 ///Context in (i.e. after) which anything in strTrainfileBuffer was written.
303 /// Set when the first character is put in strTrainfileBuffer (following a context switch),
304 /// as we may not be able to get the preceding characters if we wait too long.
305 std::string strTrainfileContext;
307 ///A character, 33<=c<=255, not in the alphabet; used to delimit contexts.
308 ///"" if no such could be found (=> will be found on a per-context basis)
309 std::string m_sDelim;
311 /// @}
316 #endif