make ValueTransfer easier to understand
[LibreOffice.git] / include / formula / FormulaCompiler.hxx
blobc455ca407d1d3f3cc5e091397c9b5a454147823e
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
21 #define INCLUDED_FORMULA_FORMULACOMPILER_HXX
23 #include <memory>
24 #include <unordered_map>
25 #include <vector>
27 #include <com/sun/star/uno/Sequence.hxx>
28 #include <formula/formuladllapi.h>
29 #include <formula/grammar.hxx>
30 #include <formula/opcode.hxx>
31 #include <formula/tokenarray.hxx>
32 #include <formula/types.hxx>
33 #include <formula/paramclass.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include <rtl/ustring.hxx>
36 #include <sal/types.h>
37 #include <tools/debug.hxx>
/* Compile-time limits of the formula compiler. Kept as preprocessor macros
   so that existing users (including any #if checks) keep working. */
#define FORMULA_MAXJUMPCOUNT    32  /* maximum number of jumps (ocChoose) */
#define FORMULA_MAXTOKENS     8192  /* maximum number of tokens in formula */
#define FORMULA_MAXPARAMS      255  /* maximum number of parameters per function (byte) */
#define FORMULA_MAXPARAMSII      8  /* maximum number of parameters for functions that have implicit intersection ranges */
// Forward declarations of the UNO sheet structs that are only used by
// reference / as Sequence element type in this header.
namespace com::sun::star {
    namespace sheet {
        struct FormulaOpCodeMapEntry;
        struct FormulaToken;
    }
}
52 class CharClass;
53 enum class FormulaError : sal_uInt16;
54 enum class SvNumFormatType : sal_Int16;
56 namespace formula
59 struct FormulaArrayStack
61 FormulaArrayStack* pNext;
62 FormulaTokenArray* pArr;
63 FormulaTokenRef mpLastToken;
64 sal_uInt16 nIndex;
65 bool bTemp;
68 typedef std::unordered_map< OUString, OpCode > OpCodeHashMap;
69 typedef std::unordered_map< OUString, OUString > ExternalHashMap;
71 class FORMULA_DLLPUBLIC FormulaCompiler
73 private:
74 FormulaCompiler(const FormulaCompiler&) = delete;
75 FormulaCompiler& operator=(const FormulaCompiler&) = delete;
76 public:
77 FormulaCompiler(bool bComputeII = false, bool bMatrixFlag = false);
78 FormulaCompiler(FormulaTokenArray& _rArr, bool bComputeII = false, bool bMatrixFlag = false);
79 virtual ~FormulaCompiler();
81 /** Mappings from strings to OpCodes and vice versa. */
82 class FORMULA_DLLPUBLIC OpCodeMap final
84 OpCodeHashMap maHashMap; /// Hash map of symbols, OUString -> OpCode
85 std::unique_ptr<OUString[]> mpTable; /// Array of symbols, OpCode -> OUString, offset==OpCode
86 ExternalHashMap maExternalHashMap; /// Hash map of ocExternal, Filter String -> AddIn String
87 ExternalHashMap maReverseExternalHashMap; /// Hash map of ocExternal, AddIn String -> Filter String
88 FormulaGrammar::Grammar meGrammar; /// Grammar, language and reference convention
89 sal_uInt16 mnSymbols; /// Count of OpCode symbols
90 bool mbCore : 1; /// If mapping was setup by core, not filters
91 bool mbEnglish : 1; /// If English symbols and external names
93 OpCodeMap( const OpCodeMap& ) = delete;
94 OpCodeMap& operator=( const OpCodeMap& ) = delete;
96 public:
98 OpCodeMap(sal_uInt16 nSymbols, bool bCore, FormulaGrammar::Grammar eGrammar ) :
99 maHashMap(nSymbols),
100 mpTable( new OUString[ nSymbols ]),
101 meGrammar( eGrammar),
102 mnSymbols( nSymbols),
103 mbCore( bCore),
104 mbEnglish ( FormulaGrammar::isEnglish(eGrammar) )
108 /** Copy mappings from r into this map, effectively replacing this map.
110 Override known legacy bad function names with
111 correct ones if the conditions can be derived from the
112 current maps.
114 void copyFrom( const OpCodeMap& r );
116 /// Get the symbol String -> OpCode hash map for finds.
117 const OpCodeHashMap& getHashMap() const { return maHashMap; }
119 /// Get the symbol String -> AddIn String hash map for finds.
120 const ExternalHashMap& getExternalHashMap() const { return maExternalHashMap; }
122 /// Get the AddIn String -> symbol String hash map for finds.
123 const ExternalHashMap& getReverseExternalHashMap() const { return maReverseExternalHashMap; }
125 /// Get the symbol string matching an OpCode.
126 const OUString& getSymbol( const OpCode eOp ) const
128 DBG_ASSERT( sal_uInt16(eOp) < mnSymbols, "OpCodeMap::getSymbol: OpCode out of range");
129 if (sal_uInt16(eOp) < mnSymbols)
130 return mpTable[ eOp ];
131 static OUString s_sEmpty;
132 return s_sEmpty;
135 /// Get the first character of the symbol string matching an OpCode.
136 sal_Unicode getSymbolChar( const OpCode eOp ) const { return getSymbol(eOp)[0]; };
138 /// Get the grammar.
139 FormulaGrammar::Grammar getGrammar() const { return meGrammar; }
141 /// Get the symbol count.
142 sal_uInt16 getSymbolCount() const { return mnSymbols; }
144 /** Are these English symbols, as opposed to native language (which may
145 be English as well)? */
146 bool isEnglish() const { return mbEnglish; }
148 /// Is it an ODF 1.1 compatibility mapping?
149 bool isPODF() const { return FormulaGrammar::isPODF( meGrammar); }
151 /* TODO: add isAPI() once a FormulaLanguage was added. */
153 /// Is it an ODFF / ODF 1.2 mapping?
154 bool isODFF() const { return FormulaGrammar::isODFF( meGrammar); }
156 /// Is it an OOXML mapping?
157 bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar); }
159 /// Does it have external symbol/name mappings?
160 bool hasExternals() const { return !maExternalHashMap.empty(); }
162 /// Put entry of symbol String and OpCode pair.
163 void putOpCode( const OUString & rStr, const OpCode eOp, const CharClass* pCharClass );
165 /// Put entry of symbol String and AddIn international String pair.
166 void putExternal( const OUString & rSymbol, const OUString & rAddIn );
168 /** Put entry of symbol String and AddIn international String pair,
169 failing silently if rAddIn name already exists. */
170 void putExternalSoftly( const OUString & rSymbol, const OUString & rAddIn );
172 /// Core implementation of XFormulaOpCodeMapper::getMappings()
173 css::uno::Sequence< css::sheet::FormulaToken >
174 createSequenceOfFormulaTokens(const FormulaCompiler& _rCompiler,
175 const css::uno::Sequence< OUString >& rNames ) const;
177 /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
178 css::uno::Sequence< css::sheet::FormulaOpCodeMapEntry >
179 createSequenceOfAvailableMappings( const FormulaCompiler& _rCompiler,const sal_Int32 nGroup ) const;
181 /** The value used in createSequenceOfAvailableMappings() and thus in
182 XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
183 static sal_Int32 getOpCodeUnknown() { return -1; }
185 private:
187 /** Conditionally put a mapping in copyFrom() context.
189 Does NOT check eOp range!
191 void putCopyOpCode( const OUString& rSymbol, OpCode eOp );
194 public:
195 typedef std::shared_ptr< const OpCodeMap > OpCodeMapPtr;
196 typedef std::shared_ptr< OpCodeMap > NonConstOpCodeMapPtr;
198 /** Get OpCodeMap for formula language.
199 @param nLanguage
200 One of css::sheet::FormulaLanguage constants.
201 @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
203 OpCodeMapPtr GetOpCodeMap( const sal_Int32 nLanguage ) const;
205 /** Create an internal symbol map from API mapping.
206 @param bEnglish
207 Use English number parser / formatter instead of native.
209 static OpCodeMapPtr CreateOpCodeMap(
210 const css::uno::Sequence< const css::sheet::FormulaOpCodeMapEntry > & rMapping,
211 bool bEnglish );
213 /** Get current OpCodeMap in effect. */
214 const OpCodeMapPtr& GetCurrentOpCodeMap() const { return mxSymbols; }
216 /** Get OpCode for English symbol.
217 Used in XFunctionAccess to create token array.
218 @param rName
219 Symbol to lookup. MUST be upper case.
221 OpCode GetEnglishOpCode( const OUString& rName ) const;
223 FormulaError GetErrorConstant( const OUString& rName ) const;
224 void AppendErrorConstant( OUStringBuffer& rBuffer, FormulaError nError ) const;
226 void EnableJumpCommandReorder( bool bEnable );
227 void EnableStopOnError( bool bEnable );
229 static bool IsOpCodeVolatile( OpCode eOp );
230 static bool IsOpCodeJumpCommand( OpCode eOp );
232 static bool DeQuote( OUString& rStr );
235 static const OUString& GetNativeSymbol( OpCode eOp );
236 static sal_Unicode GetNativeSymbolChar( OpCode eOp );
237 static bool IsMatrixFunction(OpCode _eOpCode); // if a function _always_ returns a Matrix
239 SvNumFormatType GetNumFormatType() const { return nNumFmt; }
240 bool CompileTokenArray();
242 void CreateStringFromTokenArray( OUString& rFormula );
243 void CreateStringFromTokenArray( OUStringBuffer& rBuffer );
244 const FormulaToken* CreateStringFromToken( OUString& rFormula, const FormulaToken* pToken );
245 const FormulaToken* CreateStringFromToken( OUStringBuffer& rBuffer, const FormulaToken* pToken,
246 bool bAllowArrAdvance = false );
248 void AppendBoolean( OUStringBuffer& rBuffer, bool bVal ) const;
249 void AppendDouble( OUStringBuffer& rBuffer, double fVal ) const;
250 static void AppendString( OUStringBuffer& rBuffer, const OUString & rStr );
252 /** Set symbol map corresponding to one of predefined formula::FormulaGrammar::Grammar,
253 including an address reference convention. */
254 FormulaGrammar::Grammar GetGrammar() const { return meGrammar; }
256 /** Whether current symbol set and grammar need transformation of Table
257 structured references to A1 style references when writing / exporting
258 (creating strings).
260 bool NeedsTableRefTransformation() const;
262 /** If a parameter nParam (0-based) is to be forced to array for OpCode
263 eOp, i.e. classified as ParamClass::ForceArray or
264 ParamClass::ReferenceOrForceArray type. */
265 virtual formula::ParamClass GetForceArrayParameter( const FormulaToken* pToken, sal_uInt16 nParam ) const;
267 static void UpdateSeparatorsNative( const OUString& rSep, const OUString& rArrayColSep, const OUString& rArrayRowSep );
268 static void ResetNativeSymbols();
269 static void SetNativeSymbols( const OpCodeMapPtr& xMap );
271 /** Sets the implicit intersection compute flag */
272 void SetComputeIIFlag(bool bSet) { mbComputeII = bSet; }
274 /** Sets the matrix flag for the formula*/
275 void SetMatrixFlag(bool bSet) { mbMatrixFlag = bSet; }
277 /** Separators mapped when loading opcodes from the resource, values other
278 than RESOURCE_BASE may override the resource strings. Used by OpCodeList
279 implementation via loadSymbols().
281 enum class SeparatorType
283 RESOURCE_BASE,
284 SEMICOLON_BASE
287 protected:
288 virtual OUString FindAddInFunction( const OUString& rUpperName, bool bLocalFirst ) const;
289 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr& xMap ) const;
290 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr& xMap, FormulaGrammar::Grammar _eGrammar ) const;
291 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr& xMap ) const;
292 virtual void fillAddInToken(::std::vector< css::sheet::FormulaOpCodeMapEntry >& _rVec, bool _bIsEnglish) const;
294 virtual void SetError(FormulaError nError);
295 virtual FormulaTokenRef ExtendRangeReference( FormulaToken & rTok1, FormulaToken & rTok2 );
296 virtual bool HandleExternalReference(const FormulaToken& _aToken);
297 virtual bool HandleRange();
298 virtual bool HandleColRowName();
299 virtual bool HandleDbData();
300 virtual bool HandleTableRef();
302 virtual void CreateStringFromExternal( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
303 virtual void CreateStringFromSingleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
304 virtual void CreateStringFromDoubleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
305 virtual void CreateStringFromMatrix( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
306 virtual void CreateStringFromIndex( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
307 virtual void LocalizeString( OUString& rName ) const; // modify rName - input: exact name
309 bool GetToken();
310 OpCode NextToken();
311 void PutCode( FormulaTokenRef& );
312 void Factor();
313 void RangeLine();
314 void UnionLine();
315 void IntersectionLine();
316 void UnaryLine();
317 void PostOpLine();
318 void PowLine();
319 void MulDivLine();
320 void AddSubLine();
321 void ConcatLine();
322 void CompareLine();
323 OpCode Expression();
324 void PopTokenArray();
325 void PushTokenArray( FormulaTokenArray*, bool );
327 bool MergeRangeReference( FormulaToken * * const pCode1, FormulaToken * const * const pCode2 );
329 // Returns whether the opcode has implicit intersection ranges as parameters.
330 // Called for (most) opcodes to possibly handle implicit intersection for the parameters.
331 virtual void HandleIIOpCode(FormulaToken* /*token*/,
332 FormulaToken*** /*pppToken*/, sal_uInt8 /*nNumParams*/) {}
334 // Called from CompileTokenArray() after RPN code generation is done.
335 virtual void PostProcessCode() {}
337 virtual void AnnotateOperands() {}
339 OUString aCorrectedFormula; // autocorrected Formula
340 OUString aCorrectedSymbol; // autocorrected Symbol
342 OpCodeMapPtr mxSymbols; // which symbols are used
344 FormulaTokenRef mpToken; // current token
345 FormulaTokenRef pCurrentFactorToken; // current factor token (of Factor() method)
346 sal_uInt16 nCurrentFactorParam; // current factor token's parameter, 1-based
347 FormulaTokenArray* pArr;
348 FormulaTokenArrayPlainIterator maArrIterator;
349 FormulaTokenRef mpLastToken; // last token
351 FormulaToken** pCode;
352 FormulaArrayStack* pStack;
354 OpCode eLastOp;
355 short nRecursion; // GetToken() recursions
356 SvNumFormatType nNumFmt; // set during CompileTokenArray()
357 sal_uInt16 pc; // program counter
359 FormulaGrammar::Grammar meGrammar; // The grammar used, language plus convention.
361 bool bAutoCorrect; // whether to apply AutoCorrection
362 bool bCorrected; // AutoCorrection was applied
363 bool glSubTotal; // if code contains one or more subtotal functions
364 bool needsRPNTokenCheck; // whether to make FormulaTokenArray check all tokens at the end
366 bool mbJumpCommandReorder; /// Whether or not to reorder RPN for jump commands.
367 bool mbStopOnError; /// Whether to stop compilation on first encountered error.
369 bool mbComputeII; // whether to attempt computing implicit intersection ranges while building the RPN array.
370 bool mbMatrixFlag; // whether the formula is a matrix formula (needed for II computation)
372 private:
373 void InitSymbolsNative() const; /// only SymbolsNative, on first document creation
374 void InitSymbolsEnglish() const; /// only SymbolsEnglish, maybe later
375 void InitSymbolsPODF() const; /// only SymbolsPODF, on demand
376 void InitSymbolsAPI() const; /// only SymbolsAPI, on demand
377 void InitSymbolsODFF() const; /// only SymbolsODFF, on demand
378 void InitSymbolsEnglishXL() const; /// only SymbolsEnglishXL, on demand
379 void InitSymbolsOOXML() const; /// only SymbolsOOXML, on demand
381 void loadSymbols(const std::pair<const char*, int>* pSymbols, FormulaGrammar::Grammar eGrammar, NonConstOpCodeMapPtr& rxMap,
382 SeparatorType eSepType = SeparatorType::SEMICOLON_BASE) const;
384 /** Check pCurrentFactorToken for nParam's (0-based) ForceArray types and
385 set ForceArray at rCurr if so. Set nParam+1 as 1-based
386 nCurrentFactorParam for subsequent ForceArrayOperator() calls.
388 void CheckSetForceArrayParameter( FormulaTokenRef const & rCurr, sal_uInt8 nParam );
390 void ForceArrayOperator( FormulaTokenRef const & rCurr );
392 class CurrentFactor
394 FormulaTokenRef pPrevFac;
395 sal_uInt16 nPrevParam;
396 FormulaCompiler* pCompiler;
397 CurrentFactor( const CurrentFactor& ) = delete;
398 CurrentFactor& operator=( const CurrentFactor& ) = delete;
399 public:
400 explicit CurrentFactor( FormulaCompiler* pComp )
401 : pPrevFac( pComp->pCurrentFactorToken )
402 , nPrevParam( pComp->nCurrentFactorParam )
403 , pCompiler( pComp )
405 ~CurrentFactor()
407 pCompiler->pCurrentFactorToken = pPrevFac;
408 pCompiler->nCurrentFactorParam = nPrevParam;
410 // yes, this operator= may modify the RValue
411 void operator=( FormulaTokenRef const & r )
413 pCompiler->ForceArrayOperator( r );
414 pCompiler->pCurrentFactorToken = r;
415 pCompiler->nCurrentFactorParam = 0;
417 void operator=( FormulaToken* p )
419 FormulaTokenRef xTemp( p );
420 *this = xTemp;
422 operator FormulaTokenRef&()
423 { return pCompiler->pCurrentFactorToken; }
424 FormulaToken* operator->()
425 { return pCompiler->pCurrentFactorToken.operator->(); }
426 operator FormulaToken*()
427 { return operator->(); }
431 mutable NonConstOpCodeMapPtr mxSymbolsODFF; // ODFF symbols
432 mutable NonConstOpCodeMapPtr mxSymbolsPODF; // ODF 1.1 symbols
433 mutable NonConstOpCodeMapPtr mxSymbolsAPI; // XFunctionAccess API symbols
434 mutable NonConstOpCodeMapPtr mxSymbolsNative; // native symbols
435 mutable NonConstOpCodeMapPtr mxSymbolsEnglish; // English symbols
436 mutable NonConstOpCodeMapPtr mxSymbolsEnglishXL; // English Excel symbols (for VBA formula parsing)
437 mutable NonConstOpCodeMapPtr mxSymbolsOOXML; // Excel OOXML symbols
439 static FormulaTokenArray smDummyTokenArray;
442 } // formula
445 #endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */