1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SC_INC_COMPILER_HXX
21 #define INCLUDED_SC_INC_COMPILER_HXX
27 #include "refdata.hxx"
29 #include <formula/token.hxx>
30 #include <formula/grammar.hxx>
31 #include <rtl/ustrbuf.hxx>
32 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
33 #include <com/sun/star/i18n/ParseResult.hpp>
37 #include <com/sun/star/uno/Sequence.hxx>
39 #include <formula/FormulaCompiler.hxx>
// Forward declaration of the o3tl::typed_flags class template; this header
// specializes it for ScCharFlags further down.
41 namespace o3tl
{ template <typename T
> struct typed_flags
; }
43 // constants and data types also for external modules (ScInterpreter et al)
45 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
47 // flag values of CharTable
// Per-character classification flags kept in a character table (see
// mpCharTable and getCharTableFlags()). The underlying type is sal_uInt32
// and every enumerator listed here is a distinct single-bit value, so
// several flags can be combined in one mask.
48 enum class ScCharFlags
: sal_uInt32
{
52 CharBool
= 0x00000002,
53 CharWord
= 0x00000004,
54 CharValue
= 0x00000008,
55 CharString
= 0x00000010,
56 CharDontCare
= 0x00000020,
// NOTE(review): the Value* flags presumably classify characters inside a
// numeric literal (separator, exponent, sign, digit) - confirm against the
// tokenizer before relying on this.
61 ValueSep
= 0x00000400,
62 ValueExp
= 0x00000800,
63 ValueSign
= 0x00001000,
64 ValueValue
= 0x00002000,
65 StringSep
= 0x00004000,
66 NameSep
= 0x00008000, // there can be only one! '\''
67 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
68 Ident
= 0x00020000, // identifier or reference continuation
69 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
70 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
71 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
72 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
73 CharName
= 0x00400000, // start character of a defined name
74 Name
= 0x00800000, // continuation character of a defined name
75 CharErrConst
= 0x01000000, // start character of an error constant ('#')
// Specialization that registers ScCharFlags with typed_flags. The mask
// 0x01ffffff spans bits 0 through 24, which includes the highest enumerator
// listed for ScCharFlags (CharErrConst = 0x01000000).
78 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
81 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
// Forward declarations only; both types are used in this header by pointer
// or by reference (for example as ScCompiler constructor parameters).
87 struct ScInterpreterContext
;
92 class CompileFormulaContext
;
96 // constants and data types internal to compiler
// Raw, not yet typified formula token. eType tells which payload member is
// meaningful; CreateToken() produces a formula::FormulaToken from it.
98 struct ScRawToken final
100 friend class ScCompiler
;
101 // Friends that use a temporary ScRawToken on the stack (and therefore need
102 // the private dtor) and know what they're doing..
103 friend class ScTokenArray
;
105 formula::StackVar eType
; // type of data; this determines how the unions are used
// NOTE(review): judging by the name this carries the ForceArray parameter
// class of the token - confirm against formula::ParamClass users.
111 formula::ParamClass eInForceArray
;
// NOTE(review): aRef is declared twice below; presumably each declaration
// belongs to a different nested struct or union alternative - confirm.
113 ScComplexRefData aRef
;
116 ScComplexRefData aRef
;
127 ScTableRefToken::Item eItem
;
// NOTE(review): presumably the case-insensitive counterpart of the string
// payload that SetString() receives - confirm.
131 rtl_uString
* mpDataIgnoreCase
;
// Array of FORMULA_MAXJUMPCOUNT + 1 short entries.
135 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Choose token
137 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
139 // coverity[uninit_member] - members deliberately not initialized
// Destructor with an empty body. The existing note asks that tokens be
// destroyed through Delete() only.
142 ~ScRawToken() {} //! only delete via Delete()
// Returns eType, the stack variable type of this token.
144 formula::StackVar
GetType() const { return eType
; }
// Returns the opcode currently stored in eOp.
145 OpCode
GetOpCode() const { return eOp
; }
// Overwrites eOp with e; no other member is touched by this inline body.
146 void NewOpCode( OpCode e
) { eOp
= e
; }
// Setters that fill the raw token. They come in two groups, separated by
// the existing notes: the first group clears the reference count, the
// second group does not.
148 // Use these methods only on tokens that are not part of a token array,
149 // since the reference count is cleared!
150 void SetOpCode( OpCode eCode
);
// NOTE(review): the second parameter name "pDataIgoreCase" looks like a typo
// for "pDataIgnoreCase" (compare the member mpDataIgnoreCase) - confirm and
// rename together with the definition.
151 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgoreCase
);
152 void SetSingleReference( const ScSingleRefData
& rRef
);
153 void SetDoubleReference( const ScComplexRefData
& rRef
);
154 void SetDouble( double fVal
);
155 void SetErrorConstant( FormulaError nErr
);
157 // These methods are ok to use, reference count not cleared.
// Takes a sheet number and a name index.
158 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
// The three SetExternal* overloads below all take a file id first, followed
// by a table name plus reference data, or by a name.
159 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
160 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
161 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
162 void SetExternal(const OUString
& rStr
);
// Read-only helpers: IsValidReference() classifies the token as described
// in the text that follows, and CreateToken() returns a pointer to a
// formula::FormulaToken built from this raw token.
// NOTE(review): ownership of the pointer returned by CreateToken() is not
// stated in this header - confirm at the definition.
164 /** If the token is a non-external reference, determine if the reference is
165 valid. If the token is an external reference, return true. Else return
166 false. Used only in ScCompiler::NextNewToken() to preserve non-existing
167 sheet names in otherwise valid references.
169 bool IsValidReference() const;
171 formula::FormulaToken
* CreateToken() const; // create typified token
// Spreadsheet formula compiler. Derives publicly from
// formula::FormulaCompiler and is exported through SC_DLLPUBLIC.
174 class SC_DLLPUBLIC ScCompiler
: public formula::FormulaCompiler
// Selects how unknown symbols are reported; the value is stored in
// meExtendedErrorDetection via SetExtendedErrorDetection().
178 enum ExtendedErrorDetection
180 EXTENDED_ERROR_DETECTION_NONE
= 0, // no error on unknown symbols, default (interpreter handles it)
181 EXTENDED_ERROR_DETECTION_NAME_BREAK
, // name error on unknown symbols and break, pCode incomplete
182 EXTENDED_ERROR_DETECTION_NAME_NO_BREAK
// name error on unknown symbols, don't break, continue
// Address convention handled by this Convention object. The member is
// const, so it can only be set when the object is constructed.
187 const formula::FormulaGrammar::AddressConvention meConv
;
// Constructor taking the address convention; the destructor is virtual
// because Convention is used as a polymorphic base (pure virtuals below).
189 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
190 virtual ~Convention();
// Pure virtual: appends the textual form of the reference rRef to rBuffer.
// NOTE(review): the roles of rErrRef, rTabNames, bSingleRef and
// bFromRangeName are inferred from their names only - confirm in an
// implementing convention.
192 virtual void makeRefStr(
193 OUStringBuffer
& rBuffer
,
194 formula::FormulaGrammar::Grammar eGram
,
195 const ScAddress
& rPos
,
196 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
197 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
// Pure virtual: parses a token out of rFormula using the given character
// classification and returns the css::i18n::ParseResult.
199 virtual css::i18n::ParseResult
200 parseAnyToken( const OUString
& rFormula
,
202 const CharClass
* pCharClass
) const = 0;
// parseExternalName(): pure virtual; rSymbol is the input, rFile and rName
// are non-const references that receive the parsed parts. pExternalLinks is
// passed by pointer.
// NOTE(review): whether pDoc or pExternalLinks may be null is not visible
// here - confirm.
205 * Parse the symbol string and pick up the file name and the external
208 * @return true on successful parse, or false otherwise.
210 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
211 const ScDocument
* pDoc
,
212 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
// Pure virtual counterpart in the other direction: builds and returns the
// string for an external name from a file id, file name and name.
214 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
215 const OUString
& rName
) const = 0;
// Two pure virtual overloads that write an external reference into rBuffer.
// This first one takes a single-cell reference (ScSingleRefData).
217 virtual void makeExternalRefStr(
218 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
219 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
// This second overload takes a range reference (ScComplexRefData) and, in
// addition, the list of table names rTabNames.
221 virtual void makeExternalRefStr(
222 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
223 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
224 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
// Kinds of convention-specific special characters that getSpecialSymbol()
// can be asked for; the descriptive text for the enumerators follows.
226 enum SpecialSymbolType
229 * Character between sheet name and address. In OOO A1 this is
230 * '.', while XL A1 and XL R1C1 this is '!'.
235 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
236 * absolute sheet position.
// Pure virtual: returns the character used by this convention for eSymType.
240 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
// Pure virtual: returns the ScCharFlags for character c; cLast is passed
// along as well. ScCompiler::GetCharTableFlags() forwards to this.
242 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
// Owning pointer to an array of ScCharFlags (released automatically by
// std::unique_ptr).
245 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
// Grants Convention access to the non-public members of the enclosing class.
247 friend struct Convention
;
// Class-wide (static) state shared by all ScCompiler instances.
251 static CharClass
*pCharClassEnglish
; // character classification for en_US locale
// One Convention pointer per address convention, CONV_LAST entries in total;
// GetRefConvention() takes an AddressConvention and returns such a pointer.
252 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
// Static table entry type that ties several spellings of an Add-In function
// name together.
254 static const struct AddInMap
257 const char* pEnglish
;
258 const char* pOriginal
; // programmatic name
259 const char* pUpper
; // upper case programmatic name
// Returns the number of AddInMap entries as size_t.
261 static size_t GetAddInMapCount();
// Per-instance tokenizer and compiler state.
// Number formatter pointer, assigned through SetNumberFormatter().
266 SvNumberFormatter
* mpFormatter
;
// Interpreter context pointer; the constructors accept one as pContext
// (defaulting to nullptr).
267 const ScInterpreterContext
* mpInterpreterContext
;
269 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
270 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
272 // For CONV_XL_OOX, may be set via API by MOOXML filter.
273 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
// Fixed-size buffer: MAXSTRLEN characters plus one terminating 0.
275 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
276 OUString aFormula
; // formula source code
277 sal_Int32 nSrcPos
; // tokenizer position (source code)
// Declared mutable, so const member functions may modify it.
278 mutable ScRawToken maRawToken
;
280 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken
281 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
282 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
// Active reference convention; changed through SetRefConvention().
283 const Convention
*pConv
;
284 ExtendedErrorDetection meExtendedErrorDetection
;
285 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
286 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
// NOTE(review): presumably the ids of external files referenced by the
// formula being compiled - confirm where it is filled.
287 std::vector
<sal_uInt16
> maExternalFiles
;
289 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
290 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// Constructs an entry that stores token p in mxToken and starts with
// mnLevel set to 0.
296 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
298 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
300 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
301 // be important. Store candidate parameters and the operation they are the argument for.
302 struct PendingImplicitIntersectionOptimization
// Remembers the location p of the parameter token pointer, the token *p it
// points to at construction time, and the operation token o. p is
// dereferenced here, so it must not be null.
304 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
305 : parameterLocation( p
), parameter( *p
), operation( o
) {}
306 formula::FormulaToken
** parameterLocation
;
307 formula::FormulaTokenRef parameter
;
308 formula::FormulaTokenRef operation
;
// Candidates collected for the deferred optimization described above.
310 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
// Queried by HasUnhandledPossibleImplicitIntersections().
311 std::set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
// Exposed read-only by UnhandledPossibleImplicitIntersectionsOpCodes().
313 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
// Tokenizer helpers. The Is*() functions take the candidate symbol text and
// return bool.
// NOTE(review): presumably a true result also means the recognized token
// was stored in maRawToken - confirm at the definitions.
316 bool NextNewToken(bool bInArray
);
// Overrides the base class error setter.
318 virtual void SetError(FormulaError nError
) override
;
319 sal_Int32
NextSymbol(bool bInArray
);
320 bool IsValue( const OUString
& );
321 bool IsOpCode( const OUString
&, bool bInArray
);
322 bool IsOpCode2( const OUString
& );
// The three reference checks below share one signature; pErrRef is optional
// and defaults to nullptr.
324 bool IsReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
325 bool IsSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
326 bool IsDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
327 bool IsPredetectedReference( const OUString
& rSymbol
);
328 bool IsPredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
329 bool IsMacro( const OUString
& );
330 bool IsNamedRange( const OUString
& );
// rbInvalidExternalNameRange is an output flag passed by non-const reference.
331 bool IsExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
332 bool IsDBRange( const OUString
& );
333 bool IsColRowName( const OUString
& );
334 bool IsBoolean( const OUString
& );
335 void AutoCorrectParsedSymbol();
337 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
338 void SetRelNameReference();
// GetRangeData() is const and returns a pointer to ScRangeData; the token
// is passed by const reference although the parameter is named pToken.
340 /** Obtain range data for ocName token, global or sheet local.
342 Prerequisite: pToken is a FormulaIndexToken so IsGlobal() and
343 GetIndex() can be called on it. We don't check with RTTI.
345 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
347 static void InitCharClassEnglish();
// Four constructors: two take a sc::CompileFormulaContext, two take an
// ScDocument pointer plus an optional grammar; each pair exists with and
// without an ScTokenArray to work on. All of them share the trailing
// defaults bComputeII = false, bMatrixFlag = false and pContext = nullptr.
350 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
351 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
353 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of pDocument is used,
354 if pDocument==nullptr then GRAM_DEFAULT.
356 ScCompiler( ScDocument
* pDocument
, const ScAddress
&,
357 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
358 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
360 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
361 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
363 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of pDocument is used,
364 if pDocument==nullptr then GRAM_DEFAULT.
366 ScCompiler( ScDocument
* pDocument
, const ScAddress
&, ScTokenArray
& rArr
,
367 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
368 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
370 virtual ~ScCompiler() override
;
// Static utilities and public symbol checks.
373 static void DeInit(); /// all
375 // for ScAddress::Format()
// aTabName is modified in place (non-const reference); eConv defaults to
// CONV_OOO.
376 static void CheckTabQuotes( OUString
& aTabName
,
377 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
379 /** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc..
381 @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
383 static sal_Int32
GetDocTabPos( const OUString
& rString
);
// rStr is passed by non-const reference and may therefore be changed.
// NOTE(review): the meaning of the bool result is not visible here - confirm.
385 static bool EnQuote( OUString
& rStr
);
386 sal_Unicode
GetNativeAddressSymbol( Convention::SpecialSymbolType eType
) const;
388 // Check if it is a valid English function name
389 bool IsEnglishSymbol( const OUString
& rName
);
391 bool IsErrorConstant( const OUString
& ) const;
392 bool IsTableRefItem( const OUString
& ) const;
393 bool IsTableRefColumn( const OUString
& ) const;
395 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
396 bool GetTokenIfOpCode( OpCode eOp
);
// Configuration setters. The two defined inline here (SetCloseBrackets and
// SetExtendedErrorDetection) only assign their argument to the matching
// member; the others are declared here and defined elsewhere.
399 * When auto correction is set, the jump command reorder must be enabled.
401 void SetAutoCorrection( bool bVal
);
402 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
// Two overloads: by Convention pointer, or by AddressConvention value.
403 void SetRefConvention( const Convention
*pConvP
);
404 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
406 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
408 /// Set symbol map if not empty.
409 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
411 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
413 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
416 /** Set grammar and reference convention from within SetFormulaLanguage()
420 The new grammar to be set and the associated reference convention.
423 The previous grammar that was active before SetFormulaLanguage().
425 void SetGrammarAndRefConvention(
426 const formula::FormulaGrammar::Grammar eNewGrammar
,
427 const formula::FormulaGrammar::Grammar eOldGrammar
);
430 /// Set external link info for ScAddress::CONV_XL_OOX.
431 void SetExternalLinks(
432 const css::uno::Sequence
<
433 css::sheet::ExternalLinkInfo
>& rLinks
)
435 maExternalLinks
= rLinks
;
// rFormula and rFormulaNmsp are output parameters (non-const references).
438 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
440 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
442 bool IsCorrected() { return bCorrected
; }
443 const OUString
& GetCorrectedFormula() { return aCorrectedFormula
; }
// CompileString(): two overloads, with and without a formula namespace
// string; both return a pointer to an ScTokenArray that, per the text
// below, is heap allocated and must be managed by the caller.
446 * Tokenize formula expression string into an array of tokens.
448 * @param rFormula formula expression to tokenize.
450 * @return heap allocated token array object. The caller <i>must</i>
451 * manage the life cycle of this object.
453 ScTokenArray
* CompileString( const OUString
& rFormula
);
454 ScTokenArray
* CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
// Returns a const reference to aPos (declared outside this class body).
455 const ScAddress
& GetPos() const { return aPos
; }
// Member and static variants of MoveRelWrap; the static one receives the
// token array, document and position explicitly.
457 void MoveRelWrap( SCCOL nMaxCol
, SCROW nMaxRow
);
458 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
* pDoc
, const ScAddress
& rPos
,
459 SCCOL nMaxCol
, SCROW nMaxRow
);
461 /** If the character is allowed as tested by nFlags (ScCharFlags
462 bits) for all known address conventions. If more than one bit is given
463 in nFlags, all bits must match. */
464 static bool IsCharFlagAllConventions(
465 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
467 /** TODO : Move this to somewhere appropriate. */
// rAdr is an output parameter (non-const reference).
// NOTE(review): presumably filled only when true is returned - confirm.
468 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
469 const ScAddress
& rFormulaPos
);
471 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
473 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
// Overrides of virtual functions inherited from the base class; every
// declaration in this group carries the override specifier.
478 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
// The fill*() overrides receive the opcode map to populate as xMap.
479 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
480 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
481 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
// _rVec is an output vector (non-const reference).
482 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
// Handle*() hooks, each returning bool.
// NOTE(review): the meaning of the return value is defined by the base
// class - confirm there.
484 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
485 virtual bool HandleRange() override
;
486 virtual bool HandleColRowName() override
;
487 virtual bool HandleDbData() override
;
488 virtual bool HandleTableRef() override
;
490 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
// The CreateStringFrom*() overrides append text for pToken to rBuffer.
491 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
492 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
493 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
494 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
495 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
496 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
498 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
500 /// Access the CharTable flags
501 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
)
502 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
// Implicit intersection ("II") support. HandleIIOpCode() and
// PostProcessCode() override base class virtuals; the remaining functions
// are helpers declared by this class.
504 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
// Same parameter list as HandleIIOpCode(), but returns bool.
// NOTE(review): presumably true means the opcode was handled - confirm.
505 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
506 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
507 virtual void PostProcessCode() override
;
// Static query taking the operation token and a parameter index.
508 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
// Receives the address of the token pointer, so the pointed-to token can be
// replaced in place.
509 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
// rSumRange is passed by non-const reference and may be adjusted;
// rBaseRange is read-only.
510 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
511 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
516 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */