Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
21 : #define INCLUDED_FORMULA_FORMULACOMPILER_HXX
22 :
23 : #include <formula/formuladllapi.h>
24 : #include <rtl/ustrbuf.hxx>
25 : #include <rtl/ustring.hxx>
26 : #include <tools/debug.hxx>
27 :
28 : #include <com/sun/star/uno/Sequence.hxx>
29 :
30 : #include <formula/opcode.hxx>
31 : #include <formula/grammar.hxx>
32 : #include <formula/token.hxx>
33 : #include <formula/ExternalReferenceHelper.hxx>
34 :
35 : #include <memory>
36 : #include <unordered_map>
37 :
38 : #define FORMULA_MAXJUMPCOUNT 32 /* maximum number of jumps (ocChoose) */
39 : #define FORMULA_MAXTOKENS 8192 /* maximum number of tokens in formula */
40 :
41 :
42 : namespace com { namespace sun { namespace star {
43 : namespace sheet {
44 : struct FormulaOpCodeMapEntry;
45 : struct FormulaToken;
46 : }
47 : }}}
48 :
49 : class CharClass;
50 :
51 : namespace formula
52 : {
53 : class FormulaTokenArray;
54 :
55 : struct FormulaArrayStack
56 : {
57 : FormulaArrayStack* pNext;
58 : FormulaTokenArray* pArr;
59 : bool bTemp;
60 : };
61 :
62 :
63 : typedef std::unordered_map< OUString, OpCode, OUStringHash, ::std::equal_to< OUString > > OpCodeHashMap;
64 : typedef std::unordered_map< OUString, OUString, OUStringHash, ::std::equal_to< OUString > > ExternalHashMap;
65 :
66 : class FORMULA_DLLPUBLIC FormulaCompiler
67 : {
68 : private:
69 : FormulaCompiler(const FormulaCompiler&) SAL_DELETED_FUNCTION;
70 : FormulaCompiler& operator=(const FormulaCompiler&) SAL_DELETED_FUNCTION;
71 : public:
72 : FormulaCompiler();
73 : FormulaCompiler(FormulaTokenArray& _rArr);
74 : virtual ~FormulaCompiler();
75 :
76 : /** Mappings from strings to OpCodes and vice versa. */
77 : class FORMULA_DLLPUBLIC OpCodeMap
78 : {
79 : OpCodeHashMap * mpHashMap; /// Hash map of symbols, OUString -> OpCode
80 : OUString * mpTable; /// Array of symbols, OpCode -> OUString, offset==OpCode
81 : ExternalHashMap * mpExternalHashMap; /// Hash map of ocExternal, Filter String -> AddIn String
82 : ExternalHashMap * mpReverseExternalHashMap; /// Hash map of ocExternal, AddIn String -> Filter String
83 : FormulaGrammar::Grammar meGrammar; /// Grammar, language and reference convention
84 : sal_uInt16 mnSymbols; /// Count of OpCode symbols
85 : bool mbCore : 1; /// If mapping was setup by core, not filters
86 : bool mbEnglish : 1; /// If English symbols and external names
87 :
88 : OpCodeMap( const OpCodeMap& ) SAL_DELETED_FUNCTION;
89 : OpCodeMap& operator=( const OpCodeMap& ) SAL_DELETED_FUNCTION;
90 :
91 : public:
92 :
93 622 : OpCodeMap(sal_uInt16 nSymbols, bool bCore, FormulaGrammar::Grammar eGrammar ) :
94 622 : mpHashMap( new OpCodeHashMap( nSymbols)),
95 1244 : mpTable( new OUString[ nSymbols ]),
96 622 : mpExternalHashMap( new ExternalHashMap),
97 622 : mpReverseExternalHashMap( new ExternalHashMap),
98 : meGrammar( eGrammar),
99 : mnSymbols( nSymbols),
100 3732 : mbCore( bCore)
101 : {
102 622 : mbEnglish = FormulaGrammar::isEnglish( meGrammar);
103 622 : }
104 : virtual ~OpCodeMap();
105 :
106 : /** Copy mappings from r into this map, effectively replacing this map.
107 :
108 : @param bOverrideKnownBad
109 : If TRUE, override known legacy bad function names with
110 : correct ones if the conditions can be derived from the
111 : current maps.
112 : */
113 : void copyFrom( const OpCodeMap& r, bool bOverrideKnownBad );
114 :
115 : /// Get the symbol String -> OpCode hash map for finds.
116 33217 : inline const OpCodeHashMap* getHashMap() const { return mpHashMap; }
117 :
118 : /// Get the symbol String -> AddIn String hash map for finds.
119 1286 : inline const ExternalHashMap* getExternalHashMap() const { return mpExternalHashMap; }
120 :
121 : /// Get the AddIn String -> symbol String hash map for finds.
122 176 : inline const ExternalHashMap* getReverseExternalHashMap() const { return mpReverseExternalHashMap; }
123 :
124 : /// Get the symbol string matching an OpCode.
125 205478 : inline const OUString& getSymbol( const OpCode eOp ) const
126 : {
127 : DBG_ASSERT( sal_uInt16(eOp) < mnSymbols, "OpCodeMap::getSymbol: OpCode out of range");
128 205478 : if (sal_uInt16(eOp) < mnSymbols)
129 205478 : return mpTable[ eOp ];
130 0 : static OUString s_sEmpty;
131 0 : return s_sEmpty;
132 : }
133 :
134 : /// Get the first character of the symbol string matching an OpCode.
135 92427 : inline sal_Unicode getSymbolChar( const OpCode eOp ) const { return getSymbol(eOp)[0]; };
136 :
137 : /// Get the grammar.
138 45844 : inline FormulaGrammar::Grammar getGrammar() const { return meGrammar; }
139 :
140 : /// Get the symbol count.
141 50121 : inline sal_uInt16 getSymbolCount() const { return mnSymbols; }
142 :
143 : /** Are these English symbols, as opposed to native language (which may
144 : be English as well)? */
145 96520 : inline bool isEnglish() const { return mbEnglish; }
146 :
147 : /// Is it an internal core mapping, or setup by filters?
148 : inline bool isCore() const { return mbCore; }
149 :
150 : /// Is it an ODF 1.1 compatibility mapping?
151 89 : inline bool isPODF() const { return FormulaGrammar::isPODF( meGrammar); }
152 :
153 : /// Is it an ODFF / ODF 1.2 mapping?
154 4046 : inline bool isODFF() const { return FormulaGrammar::isODFF( meGrammar); }
155 :
156 : /// Is it an OOXML mapping?
157 6 : inline bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar); }
158 :
159 : /// Does it have external symbol/name mappings?
160 4263 : inline bool hasExternals() const { return !mpExternalHashMap->empty(); }
161 :
162 : /// Put entry of symbol String and OpCode pair.
163 : void putOpCode( const OUString & rStr, const OpCode eOp, const CharClass* pCharClass );
164 :
165 : /// Put entry of symbol String and AddIn international String pair.
166 : void putExternal( const OUString & rSymbol, const OUString & rAddIn );
167 :
168 : /** Put entry of symbol String and AddIn international String pair,
169 : failing silently if rAddIn name already exists. */
170 : void putExternalSoftly( const OUString & rSymbol, const OUString & rAddIn );
171 :
172 : /// Core implementation of XFormulaOpCodeMapper::getMappings()
173 : ::com::sun::star::uno::Sequence< ::com::sun::star::sheet::FormulaToken >
174 : createSequenceOfFormulaTokens(const FormulaCompiler& _rCompiler,
175 : const ::com::sun::star::uno::Sequence< OUString >& rNames ) const;
176 :
177 : /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
178 : ::com::sun::star::uno::Sequence<
179 : ::com::sun::star::sheet::FormulaOpCodeMapEntry >
180 : createSequenceOfAvailableMappings( const FormulaCompiler& _rCompiler,const sal_Int32 nGroup ) const;
181 :
182 : /** The value used in createSequenceOfAvailableMappings() and thus in
183 : XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
184 : static sal_Int32 getOpCodeUnknown();
185 :
186 : private:
187 :
188 : /** Conditionally put a mapping in copyFrom() context.
189 :
190 : Does NOT check eOp range!
191 : */
192 : void putCopyOpCode( const OUString& rSymbol, OpCode eOp );
193 : };
194 :
195 : public:
196 : typedef std::shared_ptr< const OpCodeMap > OpCodeMapPtr;
197 : typedef std::shared_ptr< OpCodeMap > NonConstOpCodeMapPtr;
198 :
199 : /** Get OpCodeMap for formula language.
200 : @param nLanguage
201 : One of ::com::sun::star::sheet::FormulaLanguage constants.
202 : @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
203 : */
204 : OpCodeMapPtr GetOpCodeMap( const sal_Int32 nLanguage ) const;
205 :
206 : /** Create an internal symbol map from API mapping.
207 : @param bEnglish
208 : Use English number parser / formatter instead of native.
209 : */
210 : static OpCodeMapPtr CreateOpCodeMap(
211 : const ::com::sun::star::uno::Sequence<
212 : const ::com::sun::star::sheet::FormulaOpCodeMapEntry > & rMapping,
213 : bool bEnglish );
214 :
215 : /** Get current OpCodeMap in effect. */
216 28654 : inline OpCodeMapPtr GetCurrentOpCodeMap() const { return mxSymbols; }
217 :
218 : /** Get OpCode for English symbol.
219 : Used in XFunctionAccess to create token array.
220 : @param rName
221 : Symbol to lookup. MUST be upper case.
222 : */
223 : OpCode GetEnglishOpCode( const OUString& rName ) const;
224 :
225 : sal_uInt16 GetErrorConstant( const OUString& rName ) const;
226 :
227 : void EnableJumpCommandReorder( bool bEnable );
228 : void EnableStopOnError( bool bEnable );
229 :
230 : static bool IsOpCodeVolatile( OpCode eOp );
231 : static bool IsOpCodeJumpCommand( OpCode eOp );
232 :
233 : static bool DeQuote( OUString& rStr );
234 :
235 :
236 : static const OUString& GetNativeSymbol( OpCode eOp );
237 : static sal_Unicode GetNativeSymbolChar( OpCode eOp );
238 : static bool IsMatrixFunction(OpCode _eOpCode); // if a function _always_ returns a Matrix
239 :
240 9278 : short GetNumFormatType() const { return nNumFmt; }
241 : bool CompileTokenArray();
242 :
243 : void CreateStringFromTokenArray( OUString& rFormula );
244 : void CreateStringFromTokenArray( OUStringBuffer& rBuffer );
245 : const FormulaToken* CreateStringFromToken( OUString& rFormula, const FormulaToken* pToken,
246 : bool bAllowArrAdvance = false );
247 : const FormulaToken* CreateStringFromToken( OUStringBuffer& rBuffer, const FormulaToken* pToken,
248 : bool bAllowArrAdvance = false );
249 :
250 : void AppendBoolean( OUStringBuffer& rBuffer, bool bVal ) const;
251 : void AppendDouble( OUStringBuffer& rBuffer, double fVal ) const;
252 : static void AppendString( OUStringBuffer& rBuffer, const OUString & rStr );
253 :
254 : /** Set symbol map corresponding to one of predefined formula::FormulaGrammar::Grammar,
255 : including an address reference convention. */
256 185357 : inline FormulaGrammar::Grammar GetGrammar() const { return meGrammar; }
257 :
258 : /** Whether current symbol set and grammar need transformation of Table
259 : structured references to A1 style references when writing / exporting
260 : (creating strings).
261 : */
262 : bool NeedsTableRefTransformation() const;
263 :
264 : static void UpdateSeparatorsNative( const OUString& rSep, const OUString& rArrayColSep, const OUString& rArrayRowSep );
265 : static void ResetNativeSymbols();
266 : static void SetNativeSymbols( const OpCodeMapPtr& xMap );
267 :
/** Separators mapped when loading opcodes from the resource. Values other
    than RESOURCE_BASE may override the resource strings. Used by the
    OpCodeList implementation via loadSymbols().
 */
enum SeparatorType
{
    RESOURCE_BASE,      ///< no override of the resource strings
    SEMICOLON_BASE,     ///< presumably semicolon based separators - TODO confirm in loadSymbols()
    COMMA_BASE          ///< presumably comma based separators - TODO confirm in loadSymbols()
};
278 :
279 : protected:
280 : virtual OUString FindAddInFunction( const OUString& rUpperName, bool bLocalFirst ) const;
281 : virtual void fillFromAddInCollectionUpperName( NonConstOpCodeMapPtr xMap ) const;
282 : virtual void fillFromAddInMap( NonConstOpCodeMapPtr xMap, FormulaGrammar::Grammar _eGrammar ) const;
283 : virtual void fillFromAddInCollectionEnglishName( NonConstOpCodeMapPtr xMap ) const;
284 : virtual void fillAddInToken(::std::vector< ::com::sun::star::sheet::FormulaOpCodeMapEntry >& _rVec,bool _bIsEnglish) const;
285 :
286 : virtual void SetError(sal_uInt16 nError);
287 : virtual FormulaTokenRef ExtendRangeReference( FormulaToken & rTok1, FormulaToken & rTok2, bool bReuseDoubleRef );
288 : virtual bool HandleExternalReference(const FormulaToken& _aToken);
289 : virtual bool HandleRange();
290 : virtual bool HandleColRowName();
291 : virtual bool HandleDbData();
292 : virtual bool HandleTableRef();
293 :
294 : virtual void CreateStringFromExternal( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
295 : virtual void CreateStringFromSingleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
296 : virtual void CreateStringFromDoubleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
297 : virtual void CreateStringFromMatrix( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
298 : virtual void CreateStringFromIndex( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
299 : virtual void LocalizeString( OUString& rName ) const; // modify rName - input: exact name
300 :
301 : void AppendErrorConstant( OUStringBuffer& rBuffer, sal_uInt16 nError ) const;
302 :
303 : bool GetToken();
304 : OpCode NextToken();
305 : void PutCode( FormulaTokenRef& );
306 : void Factor();
307 : void RangeLine();
308 : void UnionLine();
309 : void IntersectionLine();
310 : void UnaryLine();
311 : void PostOpLine();
312 : void PowLine();
313 : void MulDivLine();
314 : void AddSubLine();
315 : void ConcatLine();
316 : void CompareLine();
317 : void NotLine();
318 : OpCode Expression();
319 : void PopTokenArray();
320 : void PushTokenArray( FormulaTokenArray*, bool = false );
321 :
322 : bool MergeRangeReference( FormulaToken * * const pCode1, FormulaToken * const * const pCode2 );
323 :
324 : OUString aCorrectedFormula; // autocorrected Formula
325 : OUString aCorrectedSymbol; // autocorrected Symbol
326 :
327 : OpCodeMapPtr mxSymbols; // which symbols are used
328 :
329 : FormulaTokenRef mpToken; // current token
330 : FormulaTokenRef pCurrentFactorToken; // current factor token (of Factor() method)
331 : FormulaTokenArray* pArr;
332 :
333 : FormulaToken** pCode;
334 : FormulaArrayStack* pStack;
335 :
336 : OpCode eLastOp;
337 : short nRecursion; // GetToken() recursions
338 : short nNumFmt; // set during CompileTokenArray()
339 : sal_uInt16 pc; // program counter
340 :
341 : FormulaGrammar::Grammar meGrammar; // The grammar used, language plus convention.
342 :
343 : bool bAutoCorrect; // whether to apply AutoCorrection
344 : bool bCorrected; // AutoCorrection was applied
345 : bool glSubTotal; // if code contains one or more subtotal functions
346 :
347 : bool mbJumpCommandReorder; /// Whether or not to reorder RPN for jump commands.
348 : bool mbStopOnError; /// Whether to stop compilation on first encountered error.
349 :
350 : private:
351 : void InitSymbolsNative() const; /// only SymbolsNative, on first document creation
352 : void InitSymbolsEnglish() const; /// only SymbolsEnglish, maybe later
353 : void InitSymbolsPODF() const; /// only SymbolsPODF, on demand
354 : void InitSymbolsODFF() const; /// only SymbolsODFF, on demand
355 : void InitSymbolsEnglishXL() const; /// only SymbolsEnglishXL, on demand
356 : void InitSymbolsOOXML() const; /// only SymbolsOOXML, on demand
357 :
358 : void loadSymbols( sal_uInt16 nSymbols, FormulaGrammar::Grammar eGrammar, NonConstOpCodeMapPtr& rxMap,
359 : SeparatorType eSepType = SEMICOLON_BASE ) const;
360 :
361 42828 : static inline void ForceArrayOperator( FormulaTokenRef& rCurr, const FormulaTokenRef& rPrev )
362 : {
363 107015 : if ( rPrev && rPrev->HasForceArray() && rCurr->GetOpCode() != ocPush &&
364 43018 : (rCurr->GetType() == svByte || rCurr->GetType() == svJump) &&
365 95 : !rCurr->HasForceArray() )
366 13 : rCurr->SetForceArray( true);
367 42828 : }
368 :
369 : class CurrentFactor
370 : {
371 : FormulaTokenRef pPrevFac;
372 : FormulaCompiler* pCompiler;
373 : CurrentFactor( const CurrentFactor& ) SAL_DELETED_FUNCTION;
374 : CurrentFactor& operator=( const CurrentFactor& ) SAL_DELETED_FUNCTION;
375 : public:
376 31764 : explicit CurrentFactor( FormulaCompiler* pComp )
377 : : pPrevFac( pComp->pCurrentFactorToken )
378 31764 : , pCompiler( pComp )
379 31764 : {}
380 31764 : ~CurrentFactor()
381 31764 : { pCompiler->pCurrentFactorToken = pPrevFac; }
382 : // yes, this operator= may modify the RValue
383 6803 : void operator=( FormulaTokenRef& r )
384 : {
385 6803 : ForceArrayOperator( r, pPrevFac);
386 6803 : pCompiler->pCurrentFactorToken = r;
387 6803 : }
388 1 : void operator=( FormulaToken* p )
389 : {
390 1 : FormulaTokenRef xTemp( p );
391 1 : *this = xTemp;
392 1 : }
393 6802 : operator FormulaTokenRef&()
394 6802 : { return pCompiler->pCurrentFactorToken; }
395 6997 : FormulaToken* operator->()
396 6997 : { return pCompiler->pCurrentFactorToken.operator->(); }
397 1 : operator FormulaToken*()
398 1 : { return operator->(); }
399 : };
400 :
401 :
402 : mutable NonConstOpCodeMapPtr mxSymbolsODFF; // ODFF symbols
403 : mutable NonConstOpCodeMapPtr mxSymbolsPODF; // ODF 1.1 symbols
404 : mutable NonConstOpCodeMapPtr mxSymbolsNative; // native symbols
405 : mutable NonConstOpCodeMapPtr mxSymbolsEnglish; // English symbols
406 : mutable NonConstOpCodeMapPtr mxSymbolsEnglishXL; // English Excel symbols (for VBA formula parsing)
407 : mutable NonConstOpCodeMapPtr mxSymbolsOOXML; // Excel OOXML symbols
408 : };
409 :
410 : } // formula
411 :
412 :
413 : #endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX
414 :
415 :
416 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|