Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef SC_COMPILER_HXX
21 : #define SC_COMPILER_HXX
22 :
23 : #include <string.h>
24 :
25 : #include <tools/mempool.hxx>
26 : #include "scdllapi.h"
27 : #include "global.hxx"
28 : #include "refdata.hxx"
29 : #include "formula/token.hxx"
30 : #include "formula/grammar.hxx"
31 : #include <unotools/charclass.hxx>
32 : #include <rtl/ustrbuf.hxx>
33 : #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
34 : #include <vector>
35 :
36 : #include <formula/FormulaCompiler.hxx>
37 :
38 : #include <boost/intrusive_ptr.hpp>
39 : #include <boost/unordered_map.hpp>
40 :
41 : // constants and data types also for external modules (ScInterpreter et al)
42 :
43 : #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
44 :
45 : // flag values of CharTable; every non-zero flag below is a single distinct bit, so flags can be OR-ed together
46 : #define SC_COMPILER_C_ILLEGAL 0x00000000 // no flag set
47 : #define SC_COMPILER_C_CHAR 0x00000001
48 : #define SC_COMPILER_C_CHAR_BOOL 0x00000002
49 : #define SC_COMPILER_C_CHAR_WORD 0x00000004
50 : #define SC_COMPILER_C_CHAR_VALUE 0x00000008
51 : #define SC_COMPILER_C_CHAR_STRING 0x00000010
52 : #define SC_COMPILER_C_CHAR_DONTCARE 0x00000020
53 : #define SC_COMPILER_C_BOOL 0x00000040
54 : #define SC_COMPILER_C_WORD 0x00000080
55 : #define SC_COMPILER_C_WORD_SEP 0x00000100
56 : #define SC_COMPILER_C_VALUE 0x00000200
57 : #define SC_COMPILER_C_VALUE_SEP 0x00000400
58 : #define SC_COMPILER_C_VALUE_EXP 0x00000800
59 : #define SC_COMPILER_C_VALUE_SIGN 0x00001000
60 : #define SC_COMPILER_C_VALUE_VALUE 0x00002000
61 : #define SC_COMPILER_C_STRING_SEP 0x00004000
62 : #define SC_COMPILER_C_NAME_SEP 0x00008000 // there can be only one! '\''
63 : #define SC_COMPILER_C_CHAR_IDENT 0x00010000 // identifier (built-in function) or reference start
64 : #define SC_COMPILER_C_IDENT 0x00020000 // identifier or reference continuation
65 : #define SC_COMPILER_C_ODF_LBRACKET 0x00040000 // ODF '[' reference bracket
66 : #define SC_COMPILER_C_ODF_RBRACKET 0x00080000 // ODF ']' reference bracket
67 : #define SC_COMPILER_C_ODF_LABEL_OP 0x00100000 // ODF '!!' automatic intersection of labels
68 : #define SC_COMPILER_C_ODF_NAME_MARKER 0x00200000 // ODF '$$' marker that starts a defined (range) name
69 : #define SC_COMPILER_C_CHAR_NAME 0x00400000 // start character of a defined name
70 : #define SC_COMPILER_C_NAME 0x00800000 // continuation character of a defined name
71 : #define SC_COMPILER_C_CHAR_ERRCONST 0x01000000 // start character of an error constant ('#')
72 :
73 : #define SC_COMPILER_FILE_TAB_SEP '#' // separates the quoted document name from the tab name, as in 'Doc'#Tab
74 :
75 : class ScDocument;
76 : class ScMatrix;
77 : class ScRangeData;
78 : class ScExternalRefManager;
79 : class ScTokenArray;
80 :
81 : namespace sc {
82 :
83 : class CompileFormulaContext;
84 :
85 : }
86 :
87 : // constants and data types internal to compiler
88 :
89 : /*
90 : OpCode eOp; // OpCode
91 : formula::StackVar eType; // type of data
92 : sal_uInt16 nRefCnt; // reference count
93 : bool bRaw; // not cloned yet and trimmed to real size
94 : */
95 :
96 0 : struct ScRawTokenBase // common leading members privately inherited by ScDoubleRawToken and ScRawToken
97 : {
98 : protected:
99 : OpCode eOp; // OpCode of the token
100 : formula::StackVar eType; // type of data carried by the token
101 : mutable sal_uInt16 nRefCnt; // reference count; declared mutable
102 : bool bRaw; // true while not cloned yet and not trimmed to real size
103 : };
104 :
105 : struct ScDoubleRawToken: private ScRawTokenBase // raw token variant whose union holds only nValue and sbyte
106 : {
107 : public:
108 : union
109 : { // union only to assure alignment identical to ScRawToken
110 : double nValue;
111 : struct {
112 : sal_uInt8 cByte;
113 : bool bHasForceArray;
114 : } sbyte;
115 : };
116 0 : DECL_FIXEDMEMPOOL_NEWDEL( ScDoubleRawToken ); // NOTE(review): presumably declares pool-backed new/delete (tools/mempool.hxx) - confirm
117 : };
118 :
119 : struct ScRawToken: private ScRawTokenBase // raw token: ScRawTokenBase members plus a union holding the token data
120 : {
121 : friend class ScCompiler;
122 : // Friends that use a temporary ScRawToken on the stack (and therefore need
123 : // the private dtor) and know what they're doing..
124 : friend class ScTokenArray;
125 : static sal_uInt16 sbyteOffset();
126 : public:
127 : union {
128 : double nValue;
129 : struct {
130 : sal_uInt8 cByte;
131 : bool bHasForceArray;
132 : } sbyte;
133 : ScComplexRefData aRef;
134 : struct {
135 : sal_uInt16 nFileId;
136 : sal_Unicode cTabName[MAXSTRLEN+1];
137 : ScComplexRefData aRef;
138 : } extref; // external reference: file id, tab name and reference data
139 : struct {
140 : sal_uInt16 nFileId;
141 : sal_Unicode cName[MAXSTRLEN+1];
142 : } extname; // external name: file id and name
143 : struct {
144 : bool bGlobal;
145 : sal_uInt16 nIndex;
146 : } name; // name token: bGlobal flag and index, see SetName()
147 : struct {
148 : rtl_uString* mpData;
149 : rtl_uString* mpDataIgnoreCase;
150 : } sharedstring; // pair of string pointers, see SetString()
151 : ScMatrix* pMat;
152 : sal_uInt16 nError;
153 : sal_Unicode cStr[ MAXSTRLEN+1 ]; // string (up to MAXSTRLEN characters + 0)
154 : short nJump[ FORMULA_MAXJUMPCOUNT + 1 ]; // If/Chose token
155 : };
156 :
157 : //! only bRaw is set here, all other members are left uninitialized
158 0 : ScRawToken() { bRaw = true; }
159 : private:
160 0 : ~ScRawToken() {} //! only delete via Delete()
161 : public:
162 0 : DECL_FIXEDMEMPOOL_NEWDEL( ScRawToken );
163 0 : formula::StackVar GetType() const { return (formula::StackVar) eType; }
164 0 : OpCode GetOpCode() const { return (OpCode) eOp; }
165 0 : void NewOpCode( OpCode e ) { eOp = e; }
166 0 : void IncRef() { nRefCnt++; }
167 0 : void DecRef() { if( !--nRefCnt ) Delete(); } // calls Delete() once the count drops to zero
168 : sal_uInt16 GetRef() const { return nRefCnt; }
169 : SC_DLLPUBLIC void Delete();
170 :
171 : // Use these methods only on tokens that are not part of a token array,
172 : // since the reference count is cleared!
173 : void SetOpCode( OpCode eCode );
174 : void SetString( rtl_uString* pData, rtl_uString* pDataIgoreCase );
175 : void SetSingleReference( const ScSingleRefData& rRef );
176 : void SetDoubleReference( const ScComplexRefData& rRef );
177 : void SetDouble( double fVal );
178 : void SetErrorConstant( sal_uInt16 nErr );
179 :
180 : // These methods are ok to use, reference count not cleared.
181 : void SetName(bool bGlobal, sal_uInt16 nIndex);
182 : void SetExternalSingleRef( sal_uInt16 nFileId, const OUString& rTabName, const ScSingleRefData& rRef );
183 : void SetExternalDoubleRef( sal_uInt16 nFileId, const OUString& rTabName, const ScComplexRefData& rRef );
184 : void SetExternalName( sal_uInt16 nFileId, const OUString& rName );
185 : void SetMatrix( ScMatrix* p );
186 : void SetExternal(const sal_Unicode* pStr);
187 :
188 : /** If the token is a non-external reference, determine if the reference is
189 : valid. If the token is an external reference, return true. Else return
190 : false. Used only in ScCompiler::NextNewToken() to preserve non-existing
191 : sheet names in otherwise valid references.
192 : */
193 : bool IsValidReference() const;
194 :
195 : ScRawToken* Clone() const; // real copy!
196 : formula::FormulaToken* CreateToken() const; // create typified token
197 : void Load( SvStream&, sal_uInt16 nVer );
198 :
199 : static sal_Int32 GetStrLen( const sal_Unicode* pStr ); // as long as a "string" is an array
200 0 : static size_t GetStrLenBytes( sal_Int32 nLen ) // size in bytes of nLen sal_Unicode characters
201 0 : { return nLen * sizeof(sal_Unicode); }
202 0 : static size_t GetStrLenBytes( const sal_Unicode* pStr ) // same, with the length taken from GetStrLen( pStr )
203 0 : { return GetStrLenBytes( GetStrLen( pStr ) ); }
204 : };
205 :
206 0 : inline void intrusive_ptr_add_ref(ScRawToken* p) // hook used by boost::intrusive_ptr<ScRawToken> (ScRawTokenRef) to take a reference
207 : {
208 0 : p->IncRef();
209 0 : }
210 :
211 0 : inline void intrusive_ptr_release(ScRawToken* p) // hook used by boost::intrusive_ptr<ScRawToken> (ScRawTokenRef) to drop a reference
212 : {
213 0 : p->DecRef(); // DecRef() calls Delete() on the token when the count reaches zero
214 0 : }
215 :
216 : typedef ::boost::intrusive_ptr<ScRawToken> ScRawTokenRef;
217 :
218 : class SC_DLLPUBLIC ScCompiler : public formula::FormulaCompiler // FormulaCompiler subclass bound to a ScDocument (pDoc) and a position (aPos)
219 : {
220 : public:
221 :
222 : enum ExtendedErrorDetection
223 : {
224 : EXTENDED_ERROR_DETECTION_NONE = 0, // no error on unknown symbols, default (interpreter handles it)
225 : EXTENDED_ERROR_DETECTION_NAME_BREAK, // name error on unknown symbols and break, pCode incomplete
226 : EXTENDED_ERROR_DETECTION_NAME_NO_BREAK // name error on unknown symbols, don't break, continue
227 : };
228 :
229 : struct Convention // abstract interface of one address convention (meConv); the pure virtuals build and parse reference/name strings
230 : {
231 : const formula::FormulaGrammar::AddressConvention meConv;
232 :
233 : Convention( formula::FormulaGrammar::AddressConvention eConvP );
234 : virtual ~Convention();
235 :
236 : virtual void makeRefStr(
237 : OUStringBuffer& rBuffer,
238 : formula::FormulaGrammar::Grammar eGram,
239 : const ScAddress& rPos,
240 : const OUString& rErrRef, const std::vector<OUString>& rTabNames,
241 : const ScComplexRefData& rRef, bool bSingleRef ) const = 0;
242 :
243 : virtual ::com::sun::star::i18n::ParseResult
244 : parseAnyToken( const OUString& rFormula,
245 : sal_Int32 nSrcPos,
246 : const CharClass* pCharClass) const = 0;
247 :
248 : /**
249 : * Parse the symbol string and pick up the file name and the external
250 : * range name.
251 : *
252 : * @return true on successful parse, or false otherwise.
253 : */
254 : virtual bool parseExternalName( const OUString& rSymbol, OUString& rFile, OUString& rName,
255 : const ScDocument* pDoc,
256 : const ::com::sun::star::uno::Sequence<
257 : com::sun::star::sheet::ExternalLinkInfo>* pExternalLinks ) const = 0;
258 :
259 : virtual OUString makeExternalNameStr( const OUString& rFile, const OUString& rName ) const = 0;
260 :
261 : virtual void makeExternalRefStr(
262 : OUStringBuffer& rBuffer, const ScAddress& rPos, const OUString& rFileName,
263 : const OUString& rTabName, const ScSingleRefData& rRef ) const = 0;
264 :
265 : virtual void makeExternalRefStr(
266 : OUStringBuffer& rBuffer, const ScAddress& rPos,
267 : const OUString& rFileName, const std::vector<OUString>& rTabNames,
268 : const OUString& rTabName, const ScComplexRefData& rRef ) const = 0;
269 :
270 : enum SpecialSymbolType
271 : {
272 : /**
273 : * Character between sheet name and address. In OOO A1 this is
274 : * '.', while XL A1 and XL R1C1 this is '!'.
275 : */
276 : SHEET_SEPARATOR,
277 :
278 : /**
279 : * In OOO A1, a sheet name may be prefixed with '$' to indicate an
280 : * absolute sheet position.
281 : */
282 : ABS_SHEET_PREFIX
283 : };
284 : virtual sal_Unicode getSpecialSymbol( SpecialSymbolType eSymType ) const = 0;
285 :
286 : virtual sal_uLong getCharTableFlags( sal_Unicode c, sal_Unicode cLast ) const = 0;
287 :
288 : protected:
289 : const sal_uLong* mpCharTable; // NOTE(review): presumably the table of SC_COMPILER_C_* flags per character - confirm in the implementation
290 : };
291 : friend struct Convention;
292 :
293 : private:
294 :
295 : static CharClass *pCharClassEnglish; // character classification for en_US locale
296 : static const Convention *pConventions[ formula::FormulaGrammar::CONV_LAST ]; // array sized by the number of address conventions (CONV_LAST)
297 :
298 : static struct AddInMap
299 : {
300 : const char* pODFF;
301 : const char* pEnglish;
302 : bool bMapDupToInternal; // when writing ODFF
303 : const char* pOriginal; // programmatical name
304 : const char* pUpper; // upper case programmatical name
305 : } maAddInMap[];
306 : static const AddInMap* GetAddInMap();
307 : static size_t GetAddInMapCount();
308 :
309 : ScDocument* pDoc;
310 : ScAddress aPos;
311 :
312 : SvNumberFormatter* mpFormatter;
313 :
314 : // For CONV_XL_OOX, may be set via API by MOOXML filter.
315 : com::sun::star::uno::Sequence<com::sun::star::sheet::ExternalLinkInfo> maExternalLinks;
316 :
317 : sal_Unicode cSymbol[MAXSTRLEN]; // current Symbol
318 : OUString aFormula; // formula source code
319 : sal_Int32 nSrcPos; // tokenizer position (source code)
320 : mutable ScRawTokenRef pRawToken;
321 :
322 : const CharClass* pCharClass; // which character classification is used for parseAnyToken
323 : sal_uInt16 mnPredetectedReference; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
324 : SCsTAB nMaxTab; // last sheet in document
325 : sal_Int32 mnRangeOpPosInSymbol; // if and where a range operator is in symbol
326 : const Convention *pConv;
327 : ExtendedErrorDetection meExtendedErrorDetection;
328 : bool mbCloseBrackets; // whether to close open brackets automatically, default TRUE
329 : bool mbRewind; // whether symbol is to be rewound to some step during lexical analysis
330 : std::vector<sal_uInt16> maExternalFiles;
331 : std::vector<OUString> maTabNames;
332 :
333 : bool NextNewToken(bool bInArray = false);
334 :
335 : virtual void SetError(sal_uInt16 nError) SAL_OVERRIDE;
336 : sal_Int32 NextSymbol(bool bInArray);
337 : bool IsValue( const OUString& );
338 : bool IsOpCode( const OUString&, bool bInArray );
339 : bool IsOpCode2( const OUString& );
340 : bool IsString();
341 : bool IsReference( const OUString& );
342 : bool IsSingleReference( const OUString& );
343 : bool IsPredetectedReference(const OUString&);
344 : bool IsDoubleReference( const OUString& );
345 : bool IsMacro( const OUString& );
346 : bool IsNamedRange( const OUString& );
347 : bool IsExternalNamedRange( const OUString& rSymbol );
348 : bool IsDBRange( const OUString& );
349 : bool IsColRowName( const OUString& );
350 : bool IsBoolean( const OUString& );
351 : void AutoCorrectParsedSymbol();
352 :
353 : void SetRelNameReference();
354 :
355 : /** Obtain range data for ocName token, global or sheet local.
356 :
357 : Prerequisite: pToken is a FormulaIndexToken so IsGlobal() and
358 : GetIndex() can be called on it. We don't check with RTTI.
359 : */
360 : ScRangeData* GetRangeData( const formula::FormulaToken& pToken ) const;
361 :
362 : static void InitCharClassEnglish();
363 :
364 : public:
365 : ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos );
366 :
367 : ScCompiler( ScDocument* pDocument, const ScAddress&);
368 :
369 : ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos, ScTokenArray& rArr );
370 :
371 : ScCompiler( ScDocument* pDocument, const ScAddress&,ScTokenArray& rArr);
372 :
373 : virtual ~ScCompiler();
374 :
375 : public:
376 : static void DeInit(); /// all
377 :
378 : // for ScAddress::Format()
379 : static void CheckTabQuotes( OUString& aTabName,
380 : const formula::FormulaGrammar::AddressConvention eConv = formula::FormulaGrammar::CONV_OOO );
381 :
382 : /** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc..
383 :
384 : @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
385 : -1 if none. */
386 : static sal_Int32 GetDocTabPos( const OUString& rString );
387 :
388 : static bool EnQuote( OUString& rStr );
389 : sal_Unicode GetNativeAddressSymbol( Convention::SpecialSymbolType eType ) const;
390 :
391 : // Check if it is a valid English function name
392 : bool IsEnglishSymbol( const OUString& rName );
393 : bool IsErrorConstant( const OUString& ) const;
394 :
395 : //! _either_ CompileForFAP _or_ AutoCorrection, _not_ both
396 : // #i101512# SetCompileForFAP is in formula::FormulaCompiler
397 0 : void SetAutoCorrection( bool bVal ) // sets both bAutoCorrect and bIgnoreErrors to bVal
398 0 : { bAutoCorrect = bVal; bIgnoreErrors = bVal; }
399 0 : void SetCloseBrackets( bool bVal ) { mbCloseBrackets = bVal; }
400 : void SetRefConvention( const Convention *pConvP );
401 : void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv );
402 :
403 : static const Convention* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv );
404 :
405 : /// Set symbol map if not empty.
406 : void SetFormulaLanguage( const OpCodeMapPtr & xMap );
407 :
408 : void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar );
409 :
410 : void SetNumberFormatter( SvNumberFormatter* pFormatter );
411 :
412 : private:
413 : /** Set grammar and reference convention from within SetFormulaLanguage()
414 : or SetGrammar().
415 :
416 : @param eNewGrammar
417 : The new grammar to be set and the associated reference convention.
418 :
419 : @param eOldGrammar
420 : The previous grammar that was active before SetFormulaLanguage().
421 : */
422 : void SetGrammarAndRefConvention(
423 : const formula::FormulaGrammar::Grammar eNewGrammar,
424 : const formula::FormulaGrammar::Grammar eOldGrammar );
425 : public:
426 :
427 : /// Set external link info for ScAddress::CONV_XL_OOX.
428 0 : void SetExternalLinks(
429 : const ::com::sun::star::uno::Sequence<
430 : com::sun::star::sheet::ExternalLinkInfo>& rLinks )
431 : {
432 0 : maExternalLinks = rLinks;
433 0 : }
434 :
435 : void CreateStringFromXMLTokenArray( OUString& rFormula, OUString& rFormulaNmsp );
436 :
437 0 : void SetExtendedErrorDetection( ExtendedErrorDetection eVal ) { meExtendedErrorDetection = eVal; }
438 :
439 0 : bool IsCorrected() { return bCorrected; }
440 0 : const OUString& GetCorrectedFormula() { return aCorrectedFormula; }
441 :
442 : /**
443 : * Tokenize formula expression string into an array of tokens.
444 : *
445 : * @param rFormula formula expression to tokenize.
446 : *
447 : * @return heap allocated token array object. The caller <i>must</i>
448 : * manage the life cycle of this object.
449 : */
450 : ScTokenArray* CompileString( const OUString& rFormula );
451 : ScTokenArray* CompileString( const OUString& rFormula, const OUString& rFormulaNmsp );
452 : const ScDocument* GetDoc() const { return pDoc; }
453 0 : const ScAddress& GetPos() const { return aPos; }
454 :
455 : void MoveRelWrap( SCCOL nMaxCol, SCROW nMaxRow );
456 : static void MoveRelWrap( ScTokenArray& rArr, ScDocument* pDoc, const ScAddress& rPos,
457 : SCCOL nMaxCol, SCROW nMaxRow );
458 :
459 : /** Check whether the character is allowed as tested by nFlags (SC_COMPILER_C_...
460 : bits) for all known address conventions. If more than one bit is given
461 : in nFlags, all bits must match. If bTestLetterNumeric is false and
462 : char>=128, no LetterNumeric test is done and false is returned. */
463 : static bool IsCharFlagAllConventions(
464 : OUString const & rStr, sal_Int32 nPos, sal_uLong nFlags, bool bTestLetterNumeric = true );
465 :
466 : private:
467 : // FormulaCompiler
468 : virtual OUString FindAddInFunction( const OUString& rUpperName, bool bLocalFirst ) const SAL_OVERRIDE;
469 : virtual void fillFromAddInCollectionUpperName( NonConstOpCodeMapPtr xMap ) const SAL_OVERRIDE;
470 : virtual void fillFromAddInCollectionEnglishName( NonConstOpCodeMapPtr xMap ) const SAL_OVERRIDE;
471 : virtual void fillFromAddInMap( NonConstOpCodeMapPtr xMap, formula::FormulaGrammar::Grammar _eGrammar ) const SAL_OVERRIDE;
472 : virtual void fillAddInToken(::std::vector< ::com::sun::star::sheet::FormulaOpCodeMapEntry >& _rVec,bool _bIsEnglish) const SAL_OVERRIDE;
473 :
474 : virtual bool HandleExternalReference(const formula::FormulaToken& _aToken) SAL_OVERRIDE;
475 : virtual bool HandleRange() SAL_OVERRIDE;
476 : virtual bool HandleSingleRef() SAL_OVERRIDE;
477 : virtual bool HandleDbData() SAL_OVERRIDE;
478 :
479 : virtual formula::FormulaTokenRef ExtendRangeReference( formula::FormulaToken & rTok1, formula::FormulaToken & rTok2, bool bReuseDoubleRef ) SAL_OVERRIDE;
480 : virtual void CreateStringFromExternal(OUStringBuffer& rBuffer, formula::FormulaToken* pTokenP) const SAL_OVERRIDE;
481 : virtual void CreateStringFromSingleRef(OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP) const SAL_OVERRIDE;
482 : virtual void CreateStringFromDoubleRef(OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP) const SAL_OVERRIDE;
483 : virtual void CreateStringFromMatrix( OUStringBuffer& rBuffer, formula::FormulaToken* _pTokenP) const SAL_OVERRIDE;
484 : virtual void CreateStringFromIndex(OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP) const SAL_OVERRIDE;
485 : virtual void LocalizeString( OUString& rName ) const SAL_OVERRIDE; // modify rName - input: exact name
486 :
487 : /// Access the CharTable flags
488 0 : inline sal_uLong GetCharTableFlags( sal_Unicode c, sal_Unicode cLast )
489 0 : { return c < 128 ? pConv->getCharTableFlags(c, cLast) : 0; } // c >= 128 yields 0 (SC_COMPILER_C_ILLEGAL); otherwise the current convention pConv is asked
490 : };
491 :
492 : #endif
493 :
494 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|