Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
21 : #define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
22 :
23 : #include <com/sun/star/uno/Sequence.hxx>
24 : #include <rtl/ustring.hxx>
25 :
26 : class Date;
27 : class SvNumberformat;
28 : class SvNumberFormatter;
29 :
30 : #define SV_MAX_ANZ_INPUT_STRINGS 20 // max count of substrings in input scanner (size of the sStrArray, IsNum and nNums arrays)
31 :
32 : class ImpSvNumberInputScan // Input scanner: divides an input string into substrings and tries to convert it to a number, see IsNumberFormat()
33 : {
34 : public:
35 : ImpSvNumberInputScan( SvNumberFormatter* pFormatter );
36 : ~ImpSvNumberInputScan();
37 :
38 : /*!*/ void ChangeIntl(); // MUST be called if language changes
39 :
40 : /// set reference date for offset calculation
41 : void ChangeNullDate( const sal_uInt16 nDay,
42 : const sal_uInt16 nMonth,
43 : const sal_uInt16 nYear );
44 :
45 : /// convert input string to number
46 : bool IsNumberFormat( const OUString& rString, /// input string
47 : short& F_Type, /// format type (in + out)
48 : double& fOutNumber, /// value determined (out)
49 : const SvNumberformat* pFormat = NULL); /// optional number format to compare against
50 :
51 : /// after IsNumberFormat: get decimal position (value of nDecPos)
52 104 : short GetDecPos() const { return nDecPos; }
53 : /// after IsNumberFormat: get count of numeric substrings in input string (value of nAnzNums)
54 0 : sal_uInt16 GetAnzNums() const { return nAnzNums; }
55 :
56 : /// set threshold of two-digit year input
57 9180 : void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
58 : /// get threshold of two-digit year input
59 88 : sal_uInt16 GetYear2000() const { return nYear2000; }
60 :
61 : /** Whether input can be forced to ISO 8601 format.
62 :
63 : Depends on locale's date separator and a specific date format order.
64 :
65 : @param eDateFormat
66 : Evaluated only on first call during one scan process, subsequent
67 : calls return state of nCanForceToIso8601!
68 :
69 : @see nCanForceToIso8601
70 : */
71 : bool CanForceToIso8601( DateFormat eDateFormat );
72 :
73 : void InvalidateDateAcceptancePatterns();
74 :
75 : private:
76 : SvNumberFormatter* pFormatter;
77 : OUString* pUpperMonthText; //* Array of month names, uppercase
78 : OUString* pUpperAbbrevMonthText; //* Array of month names, abbreviated, uppercase
79 : OUString* pUpperGenitiveMonthText; //* Array of genitive month names, uppercase
80 : OUString* pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase
81 : OUString* pUpperPartitiveMonthText; //* Array of partitive month names, uppercase
82 : OUString* pUpperPartitiveAbbrevMonthText; //* Array of partitive month names, abbreviated, uppercase
83 : OUString* pUpperDayText; //* Array of day of week names, uppercase
84 : OUString* pUpperAbbrevDayText; //* Array of day of week names, abbreviated, uppercase
85 : OUString aUpperCurrSymbol; //* Currency symbol, uppercase
86 : bool bTextInitialized; //* Whether days and months are initialized
87 : bool bScanGenitiveMonths; //* Whether to scan an input for genitive months
88 : bool bScanPartitiveMonths; //* Whether to scan an input for partitive months
89 : Date* pNullDate; //* 30Dec1899
90 : // Variables for provisional results:
91 : OUString sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; //* Array of scanned substrings
92 : bool IsNum[SV_MAX_ANZ_INPUT_STRINGS]; //* Whether a substring is numeric
93 : sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; //* Sequence of offsets to numeric strings
94 : sal_uInt16 nAnzStrings; //* Total count of scanned substrings
95 : sal_uInt16 nAnzNums; //* Count of numeric substrings
96 : bool bDecSepInDateSeps; //* True <=> DecSep in {.,-,/,DateSep}
97 : sal_uInt8 nMatchedAllStrings; //* Scan...String() matched all substrings,
98 :
99 : // bit mask of nMatched... constants
100 : static const sal_uInt8 nMatchedEndString; // 0x01
101 : static const sal_uInt8 nMatchedMidString; // 0x02
102 : static const sal_uInt8 nMatchedStartString; // 0x04
103 : static const sal_uInt8 nMatchedVirgin; // 0x08
104 : static const sal_uInt8 nMatchedUsedAsReturn; // 0x10
105 :
106 : int nSign; // Sign of number
107 : int nMonth; // Month (1..x) if date
108 : // negative => short format
109 : short nMonthPos; // 1 = front, 2 = middle
110 : // 3 = end
111 : sal_uInt16 nTimePos; // Index of first time separator (+1)
112 : short nDecPos; // Index of substring containing "," (+1)
113 : short nNegCheck; // '( )' for negative
114 : short nESign; // Sign of exponent
115 : short nAmPm; // +1 AM, -1 PM, 0 if none
116 : short nLogical; // -1 => False, 1 => True
117 : sal_uInt16 nThousand; // Count of group (AKA thousand) separators
118 : sal_uInt16 nPosThousandString; // Position of concatenated 000,000,000 string
119 : short eScannedType; // Scanned type
120 : short eSetType; // Preset Type
121 :
122 : sal_uInt16 nStringScanNumFor; // Fixed strings recognized in
123 : // pFormat->NumFor[nStringScanNumFor]
124 : short nStringScanSign; // Sign resulting of FixString
125 : sal_uInt16 nYear2000; // Two-digit threshold
126 : // Year as 20xx
127 : // default 18
128 : // number <= nYear2000 => 20xx
129 : // number > nYear2000 => 19xx
130 : sal_uInt16 nTimezonePos; // Index of timezone separator (+1)
131 :
132 : /** State of ISO 8601 detection.
133 :
134 : 0:= don't know yet
135 : 1:= no
136 : 2:= yes, <=2 digits in year
137 : 3:= yes, 3 digits in year
138 : 4:= yes, >=4 digits in year
139 :
140 : @see MayBeIso8601()
141 : */
142 : sal_uInt8 nMayBeIso8601;
143 :
144 : /** State of ISO 8601 can be forced.
145 :
146 : 0:= don't know yet
147 : 1:= no
148 : 2:= yes
149 :
150 : @see CanForceToIso8601()
151 : */
152 : sal_uInt8 nCanForceToIso8601;
153 :
154 : /** State of dd-month-yy or yy-month-dd detection, with month name.
155 :
156 : 0:= don't know yet
157 : 1:= no
158 : 2:= yes, dd-month-yy
159 : 3:= yes, yy-month-dd
160 :
161 : @see MayBeMonthDate()
162 : */
163 : sal_uInt8 nMayBeMonthDate;
164 :
165 : /** Input matched this locale dependent date acceptance pattern.
166 : -2 if not checked yet, -1 if no match, >=0 matched pattern.
167 :
168 : @see IsAcceptedDatePattern()
169 : */
170 : sal_Int32 nAcceptedDatePattern;
171 : com::sun::star::uno::Sequence< OUString > sDateAcceptancePatterns;
172 :
173 : /** If input matched a date acceptance pattern that starts at input
174 : particle sStrArray[nDatePatternStart].
175 :
176 : @see IsAcceptedDatePattern()
177 : */
178 : sal_uInt16 nDatePatternStart;
179 :
180 : /** Count of numbers that matched the accepted pattern, if any, else 0.
181 :
182 : @see GetDatePatternNumbers()
183 : */
184 : sal_uInt16 nDatePatternNumbers;
185 :
186 : void Reset(); // Reset all variables before start of analysis
187 :
188 : void InitText(); // Init of months and days of week
189 :
190 : // Convert string to double.
191 : // Only simple unsigned floating point values without any error detection,
192 : // decimal separator has to be '.'
193 : // If bForceFraction==true the string is taken to be the fractional part
194 : // of 0.1234 without the leading 0. (thus being just "1234").
195 : double StringToDouble( const OUString& rStr,
196 : bool bForceFraction = false );
197 :
198 : // Next number/string symbol
199 : bool NextNumberStringSymbol( const sal_Unicode*& pStr,
200 : OUString& rSymbol );
201 :
202 : // Concatenate ,000,23 blocks
203 : // in input to 000123
204 : bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol );
205 :
206 : // Divide numbers/strings into
207 : // arrays and variables above.
208 : // Leading blanks and blanks
209 : // after numbers are thrown away
210 : void NumberStringDivision( const OUString& rString );
211 :
212 :
213 : /** Whether rString contains word (!) rWhat at nPos.
214 : rWhat will not be matched if it is a substring of a word.
215 : */
216 : bool StringContainsWord( const OUString& rWhat,
217 : const OUString& rString,
218 : sal_Int32 nPos );
219 :
220 : // optimized substring versions
221 :
222 : // Whether rString contains rWhat at nPos; false if rWhat is empty or nPos is at or beyond the end of rString
223 14542 : static inline bool StringContains( const OUString& rWhat,
224 : const OUString& rString,
225 : sal_Int32 nPos )
226 : {
227 14542 : if (rWhat.isEmpty() || rString.getLength() <= nPos) // nothing to look for, or no character at nPos
228 : {
229 2932 : return false;
230 : }
231 : // mostly used with one character, so compare the first character before calling the Impl
232 11610 : if ( rWhat[ 0 ] != rString[ nPos ] )
233 : {
234 10362 : return false;
235 : }
236 1248 : return StringContainsImpl( rWhat, rString, nPos ); // first character matches, delegate to StringContainsImpl()
237 : }
238 :
239 : // Whether pString contains rWhat at nPos; unlike StringContains() no empty or length check is done here
240 0 : static inline bool StringPtrContains( const OUString& rWhat,
241 : const sal_Unicode* pString,
242 : sal_Int32 nPos ) // nPos MUST be a valid offset from pString
243 : {
244 : // mostly used with one character, so compare the first character before calling the Impl
245 0 : if ( rWhat[ 0 ] != pString[ nPos ] )
246 : {
247 0 : return false;
248 : }
249 0 : return StringPtrContainsImpl( rWhat, pString, nPos ); // first character matches, delegate to StringPtrContainsImpl()
250 : }
251 :
252 : //! DO NOT use directly, is called by StringContains()
253 : static bool StringContainsImpl( const OUString& rWhat,
254 : const OUString& rString,
255 : sal_Int32 nPos );
256 : //! DO NOT use directly, is called by StringPtrContains()
257 : static bool StringPtrContainsImpl( const OUString& rWhat,
258 : const sal_Unicode* pString,
259 : sal_Int32 nPos );
260 :
261 : // Skip a special character
262 : static inline bool SkipChar( sal_Unicode c,
263 : const OUString& rString,
264 : sal_Int32& nPos );
265 :
266 : // Skip blank
267 : static inline void SkipBlanks( const OUString& rString,
268 : sal_Int32& nPos );
269 :
270 : // Jump over rWhat in rString at nPos
271 : static inline bool SkipString( const OUString& rWhat,
272 : const OUString& rString,
273 : sal_Int32& nPos );
274 :
275 : // Recognizes exactly ,111 as group separator
276 : inline bool GetThousandSep( const OUString& rString,
277 : sal_Int32& nPos,
278 : sal_uInt16 nStringPos );
279 : // Get boolean value
280 : short GetLogical( const OUString& rString );
281 :
282 : // Get month and advance string position
283 : short GetMonth( const OUString& rString,
284 : sal_Int32& nPos );
285 :
286 : // Get day of week and advance string position
287 : int GetDayOfWeek( const OUString& rString,
288 : sal_Int32& nPos );
289 :
290 : // Get currency symbol and advance string position
291 : bool GetCurrency( const OUString& rString,
292 : sal_Int32& nPos,
293 : const SvNumberformat* pFormat = NULL ); // optional number format to match against
294 :
295 : // Get symbol AM or PM and advance string position
296 : bool GetTimeAmPm( const OUString& rString,
297 : sal_Int32& nPos );
298 :
299 : // Get decimal separator and advance string position
300 : inline bool GetDecSep( const OUString& rString,
301 : sal_Int32& nPos );
302 :
303 : // Get hundredth seconds separator and advance string position
304 : inline bool GetTime100SecSep( const OUString& rString,
305 : sal_Int32& nPos );
306 :
307 : // Get sign and advance string position
308 : // Including special case '('
309 : int GetSign( const OUString& rString,
310 : sal_Int32& nPos );
311 :
312 : // Get sign of exponent and advance string position
313 : short GetESign( const OUString& rString,
314 : sal_Int32& nPos );
315 :
316 : // Get next number as array offset
317 : inline bool GetNextNumber( sal_uInt16& i,
318 : sal_uInt16& j );
319 :
320 : /** Converts time -> double (only decimals)
321 :
322 : @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
323 : */
324 : bool GetTimeRef( double& fOutNumber, // result as double
325 : sal_uInt16 nIndex, // Index of hour in input
326 : sal_uInt16 nAnz ); // Count of time substrings in input
327 : sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ); // Day input, 0 if no match
328 : sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ); // Month input, zero based return, NumberOfMonths if no match
329 : sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match
330 :
331 : // Conversion of date to number
332 : bool GetDateRef( double& fDays, // OUT: days diff to null date
333 : sal_uInt16& nCounter, // Count of date substrings
334 : const SvNumberformat* pFormat = NULL ); // optional number format to match against
335 :
336 : // Analyze start of string
337 : bool ScanStartString( const OUString& rString,
338 : const SvNumberformat* pFormat = NULL );
339 :
340 : // Analyze middle substring
341 : bool ScanMidString( const OUString& rString,
342 : sal_uInt16 nStringPos,
343 : const SvNumberformat* pFormat = NULL );
344 :
345 :
346 : // Analyze end of string
347 : bool ScanEndString( const OUString& rString,
348 : const SvNumberformat* pFormat = NULL );
349 :
350 : // Compare rString to substring of array indexed by nString
351 : // nString == 0xFFFF => last substring
352 : bool ScanStringNumFor( const OUString& rString,
353 : sal_Int32 nPos,
354 : const SvNumberformat* pFormat,
355 : sal_uInt16 nString,
356 : bool bDontDetectNegation = false );
357 :
358 : // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
359 : // else do nothing and return false
360 : bool MatchedReturn();
361 :
362 : //! Be sure that the string to be analyzed is already converted to upper
363 : //! case and if it contained native number digits that they are already
364 : //! converted to ASCII.
365 :
366 : // Main analyzing function
367 : bool IsNumberFormatMain( const OUString& rString,
368 : const SvNumberformat* pFormat = NULL); // optional number format to match against
369 :
370 : static inline bool MyIsdigit( sal_Unicode c );
371 :
372 : /** Whether input matches locale dependent date acceptance pattern.
373 :
374 : @param nStartPatternAt
375 : The pattern matching starts at input particle
376 : sStrArray[nStartPatternAt].
377 :
378 : NOTE: once called the result is remembered, subsequent calls with
379 : different parameters do not check for a match and do not lead to a
380 : different result.
381 : */
382 : bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );
383 :
384 : /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
385 : matches separator in pattern at nParticle.
386 :
387 : @returns TRUE if separator matched.
388 : */
389 : bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos );
390 :
391 : /** Returns count of numbers in accepted date pattern.
392 : */
393 : sal_uInt16 GetDatePatternNumbers();
394 :
395 : /** Obtain order of accepted date pattern coded as, for example,
396 : ('D'<<16)|('M'<<8)|'Y'
397 : */
398 : sal_uInt32 GetDatePatternOrder();
399 :
400 : /** Obtain date format order, from accepted date pattern if available or
401 : otherwise the locale's default order.
402 : */
403 : DateFormat GetDateOrder();
404 :
405 : /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
406 :
407 : Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
408 : is '-', and 1<=mm<=12 and 1<=dd<=31.
409 :
410 : @see nMayBeIso8601
411 : */
412 : bool MayBeIso8601();
413 :
414 : /** Whether input may be a dd-month-yy format, with month name, not
415 : number.
416 :
417 : @see nMayBeMonthDate
418 : */
419 : bool MayBeMonthDate();
420 : };
421 :
422 : #endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
423 :
424 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|