summaryrefslogtreecommitdiff
path: root/svl/source/numbers/zforfind.hxx
blob: 43787391ced623acd017b2aa7a285cc0eee4b7c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
#define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX

#include <com/sun/star/uno/Sequence.hxx>
#include <rtl/ustring.hxx>

// Forward declarations: within this header these types are only used through
// pointers (members and parameters), so their full definitions are not needed.
class Date;
class SvNumberformat;
class SvNumberFormatter;

// NOTE(review): DateFormat is used by value in ImpSvNumberInputScan below but
// is neither declared nor included in this header; presumably every includer
// pulls in its declaration first -- confirm.

// Capacity of the fixed-size per-substring arrays of ImpSvNumberInputScan
// (sStrArray, IsNum, nNums).
#define SV_MAX_ANZ_INPUT_STRINGS  20    // max count of substrings in input scanner

/** Scanner that analyzes an input string and converts it to a number.

    The public entry point is IsNumberFormat(). The input is divided into
    numeric and non-numeric substrings (at most SV_MAX_ANZ_INPUT_STRINGS),
    which are then examined by the private Scan...String() and Get...()
    helpers for signs, separators, currency symbols, month and day names,
    AM/PM and logical values.

    NOTE(review): the class holds raw pointer members and declares a
    destructor, but no copy constructor or copy assignment operator;
    presumably instances are not meant to be copied -- confirm against the
    implementation file.
 */
class ImpSvNumberInputScan
{
public:
    explicit ImpSvNumberInputScan( SvNumberFormatter* pFormatter );
    ~ImpSvNumberInputScan();

/*!*/   void ChangeIntl();                      // MUST be called if language changes

    /// set reference date for offset calculation
    void ChangeNullDate( const sal_uInt16 nDay,
                         const sal_uInt16 nMonth,
                         const sal_uInt16 nYear );

    /// convert input string to number
    bool IsNumberFormat( const OUString& rString,            /// input string
                         short& F_Type,                      /// format type (in + out)
                         double& fOutNumber,                 /// value determined (out)
                         const SvNumberformat* pFormat = NULL);  /// optional number format to compare against

    /// after IsNumberFormat: get decimal position
    short   GetDecPos() const { return nDecPos; }
    /// after IsNumberFormat: get count of numeric substrings in input string
    sal_uInt16  GetAnzNums() const { return nAnzNums; }

    /// set threshold of two-digit year input
    void    SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
    /// get threshold of two-digit year input
    sal_uInt16  GetYear2000() const { return nYear2000; }

    /** Whether input can be forced to ISO 8601 format.

        Depends on locale's date separator and a specific date format order.

        @param eDateFormat
            Evaluated only on first call during one scan process, subsequent
            calls return state of nCanForceToIso8601!

        @see nCanForceToIso8601
     */
    bool CanForceToIso8601( DateFormat eDateFormat );

    void InvalidateDateAcceptancePatterns();

private:
    SvNumberFormatter*  pFormatter;
    OUString* pUpperMonthText;                  //* Array of month names, uppercase
    OUString* pUpperAbbrevMonthText;            //* Array of month names, abbreviated, uppercase
    OUString* pUpperGenitiveMonthText;          //* Array of genitive month names, uppercase
    OUString* pUpperGenitiveAbbrevMonthText;    //* Array of genitive month names, abbreviated, uppercase
    OUString* pUpperPartitiveMonthText;         //* Array of partitive month names, uppercase
    OUString* pUpperPartitiveAbbrevMonthText;   //* Array of partitive month names, abbreviated, uppercase
    OUString* pUpperDayText;                    //* Array of day of week names, uppercase
    OUString* pUpperAbbrevDayText;              //* Array of day of week names, abbreviated, uppercase
    OUString  aUpperCurrSymbol;                 //* Currency symbol, uppercase
    bool    bTextInitialized;                   //* Whether days and months are initialized
    bool    bScanGenitiveMonths;                //* Whether to scan an input for genitive months
    bool    bScanPartitiveMonths;               //* Whether to scan an input for partitive months
    Date* pNullDate;                            //* 30Dec1899
    // Variables for provisional results:
    OUString sStrArray[SV_MAX_ANZ_INPUT_STRINGS]; //* Array of scanned substrings
    bool       IsNum[SV_MAX_ANZ_INPUT_STRINGS]; //* Whether a substring is numeric
    sal_uInt16 nNums[SV_MAX_ANZ_INPUT_STRINGS]; //* Sequence of offsets to numeric strings
    sal_uInt16 nAnzStrings;                     //* Total count of scanned substrings
    sal_uInt16 nAnzNums;                        //* Count of numeric substrings
    bool   bDecSepInDateSeps;                   //* True <=> DecSep in {.,-,/,DateSep}
    sal_uInt8   nMatchedAllStrings;             //* Scan...String() matched all substrings,
                                                //* bit mask of nMatched... constants

    // Bit values for nMatchedAllStrings
    static const sal_uInt8 nMatchedEndString;        // 0x01
    static const sal_uInt8 nMatchedMidString;        // 0x02
    static const sal_uInt8 nMatchedStartString;      // 0x04
    static const sal_uInt8 nMatchedVirgin;           // 0x08
    static const sal_uInt8 nMatchedUsedAsReturn;     // 0x10

    int    nSign;                               // Sign of number
    int    nMonth;                              // Month (1..x) if date
                                                // negative => short format
    short  nMonthPos;                           // 1 = front, 2 = middle
                                                // 3 = end
    sal_uInt16 nTimePos;                        // Index of first time separator (+1)
    short  nDecPos;                             // Index of substring containing "," (+1)
    short  nNegCheck;                           // '( )' for negative
    short  nESign;                              // Sign of exponent
    short  nAmPm;                               // +1 AM, -1 PM, 0 if none
    short  nLogical;                            // -1 => False, 1 => True
    sal_uInt16 nThousand;                       // Count of group (AKA thousand) separators
    sal_uInt16 nPosThousandString;              // Position of concatenated 000,000,000 string
    short  eScannedType;                        // Scanned type
    short  eSetType;                            // Preset Type

    sal_uInt16 nStringScanNumFor;               // Fixed strings recognized in
                                                // pFormat->NumFor[nStringScanNumFor]
    short  nStringScanSign;                     // Sign resulting of FixString
    sal_uInt16 nYear2000;                       // Two-digit threshold
                                                // Year as 20xx
                                                // default 18
                                                // number <= nYear2000 => 20xx
                                                // number >  nYear2000 => 19xx
    sal_uInt16  nTimezonePos;                   // Index of timezone separator (+1)

    /** State of ISO 8601 detection.

        0:= don't know yet
        1:= no
        2:= yes, <=2 digits in year
        3:= yes,   3 digits in year
        4:= yes, >=4 digits in year

        @see MayBeIso8601()
     */
    sal_uInt8    nMayBeIso8601;

    /** State of ISO 8601 can be forced.

        0:= don't know yet
        1:= no
        2:= yes

        @see CanForceToIso8601()
     */
    sal_uInt8   nCanForceToIso8601;

    /** State of dd-month-yy or yy-month-dd detection, with month name.

        0:= don't know yet
        1:= no
        2:= yes, dd-month-yy
        3:= yes, yy-month-dd

        @see MayBeMonthDate()
     */
    sal_uInt8   nMayBeMonthDate;

    /** Input matched this locale dependent date acceptance pattern.
        -2 if not checked yet, -1 if no match, >=0 matched pattern.

        @see IsAcceptedDatePattern()
     */
    sal_Int32   nAcceptedDatePattern;
    com::sun::star::uno::Sequence< OUString >  sDateAcceptancePatterns;

    /** If input matched a date acceptance pattern that starts at input
        particle sStrArray[nDatePatternStart].

        @see IsAcceptedDatePattern()
     */
    sal_uInt16  nDatePatternStart;

    /** Count of numbers that matched the accepted pattern, if any, else 0.

        @see GetDatePatternNumbers()
     */
    sal_uInt16  nDatePatternNumbers;

    void Reset();                               // Reset all variables before start of analysis

    void InitText();                            // Init of months and days of week

    // Convert string to double.
    // Only simple unsigned floating point values without any error detection,
    // decimal separator has to be '.'
    // If bForceFraction==true the string is taken to be the fractional part
    // of 0.1234 without the leading 0. (thus being just "1234").
    static double StringToDouble( const OUString& rStr,
                                  bool bForceFraction = false );

    // Next number/string symbol
    static bool NextNumberStringSymbol( const sal_Unicode*& pStr,
                                        OUString& rSymbol );

    // Concatenate ,000,23 blocks
    // in input to 000123
    bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol );

    // Divide numbers/strings into
    // arrays and variables above.
    // Leading blanks and blanks
    // after numbers are thrown away
    void NumberStringDivision( const OUString& rString );


    /** Whether rString contains word (!) rWhat at nPos.
        rWhat will not be matched if it is a substring of a word.
     */
    bool StringContainsWord( const OUString& rWhat,
                             const OUString& rString,
                             sal_Int32 nPos );

    // optimized substring versions

    /** Whether rString contains rWhat at nPos.

        Returns false without touching the string data if rWhat is empty or
        nPos is not a valid index into rString. Otherwise the first
        character is compared inline and only on a match the full comparison
        is delegated to StringContainsImpl().
     */
    static inline bool StringContains( const OUString& rWhat,
                                       const OUString& rString,
                                       sal_Int32 nPos )
        {
            // Reject everything that would make the index accesses below
            // invalid: nothing to search for, or nPos outside of rString.
            if (nPos < 0 || rWhat.isEmpty() || rString.getLength() <= nPos)
            {
                return false;
            }
            // mostly used with one character
            if ( rWhat[ 0 ] != rString[ nPos ] )
            {
                return false;
            }
            return StringContainsImpl( rWhat, rString, nPos );
        }

    /** Whether pString contains rWhat at nPos.

        The caller is responsible for nPos being a valid offset from
        pString; no length is known here, so it cannot be checked. An empty
        rWhat yields false, same as in StringContains().
     */
    static inline bool StringPtrContains( const OUString& rWhat,
                                          const sal_Unicode* pString,
                                          sal_Int32 nPos ) // nPos MUST be a valid offset from pString
        {
            // An empty rWhat has no first character to compare against;
            // do not index into it.
            if (rWhat.isEmpty())
            {
                return false;
            }
            // mostly used with one character
            if ( rWhat[ 0 ] != pString[ nPos ] )
            {
                return false;
            }
            return StringPtrContainsImpl( rWhat, pString, nPos );
        }

    //! DO NOT use directly
    static bool StringContainsImpl( const OUString& rWhat,
                                    const OUString& rString,
                                    sal_Int32 nPos );
    //! DO NOT use directly
    static bool StringPtrContainsImpl( const OUString& rWhat,
                                       const sal_Unicode* pString,
                                       sal_Int32 nPos );

    // Skip a special character
    static inline bool SkipChar( sal_Unicode c,
                                 const OUString& rString,
                                 sal_Int32& nPos );

    // Skip blank
    static inline void SkipBlanks( const OUString& rString,
                                   sal_Int32& nPos );

    // Jump over rWhat in rString at nPos
    static inline bool SkipString( const OUString& rWhat,
                                   const OUString& rString,
                                   sal_Int32& nPos );

    // Recognizes exactly ,111 as group separator
    inline bool GetThousandSep( const OUString& rString,
                                sal_Int32& nPos,
                                sal_uInt16 nStringPos );
    // Get boolean value
    short GetLogical( const OUString& rString );

    // Get month and advance string position
    short GetMonth( const OUString& rString,
                    sal_Int32& nPos );

    // Get day of week and advance string position
    int GetDayOfWeek( const OUString& rString,
                      sal_Int32& nPos );

    // Get currency symbol and advance string position
    bool GetCurrency( const OUString& rString,
                      sal_Int32& nPos,
                      const SvNumberformat* pFormat = NULL ); // optional number format to match against

    // Get symbol AM or PM and advance string position
    bool GetTimeAmPm( const OUString& rString,
                      sal_Int32& nPos );

    // Get decimal separator and advance string position
    inline bool GetDecSep( const OUString& rString,
                           sal_Int32& nPos );

    // Get hundredth seconds separator and advance string position
    inline bool GetTime100SecSep( const OUString& rString,
                                  sal_Int32& nPos );

    // Get sign and advance string position
    // Including special case '('
    int GetSign( const OUString& rString,
                 sal_Int32& nPos );

    // Get sign of exponent and advance string position
    static short GetESign( const OUString& rString,
                           sal_Int32& nPos );

    // Get next number as array offset
    inline bool GetNextNumber( sal_uInt16& i,
                               sal_uInt16& j );

    /** Converts time -> double (only decimals)

        @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
     */
    bool GetTimeRef( double& fOutNumber,                     // result as double
                     sal_uInt16 nIndex,                      // Index of hour in input
                     sal_uInt16 nAnz );                      // Count of time substrings in input
    sal_uInt16 ImplGetDay  ( sal_uInt16 nIndex );   // Day input, 0 if no match
    sal_uInt16 ImplGetMonth( sal_uInt16 nIndex );   // Month input, zero based return, NumberOfMonths if no match
    sal_uInt16 ImplGetYear ( sal_uInt16 nIndex );   // Year input, 0 if no match

    // Conversion of date to number
    bool GetDateRef( double& fDays,                          // OUT: days diff to null date
                     sal_uInt16& nCounter,                   // Count of date substrings
                     const SvNumberformat* pFormat = NULL ); // optional number format to match against

    // Analyze start of string
    bool ScanStartString( const OUString& rString,
                          const SvNumberformat* pFormat = NULL );

    // Analyze middle substring
    bool ScanMidString( const OUString& rString,
                        sal_uInt16 nStringPos,
                        const SvNumberformat* pFormat = NULL );


    // Analyze end of string
    bool ScanEndString( const OUString& rString,
                        const SvNumberformat* pFormat = NULL );

    // Compare rString to substring of array indexed by nString
    // nString == 0xFFFF => last substring
    bool ScanStringNumFor( const OUString& rString,
                           sal_Int32 nPos,
                           const SvNumberformat* pFormat,
                           sal_uInt16 nString,
                           bool bDontDetectNegation = false );

    // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
    // else do nothing and return false
    bool MatchedReturn();

    //! Be sure that the string to be analyzed is already converted to upper
    //! case and if it contained native number digits that they are already
    //! converted to ASCII.

    // Main analyzing function
    bool IsNumberFormatMain( const OUString& rString,
                             const SvNumberformat* pFormat = NULL);    // optional number format to match against

    static inline bool MyIsdigit( sal_Unicode c );

    /** Whether input matches locale dependent date acceptance pattern.

        @param nStartPatternAt
               The pattern matching starts at input particle
               sStrArray[nStartPatternAt].

        NOTE: once called the result is remembered, subsequent calls with
        different parameters do not check for a match and do not lead to a
        different result.
     */
    bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );

    /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
        matches separator in pattern at nParticle.

        @returns TRUE if separator matched.
     */
    bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos );

    /** Returns count of numbers in accepted date pattern.
     */
    sal_uInt16 GetDatePatternNumbers();

    /** Obtain order of accepted date pattern coded as, for example,
        ('D'<<16)|('M'<<8)|'Y'
    */
    sal_uInt32 GetDatePatternOrder();

    /** Obtain date format order, from accepted date pattern if available or
        otherwise the locale's default order.
     */
    DateFormat GetDateOrder();

    /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...

        Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
        is '-', and 1<=mm<=12 and 1<=dd<=31.

        @see nMayBeIso8601
     */
    bool MayBeIso8601();

    /** Whether input may be a dd-month-yy format, with month name, not
        number.

        @see nMayBeMonthDate
     */
    bool MayBeMonthDate();
};

#endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */