summaryrefslogtreecommitdiff
path: root/offapi/com/sun/star/i18n/XCharacterClassification.idl
blob: 2d8da610f4f759be028554421a7e7c60eae22efb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


module com { module sun { module star { module i18n {


/*

Possible tokens to be parsed with  parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\x09'
ASC_VT='\x0b'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE='"'
ASC_QUOTE='\''
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=<any character>
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

*/


/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.

    <p> The interface groups three kinds of operations: case
    conversion (toUpper(), toLower(), toTitle()), queries for
    properties of single characters or of strings (getType(),
    getCharacterDirection(), getScript(), getCharacterType(),
    getStringType()), and token parsing (parseAnyToken(),
    parsePredefinedToken()). </p>
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to be converted.

        @param  aLocale
            Locale the conversion is requested for.

        @returns
            The result of the conversion.
     */
    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                      [in] com::sun::star::lang::Locale aLocale );

    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to be converted.

        @param  aLocale
            Locale the conversion is requested for.

        @returns
            The result of the conversion.
     */
    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
                      [in] com::sun::star::lang::Locale aLocale );

    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to be converted.

        @param  aLocale
            Locale the conversion is requested for.

        @returns
            The result of the conversion.
     */
    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                      [in] com::sun::star::lang::Locale aLocale );

    /** Get UnicodeType of character at position <em>nPos</em>.

        @param  aText
            Text containing the character to be examined.

        @param  nPos
            Position of the character in <em>aText</em>.

        @returns
            The UnicodeType of the character.
     */
    short    getType( [in] string aText, [in] long nPos );

    /** Get DirectionProperty of character at position
        <em>nPos</em>.

        @param  aText
            Text containing the character to be examined.

        @param  nPos
            Position of the character in <em>aText</em>.

        @returns
            The DirectionProperty of the character.
     */
    short    getCharacterDirection( [in] string aText, [in] long nPos );

    /** Get UnicodeScript of character at position <em>nPos</em>.

        @param  aText
            Text containing the character to be examined.

        @param  nPos
            Position of the character in <em>aText</em>.

        @returns
            The UnicodeScript of the character.
     */
    short    getScript( [in] string aText, [in] long nPos );

    /** Get KCharacterType of character at position <em>nPos</em>.

        @param  aText
            Text containing the character to be examined.

        @param  nPos
            Position of the character in <em>aText</em>.

        @param  aLocale
            Locale the character type is requested for.

        @returns
            The KCharacterType bits of the character.
     */
    long getCharacterType( [in] string aText, [in] long nPos,
                           [in] com::sun::star::lang::Locale aLocale );

    /** Get accumulated KCharacterTypes of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be examined.

        @param  nPos
            Position in <em>aText</em> where the examination starts.

        @param  nCount
            Number of code points to be examined.

        @param  aLocale
            Locale the character types are requested for.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.

        @note The accumulated bits of several characters are meaningless
            as soon as characters of different classifications are
            involved, which even may have a common subset like
            KCharacterType::LETTER or KCharacterType::PRINTABLE, unless
            it is to be determined what overall character properties are
            present in the string. Use getCharacterType() of single
            characters instead and handle bits as needed if sets of
            character properties are to be obtained.

    */
    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                        [in] com::sun::star::lang::Locale aLocale );


    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        KParseTokens criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled ParseResult structure. If no
            unambiguous token could be parsed,
            ParseResult::TokenType will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            KParseType::ASC_NUMBER (or
            KParseType::UNI_NUMBER) and
            KParseType::IDENTNAME are set in
            ParseResult::TokenType. </p>

        @param  aText
            Text to be parsed.

        @param  nPos
            Position where parsing starts.

        @param  aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param  nStartCharFlags
            A set of KParseTokens constants determining the
            allowed characters a name or identifier may start with.

        @param  aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param  nContCharFlags
            A set of KParseTokens constants determining the
            allowed characters a name or identifier may continue with.

        @param  aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @code{.cpp}
            using namespace ::com::sun::star::i18n;
            // First character of an identifier may be any alphabetic or underscore.
            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
            // Continuing characters may be any alphanumeric or underscore or dot.
            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
            // No further characters assumed to be contained in an identifier
            OUString aEmptyString;
            // Parse any token.
            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
                nStartFlags, aEmptyString, nContFlags, aEmptyString );
            // Get parsed token.
            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
                fValue = rRes.Value;
            // Deliberately not "else if": a token may be flagged as both
            // a number and an identifier name at the same time.
            if ( rRes.TokenType & KParseType::IDENTNAME )
                aName = aText.copy( nPos, rRes.EndPos - nPos );
            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
                aName = rRes.DequotedNameOrString;
            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
                aString = rRes.DequotedNameOrString;
            else if ( rRes.TokenType & KParseType::BOOLEAN )
                aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
                aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
        @endcode
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        parseAnyToken(). If the actual token does not
        match the passed <em>nTokenType</em> a
        ParseResult::TokenType set to <b>0</b> (zero)
        is returned. </p>

        @returns
            A ParseResult structure; its ParseResult::TokenType is
            <b>0</b> (zero) if the token found does not match
            <em>nTokenType</em>.

        @param  nTokenType
            One or more of the KParseType constants.

        @param aText
            See #parseAnyToken
        @param nPos
            See #parseAnyToken
        @param aLocale
            See #parseAnyToken
        @param nStartCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersStart
            See #parseAnyToken
        @param nContCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersCont
            See #parseAnyToken

        @code{.cpp}
            // Determine if a given name is a valid name (not quoted) and contains
            // only allowed characters.
            using namespace ::com::sun::star::i18n;
            // First character of an identifier may be any alphanumeric or underscore.
            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
            // No further characters assumed to be contained in an identifier start.
            OUString aEmptyString;
            // Continuing characters may be any alphanumeric or underscore.
            sal_Int32 nContFlags = nStartFlags;
            // Additionally, continuing characters may contain a blank.
            OUString aContChars( " " );
            // Parse predefined (must be an IDENTNAME) token.
            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
                nStartFlags, aEmptyString, nContFlags, aContChars );
            // Test if it is an identifier name and if it is the only
            // token, i.e. nothing else follows it in the string.
            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.getLength();
        @endcode
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

}; }; }; };

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */