/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

module com { module sun { module star { module i18n {

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\x09'
ASC_VT='\x0b'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE='"'
ASC_QUOTE='\''
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent group separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

DEFCHARS is not defined in this grammar; presumably it stands for the
user defined characters passed as aUserDefinedCharactersStart and
aUserDefinedCharactersCont (to be confirmed against the implementation).

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

*/

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */
published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    /** Convert lower case alpha to upper case alpha, starting at
        position nPos for nCount code points.
     */
    string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    /** Convert upper case alpha to lower case alpha, starting at
        position nPos for nCount code points.
     */
    string toLower( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    /** Convert to title case, starting at
        position nPos for nCount code points.
     */
    string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );

    /** Get UnicodeType of character at position nPos. */
    short getType( [in] string aText, [in] long nPos );

    /** Get DirectionProperty of character at position nPos. */
    short getCharacterDirection( [in] string aText, [in] long nPos );

    /** Get UnicodeScript of character at position nPos. */
    short getScript( [in] string aText, [in] long nPos );

    /** Get KCharacterType of character at position nPos. */
    long getCharacterType( [in] string aText, [in] long nPos,
                           [in] com::sun::star::lang::Locale aLocale );

    /** Get accumulated KCharacterTypes of string starting
        at position nPos of length nCount code points.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.

        @note
            The accumulated bits of several characters are meaningless
            as soon as characters of different classifications are
            involved, which even may have a common subset like
            KCharacterType::LETTER or KCharacterType::PRINTABLE, unless
            it is to be determined what overall character properties are
            present in the string. Use getCharacterType() of single
            characters instead and handle bits as needed if sets of
            character properties are to be obtained.
     */
    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                        [in] com::sun::star::lang::Locale aLocale );

    /**
        Parse a string for a token starting at position nPos.

        A name or identifier must match the KParseTokens criteria
        passed in nStartCharFlags and nContCharFlags and may
        additionally contain characters of
        aUserDefinedCharactersStart and/or aUserDefinedCharactersCont.

        @returns
            A filled ParseResult structure. If no unambiguous token
            could be parsed, ParseResult::TokenType will be set to
            0 (zero), other fields will contain the values parsed so far.

            If a token may represent either a numeric value or a name
            according to the passed Start/Cont-Flags/Chars, both
            KParseType::ASC_NUMBER (or KParseType::UNI_NUMBER) and
            KParseType::IDENTNAME are set in ParseResult::TokenType.

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of KParseTokens constants determining the allowed
            characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of KParseTokens constants determining the allowed
            characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @code{.cpp}
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        OUString aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
        @endcode
     */
    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    /**
        Parse a string for a token of type nTokenType
        starting at position nPos.

        Other parameters are the same as in parseAnyToken().
        If the actual token does not match the passed
        nTokenType a ParseResult::TokenType set to 0 (zero)
        is returned.

        @param nTokenType
            One or more of the KParseType constants.

        @param aText
            See #parseAnyToken
        @param nPos
            See #parseAnyToken
        @param aLocale
            See #parseAnyToken
        @param nStartCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersStart
            See #parseAnyToken
        @param nContCharFlags
            See #parseAnyToken
        @param aUserDefinedCharactersCont
            See #parseAnyToken

        @code{.cpp}
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        OUString aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        OUString aContChars( " " );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it is the only token,
        // i.e. nothing else follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.getLength();
        @endcode
     */
    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

}; }; }; };

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */