summaryrefslogtreecommitdiff
path: root/i18npool/inc/i18npool/languagetag.hxx
blob: 5666fd158600e1df3d18f161add458d1dc936f78 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#ifndef INCLUDED_I18NPOOL_LANGUAGETAG_HXX
#define INCLUDED_I18NPOOL_LANGUAGETAG_HXX

#include <sal/config.h>
#include <rtl/ustring.hxx>
#include <com/sun/star/lang/Locale.hpp>
#include <i18npool/i18npooldllapi.h>
#include <i18npool/lang.h>


/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
    conversions in between.

    A tag can be initialized from a BCP 47 string, a
    com::sun::star::lang::Locale, a LanguageType MS-LangID or a pair of
    language and country strings, and queried in any of these
    representations.

    Note that member variables are mutable and may change their values even in
    const methods. Getter methods return either the original value or matching
    converted values.
 */
class I18NISOLANG_DLLPUBLIC LanguageTag
{
public:

    /** Init LanguageTag with existing BCP 47 language tag string.

        @param rBcp47LanguageTag
            The BCP 47 language tag string.

        @param bCanonicalize
            If TRUE, canonicalize tag and reparse, the resulting tag string may
            be different.
            If FALSE, the tag is simply stored and can be retrieved with
            getBcp47().

        Note that conversions to ISO codes, locales or LanguageType or
        obtaining language or script will canonicalize the tag string anyway,
        so specifying bCanonicalize=false is not a guarantee that the tag will
        stay identical to what was passed.
     */
    explicit LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize = false );

    /** Init LanguageTag with Locale. */
    explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );

    /** Init LanguageTag with LanguageType MS-LangID. */
    explicit LanguageTag( LanguageType nLanguage );

    /** Init LanguageTag with language and country strings.

        This is a convenience ctor for places that so far use only language and
        country to replace the MsLangId::convert...IsoNames...() calls. Avoid
        use in new code.

        Note that there is no matching reset() overload taking language and
        country strings.
     */
    explicit LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry );

    // User-declared copy constructor, destructor and copy assignment.
    // NOTE(review): presumably needed because of the raw mpImplLangtag
    // pointer member; confirm the ownership handling in the implementation.
    LanguageTag( const LanguageTag & rLanguageTag );
    ~LanguageTag();
    LanguageTag& operator=( const LanguageTag & rLanguageTag );

    /** Obtain BCP 47 language tag.

        @param bResolveSystem
               If TRUE, resolve an empty language tag denoting the system
               locale to the real locale used.
               If FALSE, return an empty OUString for such a tag.

        @return Reference to the BCP 47 language tag string.
     */
    const rtl::OUString &           getBcp47( bool bResolveSystem = true ) const;

    /** Obtain language tag as Locale.

        As a convention, language tags that can not be expressed as "pure"
        com::sun::star::lang::Locale content using Language and Country fields
        store "qlt" (ISO 639 reserved for local use) in the Language field and
        the entire BCP 47 language tag in the Variant field. The Country field
        contains the corresponding ISO 3166 country code _if_ there is one, or
        otherwise is empty.

        @param bResolveSystem
               If TRUE, resolve an empty language tag denoting the system
               locale to the real locale used.
               If FALSE, return an empty Locale for such a tag.

        @return Reference to the Locale.
     */
    const com::sun::star::lang::Locale &    getLocale( bool bResolveSystem = true ) const;

    /** Obtain mapping to MS-LangID.

        @param bResolveSystem
               If TRUE, resolve an empty language tag denoting the system
               locale to the real locale used.
               If FALSE, return LANGUAGE_SYSTEM for such a tag.
     */
    LanguageType                    getLanguageType( bool bResolveSystem = true ) const;

    /** Get ISO 639 language code, or BCP 47 language.

        Always resolves an empty tag to the system locale.
     */
    rtl::OUString                   getLanguage() const;

    /** Get ISO 15924 script code, if not the default script according to
        BCP 47. For default script an empty string is returned.

        Always resolves an empty tag to the system locale.
     */
    rtl::OUString                   getScript() const;

    /** Get combined language and script code, separated by '-' if the script
        is a non-default script; for the default script only the language is
        returned.

        Always resolves an empty tag to the system locale.
     */
    rtl::OUString                   getLanguageAndScript() const;

    /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
        region not expressible as 2 character country code.

        Always resolves an empty tag to the system locale.
     */
    rtl::OUString                   getCountry() const;

    /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
        any other BCP 47 region tag.

        Always resolves an empty tag to the system locale.
     */
    rtl::OUString                   getRegion() const;

    /** Whether the language tag is a locale that can be expressed using only
        ISO 639 language codes and ISO 3166 country codes, thus is convertible
        to a conforming Locale struct without using extension mechanisms.

        Note that an empty language tag or empty Locale::Language field or
        LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
        some context, but here is not. If you want that ask for
        aTag.isSystemLocale() || aTag.isIsoLocale()

        Always resolves an empty tag to the system locale.
     */
    bool                            isIsoLocale() const;

    /** Whether the language tag is a locale that can be expressed using only
        ISO 639 language codes and ISO 15924 script codes and ISO 3166 country
        codes, thus can be stored in an ODF document using only fo:language,
        fo:script and fo:country attributes. If this is FALSE, the locale must
        be stored as a *:rfc-language-tag attribute.

        Always resolves an empty tag to the system locale.
     */
    bool                            isIsoODF() const;

    /** Whether this is a valid BCP 47 language tag.

        Always resolves an empty tag to the system locale.
     */
    bool                            isValidBcp47() const;

    /** Whether this tag was constructed as an empty tag denoting the system
        locale.
      */
    bool                            isSystemLocale() const;


    /** Reset with existing BCP 47 language tag string. See ctor. */
    void                            reset( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize = false );

    /** Reset with Locale. */
    void                            reset( const com::sun::star::lang::Locale & rLocale );

    /** Reset with LanguageType MS-LangID. */
    void                            reset( LanguageType nLanguage );

private:

    /** Tri-state value used by the meIs...() members below. */
    enum Decision
    {
        DECISION_DONTKNOW,
        DECISION_NO,
        DECISION_YES
    };

    // All data members except mbSystemLocale are mutable, so that const
    // methods may change them, see class description.
    mutable com::sun::star::lang::Locale    maLocale;
    mutable rtl::OUString                   maBcp47;
    mutable rtl::OUString                   maCachedLanguage;   ///< cache getLanguage()
    mutable rtl::OUString                   maCachedScript;     ///< cache getScript()
    mutable rtl::OUString                   maCachedCountry;    ///< cache getCountry()
    mutable void*                           mpImplLangtag;      ///< actually lt_tag_t pointer, encapsulated
    mutable LanguageType                    mnLangID;
    // NOTE(review): judging by the names, the three Decision members
    // presumably remember the results of isValidBcp47(), isIsoLocale() and
    // isIsoODF(); confirm in the implementation.
    mutable Decision                        meIsValid;
    mutable Decision                        meIsIsoLocale;
    mutable Decision                        meIsIsoODF;
    // One-bit flags. The only non-mutable data member is mbSystemLocale.
    // NOTE(review): the mbInitialized...() flags presumably tell whether
    // maBcp47, maLocale and mnLangID hold a value, and the mbCached...()
    // flags whether the matching maCached...() string is filled; confirm in
    // the implementation.
            bool                            mbSystemLocale      : 1;
    mutable bool                            mbInitializedBcp47  : 1;
    mutable bool                            mbInitializedLocale : 1;
    mutable bool                            mbInitializedLangID : 1;
    mutable bool                            mbCachedLanguage    : 1;
    mutable bool                            mbCachedScript      : 1;
    mutable bool                            mbCachedCountry     : 1;

    // Conversions between the three representations, named as
    // convert<Source>To<Target>().
    // NOTE(review): these are declared non-const although the const getters
    // are documented to return converted values; how they are invoked from
    // const context is not visible here, confirm in the implementation.
    void    convertLocaleToBcp47();
    void    convertLocaleToLang();
    void    convertBcp47ToLocale();
    void    convertBcp47ToLang();
    void    convertLangToLocale();
    void    convertLangToBcp47();

    // NOTE(review): meaning of the bool return value is not documented here;
    // presumably it indicates success, confirm in the implementation.
    bool    canonicalize() const;

    // NOTE(review): presumably these query the liblangtag tag behind
    // mpImplLangtag for the respective subtag; confirm in the implementation.
    rtl::OUString   getLanguageFromLangtag() const;
    rtl::OUString   getScriptFromLangtag() const;
    rtl::OUString   getRegionFromLangtag() const;

    void            resetVars();

    // Static checks on a single language, script or region string.
    // NOTE(review): the exact criteria are not visible in this header.
    static bool     isIsoLanguage( const rtl::OUString& rLanguage );
    static bool     isIsoScript( const rtl::OUString& rScript );
    static bool     isIsoCountry( const rtl::OUString& rRegion );
};

#endif  // INCLUDED_I18NPOOL_LANGUAGETAG_HXX

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */