summaryrefslogtreecommitdiff
path: root/svl/source/misc/sharedstringpool.cxx
blob: 997648fac36311ebc306ae9d8a9d425b18c47cb9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <svl/sharedstringpool.hxx>
#include <svl/sharedstring.hxx>
#include <unotools/charclass.hxx>
#include <osl/mutex.hxx>

#include <unordered_map>
#include <unordered_set>

/** create a key class that caches the hashcode */
namespace
{
struct StringWithHash
{
    OUString str;
    sal_Int32 hashCode;
    StringWithHash(OUString s)
        : str(s)
        , hashCode(s.hashCode())
    {
    }

    bool operator==(StringWithHash const& rhs) const
    {
        if (hashCode != rhs.hashCode)
            return false;
        return str == rhs.str;
    }
};
}

namespace std
{
template <> struct hash<StringWithHash>
{
    std::size_t operator()(const StringWithHash& k) const { return k.hashCode; }
};
}

namespace svl
{
namespace
{
sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
}

struct SharedStringPool::Impl
{
    mutable osl::Mutex maMutex;
    // We use this map for two purposes - to store lower->upper case mappings
    // and to retrieve a shared uppercase object, so the management logic
    // is quite complex.
    std::unordered_map<StringWithHash, OUString> maStrMap;
    const CharClass& mrCharClass;

    explicit Impl(const CharClass& rCharClass)
        : mrCharClass(rCharClass)
    {
    }
};

SharedStringPool::SharedStringPool(const CharClass& rCharClass)
    : mpImpl(new Impl(rCharClass))
{
}

SharedStringPool::~SharedStringPool() {}

SharedString SharedStringPool::intern(const OUString& rStr)
{
    StringWithHash aStrWithHash(rStr);
    osl::MutexGuard aGuard(&mpImpl->maMutex);

    auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr);
    if (!bInserted)
        // there is already a mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);

    // This is a new string insertion. Establish mapping to upper-case variant.
    OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
    if (aUpper == rStr)
        // no need to do anything more, because we inserted an upper->upper mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);

    // We need to insert a lower->upper mapping, so also insert
    // an upper->upper mapping, which we can use both for when an upper string
    // is interned, and to look up a shared upper string.
    StringWithHash aUpperWithHash(aUpper);
    auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash);
    if (mapIt2 != mpImpl->maStrMap.end())
    {
        // there is an already existing upper string
        mapIt->second = mapIt2->first.str;
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);
    }

    // There is no already existing upper string.
    // First, update using the iterator, can't do this later because
    // the iterator will be invalid.
    mapIt->second = aUpper;
    mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper);
    return SharedString(rStr.pData, aUpper.pData);
}

void SharedStringPool::purge()
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);

    // Because we can have an uppercase entry mapped to itself,
    // and then a bunch of lowercase entries mapped to that same
    // upper-case entry, we need to scan the map twice - the first
    // time to remove lowercase entries, and then only can we
    // check for unused uppercase entries.

    auto it = mpImpl->maStrMap.begin();
    auto itEnd = mpImpl->maStrMap.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.str.pData;
        rtl_uString* p2 = it->second.pData;
        if (p1 != p2)
        {
            // normal case - lowercase mapped to uppercase, which
            // means that the lowercase entry has one ref-counted
            // entry as the key in the map
            if (getRefCount(p1) == 1)
            {
                it = mpImpl->maStrMap.erase(it);
                continue;
            }
        }
        ++it;
    }

    it = mpImpl->maStrMap.begin();
    itEnd = mpImpl->maStrMap.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.str.pData;
        rtl_uString* p2 = it->second.pData;
        if (p1 == p2)
        {
            // uppercase which is mapped to itself, which means
            // one ref-counted entry as the key in the map, and
            // one ref-counted entry in the value in the map
            if (getRefCount(p1) == 2)
            {
                it = mpImpl->maStrMap.erase(it);
                continue;
            }
        }
        ++it;
    }
}

size_t SharedStringPool::getCount() const
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);
    return mpImpl->maStrMap.size();
}

size_t SharedStringPool::getCountIgnoreCase() const
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);
    // this is only called from unit tests, so no need to be efficient
    std::unordered_set<OUString> aUpperSet;
    for (auto const& pair : mpImpl->maStrMap)
        aUpperSet.insert(pair.second);
    return aUpperSet.size();
}
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */